summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--goo/GooString.h12
-rw-r--r--poppler/Error.h4
-rw-r--r--test/goostring-format-checker/README16
-rw-r--r--test/goostring-format-checker/goostring-format-checker.cc369
4 files changed, 397 insertions, 4 deletions
diff --git a/goo/GooString.h b/goo/GooString.h
index 6bdcf06f..5932be99 100644
--- a/goo/GooString.h
+++ b/goo/GooString.h
@@ -18,7 +18,7 @@
// Copyright (C) 2006 Kristian Høgsberg <krh@redhat.com>
// Copyright (C) 2006 Krzysztof Kowalczyk <kkowalczyk@gmail.com>
// Copyright (C) 2008-2010, 2012 Albert Astals Cid <aacid@kde.org>
-// Copyright (C) 2012, 2013 Fabio D'Urso <fabiodurso@hotmail.it>
+// Copyright (C) 2012-2014 Fabio D'Urso <fabiodurso@hotmail.it>
// Copyright (C) 2013 Jason Crain <jason@aquaticape.us>
//
// To see a description of the changes please see the Changelog file that
@@ -38,6 +38,12 @@
#include <stdlib.h> // for NULL
#include "gtypes.h"
+#ifdef __clang__ // the annotation below is emitted only when compiling with clang
+# define GOOSTRING_FORMAT __attribute__((__annotate__("gooformat"))) // marks a GooString::format-like function for goostring-format-checker
+#else
+# define GOOSTRING_FORMAT // expands to nothing on every other compiler
+#endif
+
class GooString {
public:
@@ -97,7 +103,7 @@ public:
// t -- GooString *
// w -- blank space; arg determines width
// To get literal curly braces, use {{ or }}.
- static GooString *format(const char *fmt, ...);
+ static GooString *format(const char *fmt, ...) GOOSTRING_FORMAT;
static GooString *formatv(const char *fmt, va_list argList);
// Destructor.
@@ -124,7 +130,7 @@ public:
GooString *append(const char *str, int lengthA=CALC_STRING_LEN);
// Append a formatted string.
- GooString *appendf(const char *fmt, ...);
+ GooString *appendf(const char *fmt, ...) GOOSTRING_FORMAT;
GooString *appendfv(const char *fmt, va_list argList);
// Insert a character or string.
diff --git a/poppler/Error.h b/poppler/Error.h
index 88fc3ea4..9e11733d 100644
--- a/poppler/Error.h
+++ b/poppler/Error.h
@@ -17,6 +17,7 @@
// Copyright (C) 2005 Albert Astals Cid <aacid@kde.org>
// Copyright (C) 2005 Kristian Høgsberg <krh@redhat.com>
// Copyright (C) 2013 Adrian Johnson <ajohnson@redneon.com>
+// Copyright (C) 2014 Fabio D'Urso <fabiodurso@hotmail.it>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -33,6 +34,7 @@
#include <stdarg.h>
#include "poppler-config.h"
#include "goo/gtypes.h"
+#include "goo/GooString.h"
enum ErrorCategory {
errSyntaxWarning, // PDF syntax error which can be worked around;
@@ -53,6 +55,6 @@ extern void setErrorCallback(void (*cbk)(void *data, ErrorCategory category,
Goffset pos, char *msg),
void *data);
-extern void CDECL error(ErrorCategory category, Goffset pos, const char *msg, ...);
+extern void CDECL error(ErrorCategory category, Goffset pos, const char *msg, ...) GOOSTRING_FORMAT;
#endif
diff --git a/test/goostring-format-checker/README b/test/goostring-format-checker/README
new file mode 100644
index 00000000..cc58a5d7
--- /dev/null
+++ b/test/goostring-format-checker/README
@@ -0,0 +1,16 @@
+== Clang++ compiler plugin that checks usage of GooString::format-like functions ==
+
+1) Compile the plugin with:
+ clang++ -shared -o goostring-format-checker.so goostring-format-checker.cc -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS
+
+2) Compile poppler and pass the following options to the clang++ compiler:
+ -Xclang -load -Xclang goostring-format-checker.so -Xclang -add-plugin -Xclang goostring-format-checker
+
+Example:
+$ clang++ -shared -o goostring-format-checker.so goostring-format-checker.cc -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS
+$ export CXX=clang++
+$ export CXXFLAGS="-Xclang -load -Xclang $PWD/goostring-format-checker.so -Xclang -add-plugin -Xclang goostring-format-checker"
+$ mkdir build
+$ cd build
+$ cmake ../../..
+$ make
diff --git a/test/goostring-format-checker/goostring-format-checker.cc b/test/goostring-format-checker/goostring-format-checker.cc
new file mode 100644
index 00000000..eab92213
--- /dev/null
+++ b/test/goostring-format-checker/goostring-format-checker.cc
@@ -0,0 +1,369 @@
+/*
+ * goostring-format-checker.cc
+ *
+ * This file is licensed under the GPLv2 or later
+ *
+ * Clang++ compiler plugin that checks usage of GooString::format-like functions
+ *
+ * Copyright (C) 2014 Fabio D'Urso <fabiodurso@hotmail.it>
+ */
+
+#include <cctype>
+
+#include <clang/Frontend/FrontendPluginRegistry.h>
+#include <clang/AST/AST.h>
+#include <clang/AST/ASTConsumer.h>
+#include <clang/AST/Attr.h>
+#include <clang/AST/RecursiveASTVisitor.h>
+#include <clang/Frontend/CompilerInstance.h>
+
+using namespace clang;
+
+namespace
+{
+
+class GooStringFormatCheckerVisitor : public RecursiveASTVisitor<GooStringFormatCheckerVisitor> { // AST visitor that validates uses of "gooformat"-annotated functions
+public:
+    explicit GooStringFormatCheckerVisitor(CompilerInstance *compInst); // registers the custom diagnostics listed below
+
+    bool VisitFunctionDecl(FunctionDecl *funcDecl); // reports misuse of the "gooformat" annotation on a declaration
+    bool VisitCallExpr(CallExpr *callExpr); // checks the format string and the arguments of a call
+
+private:
+    /* Returns the index of the format argument, or -1 if the function must
+     * not be checked (not annotated, or annotation misused and reported) */
+    int findFormatArgumentIndex(const FunctionDecl *funcDecl) const;
+
+    /* Returns the SourceLocation of the n-th character of strLiteral */
+    SourceLocation getLocationOfCharacter(const StringLiteral *strLiteral, unsigned n);
+
+    /* Validates usage of a placeholder and returns the corresponding
+     * argument number, or -1 if the <arg> part itself is malformed */
+    int verifyPlaceholder(const CallExpr *callExpr, const SourceLocation &placeholderLocation,
+                          std::string &placeholderText, int baseArgIdx) const;
+
+    CompilerInstance *compInst; // compiler instance received by the constructor
+    DiagnosticsEngine *diag; // diagnostics engine of compInst
+    unsigned diag_badFuncZeroArgs; // error: annotated function takes no arguments
+    unsigned diag_badFuncNonVariadic; // error: annotated function is not variadic
+    unsigned diag_badFuncLastArgInvalidType; // error: last non-variadic argument is not const char *
+    unsigned diag_notStringLiteral; // warning: format argument is not a string literal
+    unsigned diag_notPlainASCII; // warning: format string has non-ASCII or NUL characters
+    unsigned diag_wrongOrder; // error: an argument is first consumed out of order
+    unsigned diag_unescapedBracket; // error: lone '}' outside a placeholder
+    unsigned diag_unterminatedPlaceholder; // error: format string ends inside a placeholder
+    unsigned diag_unconsumedArgs; // warning: trailing argument(s) not consumed
+    unsigned diag_missingColon; // error: placeholder has no ':'
+    unsigned diag_missingArgNumber; // error: placeholder has nothing before ':'
+    unsigned diag_badArgNumber; // error: non-digit character before ':'
+    unsigned diag_argumentNotPresent; // error: placeholder refers to an argument the call does not pass
+    unsigned diag_badPrecision; // error: more than one '.' in the width/precision part
+    unsigned diag_badType; // error: unknown <type> specifier
+    unsigned diag_wrongArgExprType; // error: argument type does not match the <type> specifier
+};
+
+/* Stores the compiler instance and registers, with its diagnostics engine,
+ * one custom diagnostic per kind of problem the checker can report */
+GooStringFormatCheckerVisitor::GooStringFormatCheckerVisitor(CompilerInstance *compInst)
+: compInst(compInst), diag(&compInst->getDiagnostics()) {
+    DiagnosticsEngine &engine = *diag;
+    const DiagnosticsEngine::Level asError = DiagnosticsEngine::Error;
+    const DiagnosticsEngine::Level asWarning = DiagnosticsEngine::Warning;
+
+    // Misuse of the "gooformat" annotation on a function declaration
+    diag_badFuncZeroArgs = engine.getCustomDiagID(asError, "Cannot enforce format string checks on a function that takes no arguments");
+    diag_badFuncNonVariadic = engine.getCustomDiagID(asError, "Cannot enforce format string checks on a non-variadic function");
+    diag_badFuncLastArgInvalidType = engine.getCustomDiagID(asError, "Cannot enforce format string checks if the last non-variadic argument is not const char *");
+
+    // Format strings that cannot be analyzed at all
+    diag_notStringLiteral = engine.getCustomDiagID(asWarning, "Format string is not a string literal. Skipping format checks");
+    diag_notPlainASCII = engine.getCustomDiagID(asWarning, "Format string contains non-ASCII or NUL characters. Skipping format checks");
+
+    // Problems with the overall structure of the format string
+    diag_wrongOrder = engine.getCustomDiagID(asError, "Argument %0 must be consumed before argument %1");
+    diag_unescapedBracket = engine.getCustomDiagID(asError, "Unescaped '}' character");
+    diag_unterminatedPlaceholder = engine.getCustomDiagID(asError, "Unterminated placeholder");
+    diag_unconsumedArgs = engine.getCustomDiagID(asWarning, "Unconsumed argument(s)");
+
+    // Problems with a single placeholder
+    diag_missingColon = engine.getCustomDiagID(asError, "Invalid placeholder '{%0}': missing colon character");
+    diag_missingArgNumber = engine.getCustomDiagID(asError, "Invalid placeholder '{%0}': missing <arg> number");
+    diag_badArgNumber = engine.getCustomDiagID(asError, "Invalid placeholder '{%0}': bad <arg> number");
+    diag_argumentNotPresent = engine.getCustomDiagID(asError, "Argument for placeholder '{%0}' is not present");
+    diag_badPrecision = engine.getCustomDiagID(asError, "Invalid placeholder '{%0}': bad <precision> value");
+    diag_badType = engine.getCustomDiagID(asError, "Invalid placeholder '{%0}': bad <type> specifier");
+    diag_wrongArgExprType = engine.getCustomDiagID(asError, "Expected %0 for placeholder '{%1}', found %2");
+}
+
+/* Visits a function declaration. Always returns true. */
+bool GooStringFormatCheckerVisitor::VisitFunctionDecl(FunctionDecl *funcDecl) {
+    /* The index itself is not needed here: the lookup is performed only
+     * because it reports misuse of the "gooformat" annotation */
+    static_cast<void>(findFormatArgumentIndex(funcDecl));
+    return true;
+}
+
+/* Checks one call expression. Calls whose direct callee has no valid
+ * "gooformat" annotation are skipped. Otherwise the format string literal is
+ * parsed: each {<arg>:<spec>} placeholder is validated by verifyPlaceholder(),
+ * the order in which arguments are first consumed is checked, and trailing
+ * arguments that were never consumed are reported. Always returns true. */
+bool GooStringFormatCheckerVisitor::VisitCallExpr(CallExpr *callExpr) {
+    /*** Locate format argument or skip calls that needn't be checked ***/
+
+    const int formatArgIdx = findFormatArgumentIndex(callExpr->getDirectCallee());
+    if (formatArgIdx == -1)
+        return true;
+
+    /*** Obtain format string value ***/
+
+    // Peel off implicit casts wrapping the format argument
+    const Expr *formatArgExpr = callExpr->getArg(formatArgIdx);
+    while (formatArgExpr->getStmtClass() == Stmt::ImplicitCastExprClass) {
+        formatArgExpr = static_cast<const ImplicitCastExpr*>(formatArgExpr)->getSubExpr();
+    }
+    if (formatArgExpr->getStmtClass() != Stmt::StringLiteralClass) {
+        diag->Report(formatArgExpr->getExprLoc(), diag_notStringLiteral);
+        return true;
+    }
+    const StringLiteral *formatArgStrLiteral = static_cast<const StringLiteral*>(formatArgExpr);
+    if (formatArgStrLiteral->containsNonAsciiOrNull()) {
+        diag->Report(formatArgExpr->getExprLoc(), diag_notPlainASCII);
+        return true;
+    }
+
+    /*** Parse format string and verify arguments ***/
+
+    const std::string format = formatArgStrLiteral->getString().str();
+
+    /* Keeps track of whether we are currently parsing a character contained
+     * within '{' ... '}'. If set, current_placeholder contains the contents
+     * parsed so far (without brackets) */
+    bool in_placeholder = false;
+    std::string current_placeholder;
+
+    // Source location of the current placeholder's opening bracket
+    SourceLocation placeholderLoc;
+
+    /* Keeps track of the next expected argument number, to check that
+     * arguments are first consumed in order (eg {0:d}{2:d}{1:d} is wrong).
+     * Note that it's possible to "look back" at already consumed
+     * arguments (eg {0:d}{1:d}{0:d} is OK).
+     * The value -1 means that order checks have been disabled */
+    int nextExpectedArgNum = 0;
+
+    for (unsigned i = 0; i < format.length(); i++) {
+        if (in_placeholder) {
+            // Have we reached the end of the placeholder?
+            if (format[i] == '}') {
+                in_placeholder = false;
+
+                // Verifies the placeholder and returns the argument number
+                const int foundArgNum = verifyPlaceholder(callExpr, placeholderLoc, current_placeholder, formatArgIdx+1);
+
+                // If the placeholder wasn't valid, disable argument order checks
+                if (foundArgNum == -1) {
+                    nextExpectedArgNum = -1;
+                }
+
+                // If argument order checks are enabled, let's check!
+                if (nextExpectedArgNum != -1) {
+                    if (foundArgNum == nextExpectedArgNum) {
+                        nextExpectedArgNum++;
+                    } else if (foundArgNum > nextExpectedArgNum) {
+                        diag->Report(placeholderLoc, diag_wrongOrder) << nextExpectedArgNum << foundArgNum;
+                        nextExpectedArgNum = -1; // disable further checks
+                    }
+                }
+            } else {
+                current_placeholder += format[i];
+            }
+        } else if (format[i] == '{') {
+            // If we find a '{' then a placeholder is starting...
+            in_placeholder = true;
+            current_placeholder = "";
+            placeholderLoc = getLocationOfCharacter(formatArgStrLiteral, i);
+
+            // ...unless it's followed by another '{' (escape sequence)
+            if (i+1 < format.length() && format[i+1] == '{') {
+                i++; // skip next '{' character
+                in_placeholder = false;
+            }
+        } else if (format[i] == '}') {
+            /* If we have found a '}' and we're not in a placeholder,
+             * then it *MUST* be followed by another '}' (escape sequence) */
+            if (i+1 >= format.length() || format[i+1] != '}') {
+                diag->Report(getLocationOfCharacter(formatArgStrLiteral, i), diag_unescapedBracket);
+            } else {
+                i++; // skip next '}' character
+            }
+        }
+    }
+
+    /* If we've reached the end of the format string and in_placeholder is
+     * still set, then the last placeholder wasn't terminated properly */
+    if (in_placeholder)
+        diag->Report(placeholderLoc, diag_unterminatedPlaceholder);
+
+    /* Report trailing arguments that no placeholder consumed. This is only
+     * meaningful while nextExpectedArgNum still counts the consumed
+     * arguments: once order checks have been disabled (-1) that count is
+     * unknown, and computing the difference anyway would always yield a
+     * positive value and flag the format string itself as unconsumed */
+    if (nextExpectedArgNum != -1) {
+        const int unconsumedArgs = callExpr->getNumArgs() - (formatArgIdx + 1 + nextExpectedArgNum);
+        if (unconsumedArgs > 0)
+            diag->Report(callExpr->getArg(callExpr->getNumArgs() - unconsumedArgs)->getExprLoc(), diag_unconsumedArgs);
+    }
+
+    return true;
+}
+
+/* Returns the index of the format string parameter of funcDecl, or -1 if
+ * funcDecl is null, carries no "gooformat" annotation, or carries one but
+ * has an unsuitable signature (an error is reported in that last case) */
+int GooStringFormatCheckerVisitor::findFormatArgumentIndex(const FunctionDecl *funcDecl) const {
+    // A null declaration cannot be checked
+    if (funcDecl == NULL)
+        return -1;
+
+    // Search the declaration's annotate attributes for "gooformat"
+    typedef specific_attr_iterator<AnnotateAttr> AnnotateAttrIter;
+    AnnotateAttr *gooformatAttr = NULL;
+    AnnotateAttrIter attrsEnd = funcDecl->specific_attr_end<AnnotateAttr>();
+    for (AnnotateAttrIter attr = funcDecl->specific_attr_begin<AnnotateAttr>(); attr != attrsEnd; ++attr) {
+        if (attr->getAnnotation() == "gooformat") {
+            gooformatAttr = *attr;
+            break;
+        }
+    }
+
+    // Functions without the annotation are simply not checked
+    if (gooformatAttr == NULL)
+        return -1;
+
+    // From here on the function is annotated: verify that its signature is usable
+    const unsigned numParams = funcDecl->getNumParams();
+    if (numParams == 0) {
+        diag->Report(gooformatAttr->getLocation(), diag_badFuncZeroArgs);
+        return -1;
+    }
+
+    if (!funcDecl->isVariadic()) {
+        diag->Report(gooformatAttr->getLocation(), diag_badFuncNonVariadic);
+        return -1;
+    }
+
+    // The last non-variadic parameter is assumed to be the format specifier
+    const int lastNamedIdx = numParams - 1;
+    const std::string lastNamedType = funcDecl->getParamDecl(lastNamedIdx)->getType().getAsString();
+    if (lastNamedType == "const char *")
+        return lastNamedIdx;
+
+    diag->Report(gooformatAttr->getLocation(), diag_badFuncLastArgInvalidType);
+    return -1;
+}
+
+/* Returns the source location of byte n of strLiteral, resolved with the
+ * source manager, language options and target of the compiler instance */
+SourceLocation GooStringFormatCheckerVisitor::getLocationOfCharacter(const StringLiteral *strLiteral, unsigned n)
+{
+    const SourceManager &sourceMgr = compInst->getSourceManager();
+    const LangOptions &langOpts = compInst->getLangOpts();
+    const TargetInfo &target = compInst->getTarget();
+    return strLiteral->getLocationOfByte(n, sourceMgr, langOpts, target);
+}
+
+/* Validates one placeholder of the form <arg>:[<width>][.<precision>]<type>.
+ *
+ * callExpr            -- the call being checked
+ * placeholderLocation -- where the placeholder starts (used for diagnostics)
+ * placeholderText     -- placeholder contents, without the curly brackets
+ * baseArgIdx          -- index, in the call's argument list, of argument 0
+ *
+ * Returns the <arg> number, or -1 if the <arg> part is missing or malformed.
+ * Any other problem is reported but the <arg> number is still returned. */
+int GooStringFormatCheckerVisitor::verifyPlaceholder(const CallExpr *callExpr, const SourceLocation &placeholderLocation,
+                                                     std::string &placeholderText, int baseArgIdx) const
+{
+    /*** Locate the colon and validate the <arg> number that precedes it ***/
+
+    const std::string::size_type colonPos = placeholderText.find(':');
+    if (colonPos == std::string::npos) {
+        diag->Report(placeholderLocation, diag_missingColon) << placeholderText;
+        return -1;
+    }
+    if (colonPos == 0) {
+        diag->Report(placeholderLocation, diag_missingArgNumber) << placeholderText;
+        return -1;
+    }
+    for (std::string::size_type pos = 0; pos != colonPos; ++pos) {
+        if (isdigit(placeholderText[pos]))
+            continue;
+        diag->Report(placeholderLocation, diag_badArgNumber) << placeholderText;
+        return -1;
+    }
+
+    /*** Map the <arg> number to a position in the call's argument list ***/
+
+    const std::string argNumText(placeholderText, 0, colonPos);
+    const int argNum = atoi(argNumText.c_str());
+    const int argIdx = baseArgIdx + argNum;
+    if (argIdx >= callExpr->getNumArgs()) {
+        diag->Report(placeholderLocation, diag_argumentNotPresent) << placeholderText;
+        return argNum;
+    }
+
+    /*** Skip the optional width/precision part; at most one '.' is allowed ***/
+
+    const std::string spec(placeholderText, colonPos + 1);
+    std::string::size_type typeStart = 0;
+    bool seenDot = false;
+    for (; typeStart < spec.length(); ++typeStart) {
+        const char ch = spec[typeStart];
+        if (ch == '.') {
+            if (seenDot) {
+                diag->Report(placeholderLocation, diag_badPrecision) << placeholderText;
+                return argNum;
+            }
+            seenDot = true;
+        } else if (!isdigit(ch)) {
+            break;
+        }
+    }
+    const std::string typeSpec = spec.substr(typeStart);
+
+    /*** Compare the argument's type with what <type> requires ***/
+
+    const Expr *argExpr = callExpr->getArg(argIdx);
+    const QualType qualType = argExpr->getType();
+    const Type *valueType = qualType->getUnqualifiedDesugaredType();
+
+    /* Integer specifiers are a size prefix followed by one of d, x, X, o, b.
+     * Each prefix requires exactly one builtin type */
+    struct IntFamily {
+        const char *sizePrefix;
+        BuiltinType::Kind kind;
+        const char *typeName;
+    };
+    static const IntFamily intFamilies[] = {
+        { "",    BuiltinType::Int,       "int" },
+        { "u",   BuiltinType::UInt,      "unsigned int" },
+        { "l",   BuiltinType::Long,      "long" },
+        { "ul",  BuiltinType::ULong,     "unsigned long" },
+        { "ll",  BuiltinType::LongLong,  "long long" },
+        { "ull", BuiltinType::ULongLong, "unsigned long long" }
+    };
+
+    const char *expectedType = NULL; // stays NULL if <type> is not recognized
+    bool typeMatches = false;
+
+    bool isIntegerConversion = false;
+    if (!typeSpec.empty()) {
+        switch (typeSpec[typeSpec.length() - 1]) {
+        case 'd':
+        case 'x':
+        case 'X':
+        case 'o':
+        case 'b':
+            isIntegerConversion = true;
+            break;
+        default:
+            break;
+        }
+    }
+
+    if (isIntegerConversion) {
+        const std::string sizePrefix = typeSpec.substr(0, typeSpec.length() - 1);
+        for (size_t i = 0; i < sizeof(intFamilies) / sizeof(intFamilies[0]); ++i) {
+            if (sizePrefix == intFamilies[i].sizePrefix) {
+                expectedType = intFamilies[i].typeName;
+                typeMatches = valueType->isSpecificBuiltinType(intFamilies[i].kind);
+                break;
+            }
+        }
+    } else if (typeSpec == "w") {
+        expectedType = "int";
+        typeMatches = valueType->isSpecificBuiltinType(BuiltinType::Int);
+    } else if (typeSpec == "f" || typeSpec == "g" || typeSpec == "gs") {
+        expectedType = "float or double";
+        typeMatches = valueType->isSpecificBuiltinType(BuiltinType::Double);
+    } else if (typeSpec == "c") {
+        expectedType = "char, short or int";
+        typeMatches = valueType->isSpecificBuiltinType(BuiltinType::UInt)
+                   || valueType->isSpecificBuiltinType(BuiltinType::Int);
+    } else if (typeSpec == "s") {
+        expectedType = "char *";
+        typeMatches = valueType->isPointerType()
+                   && valueType->getPointeeType()->getUnqualifiedDesugaredType()->isCharType();
+    } else if (typeSpec == "t") {
+        expectedType = "GooString *";
+        const CXXRecordDecl *pointee = NULL;
+        if (valueType->isPointerType())
+            pointee = valueType->getPointeeType()->getAsCXXRecordDecl();
+        typeMatches = pointee != NULL && pointee->getQualifiedNameAsString() == "GooString";
+    }
+
+    if (expectedType == NULL) {
+        diag->Report(placeholderLocation, diag_badType) << placeholderText;
+    } else if (!typeMatches) {
+        diag->Report(argExpr->getExprLoc(), diag_wrongArgExprType) << expectedType << placeholderText << qualType.getAsString();
+    }
+
+    return argNum;
+}
+
+/* AST consumer that runs the format checker visitor over the declarations
+ * of a translation unit */
+class GooStringFormatCheckerConsumer : public ASTConsumer {
+public:
+    GooStringFormatCheckerConsumer(CompilerInstance *compInst)
+    : visitor(compInst)
+    {
+    }
+
+    // Starts the traversal from the translation unit's top-level declaration
+    virtual void HandleTranslationUnit(ASTContext &context) {
+        TranslationUnitDecl *const root = context.getTranslationUnitDecl();
+        visitor.TraverseDecl(root);
+    }
+
+private:
+    GooStringFormatCheckerVisitor visitor;
+};
+
+/* Plugin action: creates the checker's AST consumer and rejects any
+ * plugin argument */
+class GooStringFormatCheckerAction : public PluginASTAction
+{
+protected:
+    // Creates a new consumer bound to the given compiler instance
+    ASTConsumer *CreateASTConsumer(CompilerInstance &compInst, llvm::StringRef inFile) {
+        CompilerInstance *const instance = &compInst;
+        return new GooStringFormatCheckerConsumer(instance);
+    }
+
+    // Returns true if no arguments were passed; otherwise reports an error
+    // and returns false
+    bool ParseArgs(const CompilerInstance &compInst, const std::vector<std::string>& args) {
+        if (args.empty())
+            return true;
+
+        DiagnosticsEngine &engine = compInst.getDiagnostics();
+        const unsigned noArgsDiag = engine.getCustomDiagID(DiagnosticsEngine::Error, "goostring-format-checker takes no arguments");
+        engine.Report(noArgsDiag);
+        return false;
+    }
+};
+
+}
+
+// Registers the action in the frontend plugin registry as "goostring-format-checker"
+static FrontendPluginRegistry::Add<GooStringFormatCheckerAction>
+registerGooStringFormatChecker("goostring-format-checker", "Checks usage of GooString::format-like functions");