diff options
-rw-r--r-- | goo/GooString.h | 12 | ||||
-rw-r--r-- | poppler/Error.h | 4 | ||||
-rw-r--r-- | test/goostring-format-checker/README | 16 | ||||
-rw-r--r-- | test/goostring-format-checker/goostring-format-checker.cc | 369 |
4 files changed, 397 insertions, 4 deletions
diff --git a/goo/GooString.h b/goo/GooString.h index 6bdcf06f..5932be99 100644 --- a/goo/GooString.h +++ b/goo/GooString.h @@ -18,7 +18,7 @@ // Copyright (C) 2006 Kristian Høgsberg <krh@redhat.com> // Copyright (C) 2006 Krzysztof Kowalczyk <kkowalczyk@gmail.com> // Copyright (C) 2008-2010, 2012 Albert Astals Cid <aacid@kde.org> -// Copyright (C) 2012, 2013 Fabio D'Urso <fabiodurso@hotmail.it> +// Copyright (C) 2012-2014 Fabio D'Urso <fabiodurso@hotmail.it> // Copyright (C) 2013 Jason Crain <jason@aquaticape.us> // // To see a description of the changes please see the Changelog file that @@ -38,6 +38,12 @@ #include <stdlib.h> // for NULL #include "gtypes.h" +#ifdef __clang__ +# define GOOSTRING_FORMAT __attribute__((__annotate__("gooformat"))) +#else +# define GOOSTRING_FORMAT +#endif + class GooString { public: @@ -97,7 +103,7 @@ public: // t -- GooString * // w -- blank space; arg determines width // To get literal curly braces, use {{ or }}. - static GooString *format(const char *fmt, ...); + static GooString *format(const char *fmt, ...) GOOSTRING_FORMAT; static GooString *formatv(const char *fmt, va_list argList); // Destructor. @@ -124,7 +130,7 @@ public: GooString *append(const char *str, int lengthA=CALC_STRING_LEN); // Append a formatted string. - GooString *appendf(const char *fmt, ...); + GooString *appendf(const char *fmt, ...) GOOSTRING_FORMAT; GooString *appendfv(const char *fmt, va_list argList); // Insert a character or string. 
diff --git a/poppler/Error.h b/poppler/Error.h index 88fc3ea4..9e11733d 100644 --- a/poppler/Error.h +++ b/poppler/Error.h @@ -17,6 +17,7 @@ // Copyright (C) 2005 Albert Astals Cid <aacid@kde.org> // Copyright (C) 2005 Kristian Høgsberg <krh@redhat.com> // Copyright (C) 2013 Adrian Johnson <ajohnson@redneon.com> +// Copyright (C) 2014 Fabio D'Urso <fabiodurso@hotmail.it> // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -33,6 +34,7 @@ #include <stdarg.h> #include "poppler-config.h" #include "goo/gtypes.h" +#include "goo/GooString.h" enum ErrorCategory { errSyntaxWarning, // PDF syntax error which can be worked around; @@ -53,6 +55,6 @@ extern void setErrorCallback(void (*cbk)(void *data, ErrorCategory category, Goffset pos, char *msg), void *data); -extern void CDECL error(ErrorCategory category, Goffset pos, const char *msg, ...); +extern void CDECL error(ErrorCategory category, Goffset pos, const char *msg, ...) 
GOOSTRING_FORMAT; #endif diff --git a/test/goostring-format-checker/README b/test/goostring-format-checker/README new file mode 100644 index 00000000..cc58a5d7 --- /dev/null +++ b/test/goostring-format-checker/README @@ -0,0 +1,16 @@ +== Clang++ compiler plugin that checks usage of GooString::format-like functions == + +1) Compile the plugin with: + clang++ -shared -o goostring-format-checker.so goostring-format-checker.cc -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS + +2) Compile poppler and pass the following options to the clang++ compiler: + -Xclang -load -Xclang goostring-format-checker.so -Xclang -add-plugin -Xclang goostring-format-checker + +Example: +$ clang++ -shared -o goostring-format-checker.so goostring-format-checker.cc -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS +$ export CXX=clang++ +$ export CXXFLAGS="-Xclang -load -Xclang $PWD/goostring-format-checker.so -Xclang -add-plugin -Xclang goostring-format-checker" +$ mkdir build +$ cd build +$ cmake ../../.. +$ make diff --git a/test/goostring-format-checker/goostring-format-checker.cc b/test/goostring-format-checker/goostring-format-checker.cc new file mode 100644 index 00000000..eab92213 --- /dev/null +++ b/test/goostring-format-checker/goostring-format-checker.cc @@ -0,0 +1,369 @@ +/* + * goostring-format-checker.cc + * + * This file is licensed under the GPLv2 or later + * + * Clang++ compiler plugin that checks usage of GooString::format-like functions + * + * Copyright (C) 2014 Fabio D'Urso <fabiodurso@hotmail.it> + */ + +#include <cctype> + +#include <clang/Frontend/FrontendPluginRegistry.h> +#include <clang/AST/AST.h> +#include <clang/AST/ASTConsumer.h> +#include <clang/AST/Attr.h> +#include <clang/AST/RecursiveASTVisitor.h> +#include <clang/Frontend/CompilerInstance.h> + +using namespace clang; + +namespace +{ + +class GooStringFormatCheckerVisitor : public RecursiveASTVisitor<GooStringFormatCheckerVisitor> { +public: + explicit GooStringFormatCheckerVisitor(CompilerInstance *compInst); + + bool 
VisitFunctionDecl(FunctionDecl *funcDecl); + bool VisitCallExpr(CallExpr *callExpr); + +private: + /* Returns the index of the format argument, or -1 if the function must + * not be checked */ + int findFormatArgumentIndex(const FunctionDecl *funcDecl) const; + + /* Returns the SourceLocation of the n-th character */ + SourceLocation getLocationOfCharacter(const StringLiteral *strLiteral, unsigned n); + + /* Validates usage of a placeholder and returns the corresponding + * argument index, or -1 in case of errors */ + int verifyPlaceholder(const CallExpr *callExpr, const SourceLocation &placeholderLocation, + std::string &placeholderText, int baseArgIdx) const; + + CompilerInstance *compInst; + DiagnosticsEngine *diag; + unsigned diag_badFuncZeroArgs; + unsigned diag_badFuncNonVariadic; + unsigned diag_badFuncLastArgInvalidType; + unsigned diag_notStringLiteral; + unsigned diag_notPlainASCII; + unsigned diag_wrongOrder; + unsigned diag_unescapedBracket; + unsigned diag_unterminatedPlaceholder; + unsigned diag_unconsumedArgs; + unsigned diag_missingColon; + unsigned diag_missingArgNumber; + unsigned diag_badArgNumber; + unsigned diag_argumentNotPresent; + unsigned diag_badPrecision; + unsigned diag_badType; + unsigned diag_wrongArgExprType; +}; + +GooStringFormatCheckerVisitor::GooStringFormatCheckerVisitor(CompilerInstance *compInst) +: compInst(compInst) { + diag = &compInst->getDiagnostics(); + + diag_badFuncZeroArgs = diag->getCustomDiagID(DiagnosticsEngine::Error, "Cannot enforce format string checks on a function that takes no arguments"); + diag_badFuncNonVariadic = diag->getCustomDiagID(DiagnosticsEngine::Error, "Cannot enforce format string checks on a non-variadic function"); + diag_badFuncLastArgInvalidType = diag->getCustomDiagID(DiagnosticsEngine::Error, "Cannot enforce format string checks if the last non-variadic argument is not const char *"); + diag_notStringLiteral = diag->getCustomDiagID(DiagnosticsEngine::Warning, "Format string is not a string 
literal. Skipping format checks"); + diag_notPlainASCII = diag->getCustomDiagID(DiagnosticsEngine::Warning, "Format string contains non-ASCII or NUL characters. Skipping format checks"); + diag_wrongOrder = diag->getCustomDiagID(DiagnosticsEngine::Error, "Argument %0 must be consumed before argument %1"); + diag_unescapedBracket = diag->getCustomDiagID(DiagnosticsEngine::Error, "Unescaped '}' character"); + diag_unterminatedPlaceholder = diag->getCustomDiagID(DiagnosticsEngine::Error, "Unterminated placeholder"); + diag_unconsumedArgs = diag->getCustomDiagID(DiagnosticsEngine::Warning, "Unconsumed argument(s)"); + diag_missingColon = diag->getCustomDiagID(DiagnosticsEngine::Error, "Invalid placeholder '{%0}': missing colon character"); + diag_missingArgNumber = diag->getCustomDiagID(DiagnosticsEngine::Error, "Invalid placeholder '{%0}': missing <arg> number"); + diag_badArgNumber = diag->getCustomDiagID(DiagnosticsEngine::Error, "Invalid placeholder '{%0}': bad <arg> number"); + diag_argumentNotPresent = diag->getCustomDiagID(DiagnosticsEngine::Error, "Argument for placeholder '{%0}' is not present"); + diag_badPrecision = diag->getCustomDiagID(DiagnosticsEngine::Error, "Invalid placeholder '{%0}': bad <precision> value"); + diag_badType = diag->getCustomDiagID(DiagnosticsEngine::Error, "Invalid placeholder '{%0}': bad <type> specifier"); + diag_wrongArgExprType = diag->getCustomDiagID(DiagnosticsEngine::Error, "Expected %0 for placeholder '{%1}', found %2"); +} + +bool GooStringFormatCheckerVisitor::VisitFunctionDecl(FunctionDecl *funcDecl) { + findFormatArgumentIndex(funcDecl); // Spot misuse of the "gooformat" annotation + return true; +} + +bool GooStringFormatCheckerVisitor::VisitCallExpr(CallExpr *callExpr) { + /*** Locate format argument or skip calls that needn't be checked ***/ + + const int formatArgIdx = findFormatArgumentIndex(callExpr->getDirectCallee()); + if (formatArgIdx == -1) + return true; + + /*** Obtain format string value ***/ + + const Expr 
*formatArgExpr = callExpr->getArg(formatArgIdx); + while (formatArgExpr->getStmtClass() == Stmt::ImplicitCastExprClass) { + formatArgExpr = static_cast<const ImplicitCastExpr*>(formatArgExpr)->getSubExpr(); + } + if (formatArgExpr->getStmtClass() != Stmt::StringLiteralClass) { + diag->Report(formatArgExpr->getExprLoc(), diag_notStringLiteral); + return true; + } + const StringLiteral *formatArgStrLiteral = static_cast<const StringLiteral*>(formatArgExpr); + if (formatArgStrLiteral->containsNonAsciiOrNull()) { + diag->Report(formatArgExpr->getExprLoc(), diag_notPlainASCII); + return true; + } + + /*** Parse format string and verify arguments ***/ + + const std::string format = formatArgStrLiteral->getString().str(); + + /* Keeps track of whether we are currently parsing a character contained + * within '{' ... '}'. If set, current_placeholder contains the contents + * parsed so far (without brackets) */ + bool in_placeholder = false; + std::string current_placeholder; + + // Source location of the current placeholder's opening bracket + SourceLocation placeholderLoc; + + /* Keeps track of the next expected argument number, to check that + * arguments are first consumed in order (eg {0:d}{2:d}{1:d} is wrong). + * Note that it's possible to "look back" at already consumed + * arguments (eg {0:d}{1:d}{0:d} is OK) */ + int nextExpectedArgNum = 0; + + for (unsigned i = 0; i < format.length(); i++) { + if (in_placeholder) { + // Have we reached the end of the placeholder? + if (format[i] == '}') { + in_placeholder = false; + + // Verifies the placeholder and returns the argument number + const int foundArgNum = verifyPlaceholder(callExpr, placeholderLoc, current_placeholder, formatArgIdx+1); + + // If the placeholder wasn't valid, disable argument order checks + if (foundArgNum == -1) { + nextExpectedArgNum = -1; + } + + // If argument order checks are enabled, let's check! 
+ if (nextExpectedArgNum != -1) { + if (foundArgNum == nextExpectedArgNum) { + nextExpectedArgNum++; + } else if (foundArgNum > nextExpectedArgNum) { + diag->Report(placeholderLoc, diag_wrongOrder) << nextExpectedArgNum << foundArgNum; + nextExpectedArgNum = -1; // disable further checks + } + } + } else { + current_placeholder += format[i]; + } + } else if (format[i] == '{') { + // If we find a '{' then a placeholder is starting... + in_placeholder = true; + current_placeholder = ""; + placeholderLoc = getLocationOfCharacter(formatArgStrLiteral, i); + + // ...unless it's followed by another '{' (escape sequence) + if (i+1 < format.length() && format[i+1] == '{') { + i++; // skip next '{' character + in_placeholder = false; + } + } else if (format[i] == '}') { + /* If we have found a '}' and we're not in a placeholder, + * then it *MUST* be followed by another '}' (escape sequence) */ + if (i+1 >= format.length() || format[i+1] != '}') { + diag->Report(getLocationOfCharacter(formatArgStrLiteral, i), diag_unescapedBracket); + } else { + i++; // skip next '}' character + } + } + } + + /* If we've reached the end of the format string and in_placeholder is + * still set, then the last placeholder wasn't terminated properly */ + if (in_placeholder) + diag->Report(placeholderLoc, diag_unterminatedPlaceholder); + + int unconsumedArgs = callExpr->getNumArgs() - (formatArgIdx + 1 + nextExpectedArgNum); + if (unconsumedArgs > 0) + diag->Report(callExpr->getArg(callExpr->getNumArgs() - unconsumedArgs)->getExprLoc(), diag_unconsumedArgs); + + return true; +} + +int GooStringFormatCheckerVisitor::findFormatArgumentIndex(const FunctionDecl *funcDecl) const { + if (!funcDecl) + return -1; + + AnnotateAttr *annotation = NULL; + for (specific_attr_iterator<AnnotateAttr> it = funcDecl->specific_attr_begin<AnnotateAttr>(); + it != funcDecl->specific_attr_end<AnnotateAttr>() && !annotation; ++it) { + if (it->getAnnotation() == "gooformat") + annotation = *it; + } + + // If this 
function hasn't got the "gooformat" annotation on it + if (!annotation) + return -1; + + if (funcDecl->getNumParams() == 0) { + diag->Report(annotation->getLocation(), diag_badFuncZeroArgs); + return -1; + } + + if (!funcDecl->isVariadic()) { + diag->Report(annotation->getLocation(), diag_badFuncNonVariadic); + return -1; + } + + // Assume the last non-variadic argument is the format specifier + const int formatArgIdx = funcDecl->getNumParams() - 1; + const QualType formatArgType = funcDecl->getParamDecl(formatArgIdx)->getType(); + if (formatArgType.getAsString() != "const char *") { + diag->Report(annotation->getLocation(), diag_badFuncLastArgInvalidType); + return -1; + } + + return formatArgIdx; +} + +SourceLocation GooStringFormatCheckerVisitor::getLocationOfCharacter(const StringLiteral *strLiteral, unsigned n) +{ + return strLiteral->getLocationOfByte(n, compInst->getSourceManager(), + compInst->getLangOpts(), compInst->getTarget()); +} + +int GooStringFormatCheckerVisitor::verifyPlaceholder(const CallExpr *callExpr, const SourceLocation &placeholderLocation, + std::string &placeholderText, int baseArgIdx) const +{ + // Find the colon that separates the argument number and the format specifier + const size_t delim = placeholderText.find(':'); + if (delim == std::string::npos) { + diag->Report(placeholderLocation, diag_missingColon) << placeholderText; + return -1; + } + if (delim == 0) { + diag->Report(placeholderLocation, diag_missingArgNumber) << placeholderText; + return -1; + } + for (unsigned int i = 0; i < delim; i++) { + if (!isdigit(placeholderText[i])) { + diag->Report(placeholderLocation, diag_badArgNumber) << placeholderText; + return -1; + } + } + + // Extract argument number and its actual position in the call's argument list + const int argNum = atoi(placeholderText.substr(0, delim).c_str()); + const int argIdx = baseArgIdx + argNum; + if (argIdx >= callExpr->getNumArgs()) { + diag->Report(placeholderLocation, diag_argumentNotPresent) << 
placeholderText; + return argNum; + } + + // Check and strip width/precision specifiers + std::string format = placeholderText.substr(delim + 1); + bool dot_found = false; + while (isdigit(format[0]) || format[0] == '.') { + if (format[0] == '.') { + if (dot_found) { + diag->Report(placeholderLocation, diag_badPrecision) << placeholderText; + return argNum; + } + dot_found = true; + } + format = format.substr(1); + } + + const Expr *argExpr = callExpr->getArg(argIdx); + const QualType qualType = argExpr->getType(); + const Type *valueType = qualType->getUnqualifiedDesugaredType(); + + if (format == "d" || format == "x" || format == "X" || format == "o" || format == "b" || format == "w") { + if (!valueType->isSpecificBuiltinType(BuiltinType::Int)) { + diag->Report(argExpr->getExprLoc(), diag_wrongArgExprType) << "int" << placeholderText << qualType.getAsString(); + } + } else if (format == "ud" || format == "ux" || format == "uX" || format == "uo" || format == "ub") { + if (!valueType->isSpecificBuiltinType(BuiltinType::UInt)) { + diag->Report(argExpr->getExprLoc(), diag_wrongArgExprType) << "unsigned int" << placeholderText << qualType.getAsString(); + } + } else if (format == "ld" || format == "lx" || format == "lX" || format == "lo" || format == "lb") { + if (!valueType->isSpecificBuiltinType(BuiltinType::Long)) { + diag->Report(argExpr->getExprLoc(), diag_wrongArgExprType) << "long" << placeholderText << qualType.getAsString(); + } + } else if (format == "uld" || format == "ulx" || format == "ulX" || format == "ulo" || format == "ulb") { + if (!valueType->isSpecificBuiltinType(BuiltinType::ULong)) { + diag->Report(argExpr->getExprLoc(), diag_wrongArgExprType) << "unsigned long" << placeholderText << qualType.getAsString(); + } + } else if (format == "lld" || format == "llx" || format == "llX" || format == "llo" || format == "llb") { + if (!valueType->isSpecificBuiltinType(BuiltinType::LongLong)) { + diag->Report(argExpr->getExprLoc(), diag_wrongArgExprType) << 
"long long" << placeholderText << qualType.getAsString(); + } + } else if (format == "ulld" || format == "ullx" || format == "ullX" || format == "ullo" || format == "ullb") { + if (!valueType->isSpecificBuiltinType(BuiltinType::ULongLong)) { + diag->Report(argExpr->getExprLoc(), diag_wrongArgExprType) << "unsigned long long" << placeholderText << qualType.getAsString(); + } + } else if (format == "f" || format == "g" || format == "gs") { + if (!valueType->isSpecificBuiltinType(BuiltinType::Double)) { + diag->Report(argExpr->getExprLoc(), diag_wrongArgExprType) << "float or double" << placeholderText << qualType.getAsString(); + } + } else if (format == "c") { + if (!valueType->isSpecificBuiltinType(BuiltinType::UInt) && + !valueType->isSpecificBuiltinType(BuiltinType::Int)) { + diag->Report(argExpr->getExprLoc(), diag_wrongArgExprType) << "char, short or int" << placeholderText << qualType.getAsString(); + } + } else if (format == "s") { + if (!valueType->isPointerType() + || !valueType->getPointeeType()->getUnqualifiedDesugaredType()->isCharType()) { + diag->Report(argExpr->getExprLoc(), diag_wrongArgExprType) << "char *" << placeholderText << qualType.getAsString(); + } + } else if (format == "t") { + const CXXRecordDecl *pointeeType = valueType->isPointerType() ? 
+ valueType->getPointeeType()->getAsCXXRecordDecl() : 0; + if (pointeeType == 0 || pointeeType->getQualifiedNameAsString() != "GooString") { + diag->Report(argExpr->getExprLoc(), diag_wrongArgExprType) << "GooString *" << placeholderText << qualType.getAsString(); + } + } else { + diag->Report(placeholderLocation, diag_badType) << placeholderText; + return argNum; + } + + return argNum; +} + +class GooStringFormatCheckerConsumer : public clang::ASTConsumer { +public: + GooStringFormatCheckerConsumer(CompilerInstance *compInst) + : visitor(compInst) { + } + + virtual void HandleTranslationUnit(clang::ASTContext &ctx) { + visitor.TraverseDecl(ctx.getTranslationUnitDecl()); + } + +private: + GooStringFormatCheckerVisitor visitor; +}; + +class GooStringFormatCheckerAction : public PluginASTAction +{ +protected: + ASTConsumer *CreateASTConsumer(CompilerInstance &compInst, llvm::StringRef inFile) { + return new GooStringFormatCheckerConsumer(&compInst); + } + + bool ParseArgs(const CompilerInstance &compInst, const std::vector<std::string>& args) { + if (args.size() != 0) { + DiagnosticsEngine &D = compInst.getDiagnostics(); + D.Report(D.getCustomDiagID(DiagnosticsEngine::Error, "goostring-format-checker takes no arguments")); + return false; + } else { + return true; + } + } +}; + +} + +static FrontendPluginRegistry::Add<GooStringFormatCheckerAction> +X("goostring-format-checker", "Checks usage of GooString::format-like functions"); |