diff options
Diffstat (limited to 'compilerplugins/clang/stringliteralvar.cxx')
-rw-r--r-- | compilerplugins/clang/stringliteralvar.cxx | 135 |
1 files changed, 126 insertions, 9 deletions
diff --git a/compilerplugins/clang/stringliteralvar.cxx b/compilerplugins/clang/stringliteralvar.cxx index 348193421a61..fcd3690669e7 100644 --- a/compilerplugins/clang/stringliteralvar.cxx +++ b/compilerplugins/clang/stringliteralvar.cxx @@ -7,8 +7,10 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -// Find constant character array variables that are passed into O[U]String constructors and should -// thus be turned into O[U]StringLiteral variables. +// Find constant character array variables that are either +// (a) passed into O[U]String constructors +// (b) assigned to O[U]String +// and should thus be turned into O[U]StringLiteral variables. // // Such a variable may have been used in multiple places, not all of which would be compatible with // changing the variable's type to O[U]StringLiteral. However, this plugin is aggressive and @@ -26,6 +28,7 @@ #include <cassert> #include "check.hxx" +#include "compat.hxx" #include "plugin.hxx" namespace @@ -74,7 +77,8 @@ public: { return true; } - switch (expr->getConstructor()->getNumParams()) + auto const ctor = expr->getConstructor(); + switch (ctor->getNumParams()) { case 1: { @@ -120,16 +124,24 @@ public: return true; } auto const e2 = expr->getArg(1); - if (!(isa<CXXDefaultArgExpr>(e2) - && loplugin::TypeCheck(e2->getType()) - .Struct("Dummy") - .Namespace("libreoffice_internal") - .Namespace("rtl") - .GlobalNamespace())) + if (!((isa<CXXDefaultArgExpr>(e2) + && loplugin::TypeCheck(e2->getType()) + .Struct("Dummy") + .Namespace("libreoffice_internal") + .Namespace("rtl") + .GlobalNamespace()) + || (loplugin::TypeCheck(ctor->getParamDecl(1)->getType()) + .Typedef("sal_Int32") + .GlobalNamespace() + && e2->isIntegerConstantExpr(compiler.getASTContext())))) { return true; } auto const d = e1->getDecl(); + if (isPotentiallyInitializedWithMalformedUtf16(d)) + { + return true; + } if (!reportedArray_.insert(d).second) { return true; @@ -150,6 +162,56 @@ public: return true; } + bool VisitCXXOperatorCallExpr(CXXOperatorCallExpr const* expr) + { + if (ignoreLocation(expr)) + { + return true; + } + if (expr->getOperator() != OO_Equal) + { + return true; + } + loplugin::TypeCheck const tc(expr->getType()); + if (!(tc.Class("OString").Namespace("rtl").GlobalNamespace() + || tc.Class("OUString").Namespace("rtl").GlobalNamespace())) + { + return true; + } + if (expr->getNumArgs() != 2) + { + return true; + } + auto const e = dyn_cast<DeclRefExpr>(expr->getArg(1)->IgnoreParenImpCasts()); + if (e == nullptr) + { + return true; + } + auto const t = e->getType(); + if (!(t.isConstQualified() && t->isConstantArrayType())) + { + return true; + } + auto const d = e->getDecl(); + if (isPotentiallyInitializedWithMalformedUtf16(d)) + { + return true; + } + if (!reportedArray_.insert(d).second) + { + return true; + } + report(DiagnosticsEngine::Warning, + "change type of variable %0 from constant character array (%1) to " + "%select{OStringLiteral|OUStringLiteral}2%select{|, and make it static}3", + d->getLocation()) + << d << d->getType() << (tc.Class("OString").Namespace("rtl").GlobalNamespace() ? 0 : 1) + << isAutomaticVariable(cast<VarDecl>(d)) << d->getSourceRange(); + report(DiagnosticsEngine::Note, "first assigned here", expr->getBeginLoc()) + << expr->getSourceRange(); + return true; + } + bool VisitUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr const* expr) { if (ignoreLocation(expr)) @@ -193,6 +255,61 @@ private: } } + // There is some confusion on the semantics of numeric-escape-sequences in string literals, see + // <https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p2029r4.html> "Proposed resolution + // for core issues 411, 1656, and 2333; numeric and universal character escapes in character and + // string literals", so suppress warnings about arrays that are deliberately not written as + // UTF-16 string literals because they contain lone surrogates: + bool isPotentiallyInitializedWithMalformedUtf16(ValueDecl const* decl) const + { + if (!decl->getType()->getArrayElementTypeNoTypeQual()->isChar16Type()) + { + return false; + } + auto const init = cast<VarDecl>(decl)->getAnyInitializer(); + if (init == nullptr) + { + return true; + } + auto const list = dyn_cast<InitListExpr>(init); + if (list == nullptr) + { + // Assuming that the initializer already is a string literal, assume that that string + // literal has no issues with malformed UTF-16: + if (isDebugMode()) + { + assert(isa<clang::StringLiteral>(init)); + } + return false; + } + auto highSurrogate = false; + for (auto const e : list->inits()) + { + llvm::APSInt v; + if (!compat::EvaluateAsInt(e, v, compiler.getASTContext())) + { + return true; + } + if (highSurrogate) + { + if (v < 0xDC00 || v > 0xDFFF) + { + return true; + } + highSurrogate = false; + } + else if (v >= 0xD800 && v <= 0xDBFF) + { + highSurrogate = true; + } + else if (v >= 0xDC00 && v <= 0xDFFF) + { + return true; + } + } + return highSurrogate; + } + std::set<Decl const*> reportedAutomatic_; std::set<Decl const*> reportedArray_; }; |