summaryrefslogtreecommitdiff
path: root/compilerplugins/clang/stringliteralvar.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'compilerplugins/clang/stringliteralvar.cxx')
-rw-r--r--compilerplugins/clang/stringliteralvar.cxx135
1 files changed, 126 insertions, 9 deletions
diff --git a/compilerplugins/clang/stringliteralvar.cxx b/compilerplugins/clang/stringliteralvar.cxx
index 348193421a61..fcd3690669e7 100644
--- a/compilerplugins/clang/stringliteralvar.cxx
+++ b/compilerplugins/clang/stringliteralvar.cxx
@@ -7,8 +7,10 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
-// Find constant character array variables that are passed into O[U]String constructors and should
-// thus be turned into O[U]StringLiteral variables.
+// Find constant character array variables that are either
+// (a) passed into O[U]String constructors, or
+// (b) assigned to an O[U]String,
+// and should thus be turned into O[U]StringLiteral variables.
//
// Such a variable may have been used in multiple places, not all of which would be compatible with
// changing the variable's type to O[U]StringLiteral. However, this plugin is aggressive and
@@ -26,6 +28,7 @@
#include <cassert>
#include "check.hxx"
+#include "compat.hxx"
#include "plugin.hxx"
namespace
@@ -74,7 +77,8 @@ public:
{
return true;
}
- switch (expr->getConstructor()->getNumParams())
+ auto const ctor = expr->getConstructor();
+ switch (ctor->getNumParams())
{
case 1:
{
@@ -120,16 +124,24 @@ public:
return true;
}
auto const e2 = expr->getArg(1);
- if (!(isa<CXXDefaultArgExpr>(e2)
- && loplugin::TypeCheck(e2->getType())
- .Struct("Dummy")
- .Namespace("libreoffice_internal")
- .Namespace("rtl")
- .GlobalNamespace()))
+ if (!((isa<CXXDefaultArgExpr>(e2)
+ && loplugin::TypeCheck(e2->getType())
+ .Struct("Dummy")
+ .Namespace("libreoffice_internal")
+ .Namespace("rtl")
+ .GlobalNamespace())
+ || (loplugin::TypeCheck(ctor->getParamDecl(1)->getType())
+ .Typedef("sal_Int32")
+ .GlobalNamespace()
+ && e2->isIntegerConstantExpr(compiler.getASTContext()))))
{
return true;
}
auto const d = e1->getDecl();
+ if (isPotentiallyInitializedWithMalformedUtf16(d))
+ {
+ return true;
+ }
if (!reportedArray_.insert(d).second)
{
return true;
@@ -150,6 +162,56 @@ public:
return true;
}
+ bool VisitCXXOperatorCallExpr(CXXOperatorCallExpr const* expr) // warns about const arrays assigned to an rtl::O[U]String; always returns true
+ {
+ if (ignoreLocation(expr)) // nothing to do for locations that ignoreLocation tells us to skip
+ {
+ return true;
+ }
+ if (expr->getOperator() != OO_Equal) // only overloaded assignment (operator =) is of interest
+ {
+ return true;
+ }
+ loplugin::TypeCheck const tc(expr->getType()); // type of the whole assignment expression
+ if (!(tc.Class("OString").Namespace("rtl").GlobalNamespace()
+ || tc.Class("OUString").Namespace("rtl").GlobalNamespace())) // neither rtl::OString nor rtl::OUString
+ {
+ return true;
+ }
+ if (expr->getNumArgs() != 2) // presumably arg 0 is the assigned-to object and arg 1 the assigned value -- TODO confirm
+ {
+ return true;
+ }
+ auto const e = dyn_cast<DeclRefExpr>(expr->getArg(1)->IgnoreParenImpCasts()); // arg 1, seen through parentheses and implicit casts
+ if (e == nullptr) // arg 1 does not directly name a declaration
+ {
+ return true;
+ }
+ auto const t = e->getType();
+ if (!(t.isConstQualified() && t->isConstantArrayType())) // only const-qualified arrays of constant size qualify
+ {
+ return true;
+ }
+ auto const d = e->getDecl();
+ if (isPotentiallyInitializedWithMalformedUtf16(d)) // such arrays are deliberately exempt from the warning
+ {
+ return true;
+ }
+ if (!reportedArray_.insert(d).second) // already in the set, so this declaration has been reported before
+ {
+ return true;
+ }
+ report(DiagnosticsEngine::Warning,
+ "change type of variable %0 from constant character array (%1) to "
+ "%select{OStringLiteral|OUStringLiteral}2%select{|, and make it static}3",
+ d->getLocation())
+ << d << d->getType() << (tc.Class("OString").Namespace("rtl").GlobalNamespace() ? 0 : 1) // %2: 0 selects OStringLiteral, 1 selects OUStringLiteral
+ << isAutomaticVariable(cast<VarDecl>(d)) << d->getSourceRange(); // %3: a true value appends the "make it static" advice
+ report(DiagnosticsEngine::Note, "first assigned here", expr->getBeginLoc()) // companion note pointing at the assignment itself
+ << expr->getSourceRange();
+ return true;
+ }
+
bool VisitUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr const* expr)
{
if (ignoreLocation(expr))
@@ -193,6 +255,61 @@ private:
}
}
+ // There is some confusion on the semantics of numeric-escape-sequences in string literals, see
+ // <https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p2029r4.html> "Proposed resolution
+ // for core issues 411, 1656, and 2333; numeric and universal character escapes in character and
+ // string literals", so suppress warnings about arrays that are deliberately not written as
+ // UTF-16 string literals because they contain lone surrogates:
+ bool isPotentiallyInitializedWithMalformedUtf16(ValueDecl const* decl) const // true if the callers' warning for decl should be suppressed
+ {
+ if (!decl->getType()->getArrayElementTypeNoTypeQual()->isChar16Type()) // only arrays with char16_t elements are examined
+ {
+ return false;
+ }
+ auto const init = cast<VarDecl>(decl)->getAnyInitializer(); // NOTE(review): cast<> assumes decl is always a VarDecl -- confirm for all callers
+ if (init == nullptr) // no initializer found, so malformed content cannot be ruled out
+ {
+ return true;
+ }
+ auto const list = dyn_cast<InitListExpr>(init);
+ if (list == nullptr) // initializer is not a braced list of elements
+ {
+ // The initializer is not a braced list, so it is expected to be a string literal (asserted
+ // below in debug mode); assume that such a literal contains no malformed UTF-16:
+ if (isDebugMode())
+ {
+ assert(isa<clang::StringLiteral>(init));
+ }
+ return false;
+ }
+ auto highSurrogate = false; // true while the previous element was a high surrogate still awaiting its low surrogate
+ for (auto const e : list->inits())
+ {
+ llvm::APSInt v;
+ if (!compat::EvaluateAsInt(e, v, compiler.getASTContext())) // element value could not be evaluated: be conservative
+ {
+ return true;
+ }
+ if (highSurrogate)
+ {
+ if (v < 0xDC00 || v > 0xDFFF) // high surrogate not followed by a low surrogate
+ {
+ return true;
+ }
+ highSurrogate = false; // pair is complete
+ }
+ else if (v >= 0xD800 && v <= 0xDBFF) // high surrogate: the next element must be a low surrogate
+ {
+ highSurrogate = true;
+ }
+ else if (v >= 0xDC00 && v <= 0xDFFF) // low surrogate without a preceding high surrogate
+ {
+ return true;
+ }
+ }
+ return highSurrogate; // a high surrogate as the very last element is unpaired as well
+ }
+
std::set<Decl const*> reportedAutomatic_;
std::set<Decl const*> reportedArray_;
};