//===--- StringIntegerAssignmentCheck.cpp - clang-tidy---------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "StringIntegerAssignmentCheck.h" #include "clang/AST/ASTContext.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/Lex/Lexer.h" using namespace clang::ast_matchers; namespace clang { namespace tidy { namespace bugprone { void StringIntegerAssignmentCheck::registerMatchers(MatchFinder *Finder) { if (!getLangOpts().CPlusPlus) return; Finder->addMatcher( cxxOperatorCallExpr( anyOf(hasOverloadedOperatorName("="), hasOverloadedOperatorName("+=")), callee(cxxMethodDecl(ofClass(classTemplateSpecializationDecl( hasName("::std::basic_string"), hasTemplateArgument(0, refersToType(hasCanonicalType( qualType().bind("type")))))))), hasArgument( 1, ignoringImpCasts( expr(hasType(isInteger()), unless(hasType(isAnyCharacter())), // Ignore calls to tolower/toupper (see PR27723). unless(callExpr(callee(functionDecl( hasAnyName("tolower", "std::tolower", "toupper", "std::toupper"))))), // Do not warn if assigning e.g. `CodePoint` to // `basic_string` unless(hasType(qualType( hasCanonicalType(equalsBoundNode("type")))))) .bind("expr"))), unless(isInTemplateInstantiation())), this); } class CharExpressionDetector { public: CharExpressionDetector(QualType CharType, const ASTContext &Ctx) : CharType(CharType), Ctx(Ctx) {} bool isLikelyCharExpression(const Expr *E) const { if (isCharTyped(E)) return true; if (const auto *BinOp = dyn_cast(E)) { const auto *LHS = BinOp->getLHS()->IgnoreParenImpCasts(); const auto *RHS = BinOp->getRHS()->IgnoreParenImpCasts(); // Handle both directions, e.g. `'a' + (i % 26)` and `(i % 26) + 'a'`. if (BinOp->isAdditiveOp() || BinOp->isBitwiseOp()) return handleBinaryOp(BinOp->getOpcode(), LHS, RHS) || handleBinaryOp(BinOp->getOpcode(), RHS, LHS); // Except in the case of '%'. if (BinOp->getOpcode() == BO_Rem) return handleBinaryOp(BinOp->getOpcode(), LHS, RHS); return false; } // Ternary where at least one branch is a likely char expression, e.g. // i < 265 ? i : ' ' if (const auto *CondOp = dyn_cast(E)) return isLikelyCharExpression( CondOp->getFalseExpr()->IgnoreParenImpCasts()) || isLikelyCharExpression( CondOp->getTrueExpr()->IgnoreParenImpCasts()); return false; } private: bool handleBinaryOp(clang::BinaryOperatorKind Opcode, const Expr *const LHS, const Expr *const RHS) const { // (c++ integer promotion rules make this an // int), e.g. // 'a' + c if (isCharTyped(LHS) && isCharTyped(RHS)) return true; // & or % , e.g. // i & 0xff if ((Opcode == BO_And || Opcode == BO_Rem) && isCharValuedConstant(RHS)) return true; // | , e.g. // c | 0x80 if (Opcode == BO_Or && isCharTyped(LHS) && isCharValuedConstant(RHS)) return true; // + , e.g. // 'a' + (i % 26) if (Opcode == BO_Add) return isCharConstant(LHS) && isLikelyCharExpression(RHS); return false; } // Returns true if `E` is an character constant. bool isCharConstant(const Expr *E) const { return isCharTyped(E) && isCharValuedConstant(E); }; // Returns true if `E` is an integer constant which fits in `CharType`. bool isCharValuedConstant(const Expr *E) const { if (E->isInstantiationDependent()) return false; Expr::EvalResult EvalResult; if (!E->EvaluateAsInt(EvalResult, Ctx, Expr::SE_AllowSideEffects)) return false; return EvalResult.Val.getInt().getActiveBits() <= Ctx.getTypeSize(CharType); }; // Returns true if `E` has the right character type. bool isCharTyped(const Expr *E) const { return E->getType().getCanonicalType().getTypePtr() == CharType.getTypePtr(); }; const QualType CharType; const ASTContext &Ctx; }; void StringIntegerAssignmentCheck::check( const MatchFinder::MatchResult &Result) { const auto *Argument = Result.Nodes.getNodeAs("expr"); const auto CharType = Result.Nodes.getNodeAs("type")->getCanonicalType(); SourceLocation Loc = Argument->getBeginLoc(); // Try to detect a few common expressions to reduce false positives. if (CharExpressionDetector(CharType, *Result.Context) .isLikelyCharExpression(Argument)) return; auto Diag = diag(Loc, "an integer is interpreted as a character code when assigning " "it to a string; if this is intended, cast the integer to the " "appropriate character type; if you want a string " "representation, use the appropriate conversion facility"); if (Loc.isMacroID()) return; bool IsWideCharType = CharType->isWideCharType(); if (!CharType->isCharType() && !IsWideCharType) return; bool IsOneDigit = false; bool IsLiteral = false; if (const auto *Literal = dyn_cast(Argument)) { IsOneDigit = Literal->getValue().getLimitedValue() < 10; IsLiteral = true; } SourceLocation EndLoc = Lexer::getLocForEndOfToken( Argument->getEndLoc(), 0, *Result.SourceManager, getLangOpts()); if (IsOneDigit) { Diag << FixItHint::CreateInsertion(Loc, IsWideCharType ? "L'" : "'") << FixItHint::CreateInsertion(EndLoc, "'"); return; } if (IsLiteral) { Diag << FixItHint::CreateInsertion(Loc, IsWideCharType ? "L\"" : "\"") << FixItHint::CreateInsertion(EndLoc, "\""); return; } if (getLangOpts().CPlusPlus11) { Diag << FixItHint::CreateInsertion(Loc, IsWideCharType ? "std::to_wstring(" : "std::to_string(") << FixItHint::CreateInsertion(EndLoc, ")"); } } } // namespace bugprone } // namespace tidy } // namespace clang