1 //===--- StringIntegerAssignmentCheck.cpp - clang-tidy---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "StringIntegerAssignmentCheck.h"
10 #include "clang/AST/ASTContext.h"
11 #include "clang/ASTMatchers/ASTMatchFinder.h"
12 #include "clang/Lex/Lexer.h"
13
14 using namespace clang::ast_matchers;
15
16 namespace clang {
17 namespace tidy {
18 namespace bugprone {
19
registerMatchers(MatchFinder * Finder)20 void StringIntegerAssignmentCheck::registerMatchers(MatchFinder *Finder) {
21 Finder->addMatcher(
22 cxxOperatorCallExpr(
23 hasAnyOverloadedOperatorName("=", "+="),
24 callee(cxxMethodDecl(ofClass(classTemplateSpecializationDecl(
25 hasName("::std::basic_string"),
26 hasTemplateArgument(0, refersToType(hasCanonicalType(
27 qualType().bind("type")))))))),
28 hasArgument(
29 1,
30 ignoringImpCasts(
31 expr(hasType(isInteger()), unless(hasType(isAnyCharacter())),
32 // Ignore calls to tolower/toupper (see PR27723).
33 unless(callExpr(callee(functionDecl(
34 hasAnyName("tolower", "std::tolower", "toupper",
35 "std::toupper"))))),
36 // Do not warn if assigning e.g. `CodePoint` to
37 // `basic_string<CodePoint>`
38 unless(hasType(qualType(
39 hasCanonicalType(equalsBoundNode("type"))))))
40 .bind("expr"))),
41 unless(isInTemplateInstantiation())),
42 this);
43 }
44
45 class CharExpressionDetector {
46 public:
CharExpressionDetector(QualType CharType,const ASTContext & Ctx)47 CharExpressionDetector(QualType CharType, const ASTContext &Ctx)
48 : CharType(CharType), Ctx(Ctx) {}
49
isLikelyCharExpression(const Expr * E) const50 bool isLikelyCharExpression(const Expr *E) const {
51 if (isCharTyped(E))
52 return true;
53
54 if (const auto *BinOp = dyn_cast<BinaryOperator>(E)) {
55 const auto *LHS = BinOp->getLHS()->IgnoreParenImpCasts();
56 const auto *RHS = BinOp->getRHS()->IgnoreParenImpCasts();
57 // Handle both directions, e.g. `'a' + (i % 26)` and `(i % 26) + 'a'`.
58 if (BinOp->isAdditiveOp() || BinOp->isBitwiseOp())
59 return handleBinaryOp(BinOp->getOpcode(), LHS, RHS) ||
60 handleBinaryOp(BinOp->getOpcode(), RHS, LHS);
61 // Except in the case of '%'.
62 if (BinOp->getOpcode() == BO_Rem)
63 return handleBinaryOp(BinOp->getOpcode(), LHS, RHS);
64 return false;
65 }
66
67 // Ternary where at least one branch is a likely char expression, e.g.
68 // i < 265 ? i : ' '
69 if (const auto *CondOp = dyn_cast<AbstractConditionalOperator>(E))
70 return isLikelyCharExpression(
71 CondOp->getFalseExpr()->IgnoreParenImpCasts()) ||
72 isLikelyCharExpression(
73 CondOp->getTrueExpr()->IgnoreParenImpCasts());
74 return false;
75 }
76
77 private:
handleBinaryOp(clang::BinaryOperatorKind Opcode,const Expr * const LHS,const Expr * const RHS) const78 bool handleBinaryOp(clang::BinaryOperatorKind Opcode, const Expr *const LHS,
79 const Expr *const RHS) const {
80 // <char_expr> <op> <char_expr> (c++ integer promotion rules make this an
81 // int), e.g.
82 // 'a' + c
83 if (isCharTyped(LHS) && isCharTyped(RHS))
84 return true;
85
86 // <expr> & <char_valued_constant> or <expr> % <char_valued_constant>, e.g.
87 // i & 0xff
88 if ((Opcode == BO_And || Opcode == BO_Rem) && isCharValuedConstant(RHS))
89 return true;
90
91 // <char_expr> | <char_valued_constant>, e.g.
92 // c | 0x80
93 if (Opcode == BO_Or && isCharTyped(LHS) && isCharValuedConstant(RHS))
94 return true;
95
96 // <char_constant> + <likely_char_expr>, e.g.
97 // 'a' + (i % 26)
98 if (Opcode == BO_Add)
99 return isCharConstant(LHS) && isLikelyCharExpression(RHS);
100
101 return false;
102 }
103
104 // Returns true if `E` is an character constant.
isCharConstant(const Expr * E) const105 bool isCharConstant(const Expr *E) const {
106 return isCharTyped(E) && isCharValuedConstant(E);
107 };
108
109 // Returns true if `E` is an integer constant which fits in `CharType`.
isCharValuedConstant(const Expr * E) const110 bool isCharValuedConstant(const Expr *E) const {
111 if (E->isInstantiationDependent())
112 return false;
113 Expr::EvalResult EvalResult;
114 if (!E->EvaluateAsInt(EvalResult, Ctx, Expr::SE_AllowSideEffects))
115 return false;
116 return EvalResult.Val.getInt().getActiveBits() <= Ctx.getTypeSize(CharType);
117 };
118
119 // Returns true if `E` has the right character type.
isCharTyped(const Expr * E) const120 bool isCharTyped(const Expr *E) const {
121 return E->getType().getCanonicalType().getTypePtr() ==
122 CharType.getTypePtr();
123 };
124
125 const QualType CharType;
126 const ASTContext &Ctx;
127 };
128
check(const MatchFinder::MatchResult & Result)129 void StringIntegerAssignmentCheck::check(
130 const MatchFinder::MatchResult &Result) {
131 const auto *Argument = Result.Nodes.getNodeAs<Expr>("expr");
132 const auto CharType =
133 Result.Nodes.getNodeAs<QualType>("type")->getCanonicalType();
134 SourceLocation Loc = Argument->getBeginLoc();
135
136 // Try to detect a few common expressions to reduce false positives.
137 if (CharExpressionDetector(CharType, *Result.Context)
138 .isLikelyCharExpression(Argument))
139 return;
140
141 auto Diag =
142 diag(Loc, "an integer is interpreted as a character code when assigning "
143 "it to a string; if this is intended, cast the integer to the "
144 "appropriate character type; if you want a string "
145 "representation, use the appropriate conversion facility");
146
147 if (Loc.isMacroID())
148 return;
149
150 bool IsWideCharType = CharType->isWideCharType();
151 if (!CharType->isCharType() && !IsWideCharType)
152 return;
153 bool IsOneDigit = false;
154 bool IsLiteral = false;
155 if (const auto *Literal = dyn_cast<IntegerLiteral>(Argument)) {
156 IsOneDigit = Literal->getValue().getLimitedValue() < 10;
157 IsLiteral = true;
158 }
159
160 SourceLocation EndLoc = Lexer::getLocForEndOfToken(
161 Argument->getEndLoc(), 0, *Result.SourceManager, getLangOpts());
162 if (IsOneDigit) {
163 Diag << FixItHint::CreateInsertion(Loc, IsWideCharType ? "L'" : "'")
164 << FixItHint::CreateInsertion(EndLoc, "'");
165 return;
166 }
167 if (IsLiteral) {
168 Diag << FixItHint::CreateInsertion(Loc, IsWideCharType ? "L\"" : "\"")
169 << FixItHint::CreateInsertion(EndLoc, "\"");
170 return;
171 }
172
173 if (getLangOpts().CPlusPlus11) {
174 Diag << FixItHint::CreateInsertion(Loc, IsWideCharType ? "std::to_wstring("
175 : "std::to_string(")
176 << FixItHint::CreateInsertion(EndLoc, ")");
177 }
178 }
179
180 } // namespace bugprone
181 } // namespace tidy
182 } // namespace clang
183