1 //===- UnsafeBufferUsage.cpp - Replace pointers with modern C++ -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
10 #include "clang/AST/Decl.h"
11 #include "clang/AST/Expr.h"
12 #include "clang/AST/RecursiveASTVisitor.h"
13 #include "clang/AST/StmtVisitor.h"
14 #include "clang/ASTMatchers/ASTMatchFinder.h"
15 #include "clang/Lex/Lexer.h"
16 #include "clang/Lex/Preprocessor.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include <memory>
19 #include <optional>
20 #include <sstream>
21 #include <queue>
22
23 using namespace llvm;
24 using namespace clang;
25 using namespace ast_matchers;
26
27 #ifndef NDEBUG
28 namespace {
29 class StmtDebugPrinter
30 : public ConstStmtVisitor<StmtDebugPrinter, std::string> {
31 public:
VisitStmt(const Stmt * S)32 std::string VisitStmt(const Stmt *S) { return S->getStmtClassName(); }
33
VisitBinaryOperator(const BinaryOperator * BO)34 std::string VisitBinaryOperator(const BinaryOperator *BO) {
35 return "BinaryOperator(" + BO->getOpcodeStr().str() + ")";
36 }
37
VisitUnaryOperator(const UnaryOperator * UO)38 std::string VisitUnaryOperator(const UnaryOperator *UO) {
39 return "UnaryOperator(" + UO->getOpcodeStr(UO->getOpcode()).str() + ")";
40 }
41
VisitImplicitCastExpr(const ImplicitCastExpr * ICE)42 std::string VisitImplicitCastExpr(const ImplicitCastExpr *ICE) {
43 return "ImplicitCastExpr(" + std::string(ICE->getCastKindName()) + ")";
44 }
45 };
46
47 // Returns a string of ancestor `Stmt`s of the given `DRE` in such a form:
48 // "DRE ==> parent-of-DRE ==> grandparent-of-DRE ==> ...".
getDREAncestorString(const DeclRefExpr * DRE,ASTContext & Ctx)49 static std::string getDREAncestorString(const DeclRefExpr *DRE,
50 ASTContext &Ctx) {
51 std::stringstream SS;
52 const Stmt *St = DRE;
53 StmtDebugPrinter StmtPriner;
54
55 do {
56 SS << StmtPriner.Visit(St);
57
58 DynTypedNodeList StParents = Ctx.getParents(*St);
59
60 if (StParents.size() > 1)
61 return "unavailable due to multiple parents";
62 if (StParents.size() == 0)
63 break;
64 St = StParents.begin()->get<Stmt>();
65 if (St)
66 SS << " ==> ";
67 } while (St);
68 return SS.str();
69 }
70 } // namespace
71 #endif /* NDEBUG */
72
73 namespace clang::ast_matchers {
74 // A `RecursiveASTVisitor` that traverses all descendants of a given node "n"
75 // except for those belonging to a different callable of "n".
76 class MatchDescendantVisitor
77 : public RecursiveASTVisitor<MatchDescendantVisitor> {
78 public:
79 typedef RecursiveASTVisitor<MatchDescendantVisitor> VisitorBase;
80
81 // Creates an AST visitor that matches `Matcher` on all
82 // descendants of a given node "n" except for the ones
83 // belonging to a different callable of "n".
MatchDescendantVisitor(const internal::DynTypedMatcher * Matcher,internal::ASTMatchFinder * Finder,internal::BoundNodesTreeBuilder * Builder,internal::ASTMatchFinder::BindKind Bind,const bool ignoreUnevaluatedContext)84 MatchDescendantVisitor(const internal::DynTypedMatcher *Matcher,
85 internal::ASTMatchFinder *Finder,
86 internal::BoundNodesTreeBuilder *Builder,
87 internal::ASTMatchFinder::BindKind Bind,
88 const bool ignoreUnevaluatedContext)
89 : Matcher(Matcher), Finder(Finder), Builder(Builder), Bind(Bind),
90 Matches(false), ignoreUnevaluatedContext(ignoreUnevaluatedContext) {}
91
92 // Returns true if a match is found in a subtree of `DynNode`, which belongs
93 // to the same callable of `DynNode`.
findMatch(const DynTypedNode & DynNode)94 bool findMatch(const DynTypedNode &DynNode) {
95 Matches = false;
96 if (const Stmt *StmtNode = DynNode.get<Stmt>()) {
97 TraverseStmt(const_cast<Stmt *>(StmtNode));
98 *Builder = ResultBindings;
99 return Matches;
100 }
101 return false;
102 }
103
104 // The following are overriding methods from the base visitor class.
105 // They are public only to allow CRTP to work. They are *not *part
106 // of the public API of this class.
107
108 // For the matchers so far used in safe buffers, we only need to match
109 // `Stmt`s. To override more as needed.
110
TraverseDecl(Decl * Node)111 bool TraverseDecl(Decl *Node) {
112 if (!Node)
113 return true;
114 if (!match(*Node))
115 return false;
116 // To skip callables:
117 if (isa<FunctionDecl, BlockDecl, ObjCMethodDecl>(Node))
118 return true;
119 // Traverse descendants
120 return VisitorBase::TraverseDecl(Node);
121 }
122
TraverseGenericSelectionExpr(GenericSelectionExpr * Node)123 bool TraverseGenericSelectionExpr(GenericSelectionExpr *Node) {
124 // These are unevaluated, except the result expression.
125 if(ignoreUnevaluatedContext)
126 return TraverseStmt(Node->getResultExpr());
127 return VisitorBase::TraverseGenericSelectionExpr(Node);
128 }
129
TraverseUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr * Node)130 bool TraverseUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *Node) {
131 // Unevaluated context.
132 if(ignoreUnevaluatedContext)
133 return true;
134 return VisitorBase::TraverseUnaryExprOrTypeTraitExpr(Node);
135 }
136
TraverseTypeOfExprTypeLoc(TypeOfExprTypeLoc Node)137 bool TraverseTypeOfExprTypeLoc(TypeOfExprTypeLoc Node) {
138 // Unevaluated context.
139 if(ignoreUnevaluatedContext)
140 return true;
141 return VisitorBase::TraverseTypeOfExprTypeLoc(Node);
142 }
143
TraverseDecltypeTypeLoc(DecltypeTypeLoc Node)144 bool TraverseDecltypeTypeLoc(DecltypeTypeLoc Node) {
145 // Unevaluated context.
146 if(ignoreUnevaluatedContext)
147 return true;
148 return VisitorBase::TraverseDecltypeTypeLoc(Node);
149 }
150
TraverseCXXNoexceptExpr(CXXNoexceptExpr * Node)151 bool TraverseCXXNoexceptExpr(CXXNoexceptExpr *Node) {
152 // Unevaluated context.
153 if(ignoreUnevaluatedContext)
154 return true;
155 return VisitorBase::TraverseCXXNoexceptExpr(Node);
156 }
157
TraverseCXXTypeidExpr(CXXTypeidExpr * Node)158 bool TraverseCXXTypeidExpr(CXXTypeidExpr *Node) {
159 // Unevaluated context.
160 if(ignoreUnevaluatedContext)
161 return true;
162 return VisitorBase::TraverseCXXTypeidExpr(Node);
163 }
164
TraverseStmt(Stmt * Node,DataRecursionQueue * Queue=nullptr)165 bool TraverseStmt(Stmt *Node, DataRecursionQueue *Queue = nullptr) {
166 if (!Node)
167 return true;
168 if (!match(*Node))
169 return false;
170 return VisitorBase::TraverseStmt(Node);
171 }
172
shouldVisitTemplateInstantiations() const173 bool shouldVisitTemplateInstantiations() const { return true; }
shouldVisitImplicitCode() const174 bool shouldVisitImplicitCode() const {
175 // TODO: let's ignore implicit code for now
176 return false;
177 }
178
179 private:
180 // Sets 'Matched' to true if 'Matcher' matches 'Node'
181 //
182 // Returns 'true' if traversal should continue after this function
183 // returns, i.e. if no match is found or 'Bind' is 'BK_All'.
match(const T & Node)184 template <typename T> bool match(const T &Node) {
185 internal::BoundNodesTreeBuilder RecursiveBuilder(*Builder);
186
187 if (Matcher->matches(DynTypedNode::create(Node), Finder,
188 &RecursiveBuilder)) {
189 ResultBindings.addMatch(RecursiveBuilder);
190 Matches = true;
191 if (Bind != internal::ASTMatchFinder::BK_All)
192 return false; // Abort as soon as a match is found.
193 }
194 return true;
195 }
196
197 const internal::DynTypedMatcher *const Matcher;
198 internal::ASTMatchFinder *const Finder;
199 internal::BoundNodesTreeBuilder *const Builder;
200 internal::BoundNodesTreeBuilder ResultBindings;
201 const internal::ASTMatchFinder::BindKind Bind;
202 bool Matches;
203 bool ignoreUnevaluatedContext;
204 };
205
206 // Because we're dealing with raw pointers, let's define what we mean by that.
hasPointerType()207 static auto hasPointerType() {
208 return hasType(hasCanonicalType(pointerType()));
209 }
210
hasArrayType()211 static auto hasArrayType() {
212 return hasType(hasCanonicalType(arrayType()));
213 }
214
AST_MATCHER_P(Stmt,forEachDescendantEvaluatedStmt,internal::Matcher<Stmt>,innerMatcher)215 AST_MATCHER_P(Stmt, forEachDescendantEvaluatedStmt, internal::Matcher<Stmt>, innerMatcher) {
216 const DynTypedMatcher &DTM = static_cast<DynTypedMatcher>(innerMatcher);
217
218 MatchDescendantVisitor Visitor(&DTM, Finder, Builder, ASTMatchFinder::BK_All, true);
219 return Visitor.findMatch(DynTypedNode::create(Node));
220 }
221
AST_MATCHER_P(Stmt,forEachDescendantStmt,internal::Matcher<Stmt>,innerMatcher)222 AST_MATCHER_P(Stmt, forEachDescendantStmt, internal::Matcher<Stmt>, innerMatcher) {
223 const DynTypedMatcher &DTM = static_cast<DynTypedMatcher>(innerMatcher);
224
225 MatchDescendantVisitor Visitor(&DTM, Finder, Builder, ASTMatchFinder::BK_All, false);
226 return Visitor.findMatch(DynTypedNode::create(Node));
227 }
228
229 // Matches a `Stmt` node iff the node is in a safe-buffer opt-out region
AST_MATCHER_P(Stmt,notInSafeBufferOptOut,const UnsafeBufferUsageHandler *,Handler)230 AST_MATCHER_P(Stmt, notInSafeBufferOptOut, const UnsafeBufferUsageHandler *,
231 Handler) {
232 return !Handler->isSafeBufferOptOut(Node.getBeginLoc());
233 }
234
AST_MATCHER_P(CastExpr,castSubExpr,internal::Matcher<Expr>,innerMatcher)235 AST_MATCHER_P(CastExpr, castSubExpr, internal::Matcher<Expr>, innerMatcher) {
236 return innerMatcher.matches(*Node.getSubExpr(), Finder, Builder);
237 }
238
239 // Matches a `UnaryOperator` whose operator is pre-increment:
AST_MATCHER(UnaryOperator,isPreInc)240 AST_MATCHER(UnaryOperator, isPreInc) {
241 return Node.getOpcode() == UnaryOperator::Opcode::UO_PreInc;
242 }
243
244 // Returns a matcher that matches any expression 'e' such that `innerMatcher`
245 // matches 'e' and 'e' is in an Unspecified Lvalue Context.
isInUnspecifiedLvalueContext(internal::Matcher<Expr> innerMatcher)246 static auto isInUnspecifiedLvalueContext(internal::Matcher<Expr> innerMatcher) {
247 // clang-format off
248 return
249 expr(anyOf(
250 implicitCastExpr(
251 hasCastKind(CastKind::CK_LValueToRValue),
252 castSubExpr(innerMatcher)),
253 binaryOperator(
254 hasAnyOperatorName("="),
255 hasLHS(innerMatcher)
256 )
257 ));
258 // clang-format on
259 }
260
261
262 // Returns a matcher that matches any expression `e` such that `InnerMatcher`
263 // matches `e` and `e` is in an Unspecified Pointer Context (UPC).
264 static internal::Matcher<Stmt>
isInUnspecifiedPointerContext(internal::Matcher<Stmt> InnerMatcher)265 isInUnspecifiedPointerContext(internal::Matcher<Stmt> InnerMatcher) {
266 // A UPC can be
267 // 1. an argument of a function call (except the callee has [[unsafe_...]]
268 // attribute), or
269 // 2. the operand of a pointer-to-(integer or bool) cast operation; or
270 // 3. the operand of a comparator operation; or
271 // 4. the operand of a pointer subtraction operation
272 // (i.e., computing the distance between two pointers); or ...
273
274 auto CallArgMatcher =
275 callExpr(forEachArgumentWithParam(InnerMatcher,
276 hasPointerType() /* array also decays to pointer type*/),
277 unless(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage)))));
278
279 auto CastOperandMatcher =
280 castExpr(anyOf(hasCastKind(CastKind::CK_PointerToIntegral),
281 hasCastKind(CastKind::CK_PointerToBoolean)),
282 castSubExpr(allOf(hasPointerType(), InnerMatcher)));
283
284 auto CompOperandMatcher =
285 binaryOperator(hasAnyOperatorName("!=", "==", "<", "<=", ">", ">="),
286 eachOf(hasLHS(allOf(hasPointerType(), InnerMatcher)),
287 hasRHS(allOf(hasPointerType(), InnerMatcher))));
288
289 // A matcher that matches pointer subtractions:
290 auto PtrSubtractionMatcher =
291 binaryOperator(hasOperatorName("-"),
292 // Note that here we need both LHS and RHS to be
293 // pointer. Then the inner matcher can match any of
294 // them:
295 allOf(hasLHS(hasPointerType()),
296 hasRHS(hasPointerType())),
297 eachOf(hasLHS(InnerMatcher),
298 hasRHS(InnerMatcher)));
299
300 return stmt(anyOf(CallArgMatcher, CastOperandMatcher, CompOperandMatcher,
301 PtrSubtractionMatcher));
302 // FIXME: any more cases? (UPC excludes the RHS of an assignment. For now we
303 // don't have to check that.)
304 }
305
306 // Returns a matcher that matches any expression 'e' such that `innerMatcher`
307 // matches 'e' and 'e' is in an unspecified untyped context (i.e the expression
308 // 'e' isn't evaluated to an RValue). For example, consider the following code:
309 // int *p = new int[4];
310 // int *q = new int[4];
311 // if ((p = q)) {}
312 // p = q;
313 // The expression `p = q` in the conditional of the `if` statement
314 // `if ((p = q))` is evaluated as an RValue, whereas the expression `p = q;`
315 // in the assignment statement is in an untyped context.
316 static internal::Matcher<Stmt>
isInUnspecifiedUntypedContext(internal::Matcher<Stmt> InnerMatcher)317 isInUnspecifiedUntypedContext(internal::Matcher<Stmt> InnerMatcher) {
318 // An unspecified context can be
319 // 1. A compound statement,
320 // 2. The body of an if statement
321 // 3. Body of a loop
322 auto CompStmt = compoundStmt(forEach(InnerMatcher));
323 auto IfStmtThen = ifStmt(hasThen(InnerMatcher));
324 auto IfStmtElse = ifStmt(hasElse(InnerMatcher));
325 // FIXME: Handle loop bodies.
326 return stmt(anyOf(CompStmt, IfStmtThen, IfStmtElse));
327 }
328 } // namespace clang::ast_matchers
329
330 namespace {
331 // Because the analysis revolves around variables and their types, we'll need to
332 // track uses of variables (aka DeclRefExprs).
333 using DeclUseList = SmallVector<const DeclRefExpr *, 1>;
334
335 // Convenience typedef.
336 using FixItList = SmallVector<FixItHint, 4>;
337
338 // Defined below.
339 class Strategy;
340 } // namespace
341
342 namespace {
343 /// Gadget is an individual operation in the code that may be of interest to
344 /// this analysis. Each (non-abstract) subclass corresponds to a specific
345 /// rigid AST structure that constitutes an operation on a pointer-type object.
346 /// Discovery of a gadget in the code corresponds to claiming that we understand
347 /// what this part of code is doing well enough to potentially improve it.
348 /// Gadgets can be warning (immediately deserving a warning) or fixable (not
349 /// always deserving a warning per se, but requires our attention to identify
350 /// it warrants a fixit).
351 class Gadget {
352 public:
353 enum class Kind {
354 #define GADGET(x) x,
355 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
356 };
357
358 /// Common type of ASTMatchers used for discovering gadgets.
359 /// Useful for implementing the static matcher() methods
360 /// that are expected from all non-abstract subclasses.
361 using Matcher = decltype(stmt());
362
Gadget(Kind K)363 Gadget(Kind K) : K(K) {}
364
getKind() const365 Kind getKind() const { return K; }
366
367 #ifndef NDEBUG
getDebugName() const368 StringRef getDebugName() const {
369 switch (K) {
370 #define GADGET(x) case Kind::x: return #x;
371 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
372 }
373 llvm_unreachable("Unhandled Gadget::Kind enum");
374 }
375 #endif
376
377 virtual bool isWarningGadget() const = 0;
378 virtual const Stmt *getBaseStmt() const = 0;
379
380 /// Returns the list of pointer-type variables on which this gadget performs
381 /// its operation. Typically, there's only one variable. This isn't a list
382 /// of all DeclRefExprs in the gadget's AST!
383 virtual DeclUseList getClaimedVarUseSites() const = 0;
384
385 virtual ~Gadget() = default;
386
387 private:
388 Kind K;
389 };
390
391
392 /// Warning gadgets correspond to unsafe code patterns that warrants
393 /// an immediate warning.
394 class WarningGadget : public Gadget {
395 public:
WarningGadget(Kind K)396 WarningGadget(Kind K) : Gadget(K) {}
397
classof(const Gadget * G)398 static bool classof(const Gadget *G) { return G->isWarningGadget(); }
isWarningGadget() const399 bool isWarningGadget() const final { return true; }
400 };
401
402 /// Fixable gadgets correspond to code patterns that aren't always unsafe but need to be
403 /// properly recognized in order to emit fixes. For example, if a raw pointer-type
404 /// variable is replaced by a safe C++ container, every use of such variable must be
405 /// carefully considered and possibly updated.
406 class FixableGadget : public Gadget {
407 public:
FixableGadget(Kind K)408 FixableGadget(Kind K) : Gadget(K) {}
409
classof(const Gadget * G)410 static bool classof(const Gadget *G) { return !G->isWarningGadget(); }
isWarningGadget() const411 bool isWarningGadget() const final { return false; }
412
413 /// Returns a fixit that would fix the current gadget according to
414 /// the current strategy. Returns std::nullopt if the fix cannot be produced;
415 /// returns an empty list if no fixes are necessary.
getFixits(const Strategy &) const416 virtual std::optional<FixItList> getFixits(const Strategy &) const {
417 return std::nullopt;
418 }
419
420 /// Returns a list of two elements where the first element is the LHS of a pointer assignment
421 /// statement and the second element is the RHS. This two-element list represents the fact that
422 /// the LHS buffer gets its bounds information from the RHS buffer. This information will be used
423 /// later to group all those variables whose types must be modified together to prevent type
424 /// mismatches.
425 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>>
getStrategyImplications() const426 getStrategyImplications() const {
427 return std::nullopt;
428 }
429 };
430
toSupportedVariable()431 static auto toSupportedVariable() {
432 return to(varDecl());
433 }
434
435 using FixableGadgetList = std::vector<std::unique_ptr<FixableGadget>>;
436 using WarningGadgetList = std::vector<std::unique_ptr<WarningGadget>>;
437
438 /// An increment of a pointer-type value is unsafe as it may run the pointer
439 /// out of bounds.
440 class IncrementGadget : public WarningGadget {
441 static constexpr const char *const OpTag = "op";
442 const UnaryOperator *Op;
443
444 public:
IncrementGadget(const MatchFinder::MatchResult & Result)445 IncrementGadget(const MatchFinder::MatchResult &Result)
446 : WarningGadget(Kind::Increment),
447 Op(Result.Nodes.getNodeAs<UnaryOperator>(OpTag)) {}
448
classof(const Gadget * G)449 static bool classof(const Gadget *G) {
450 return G->getKind() == Kind::Increment;
451 }
452
matcher()453 static Matcher matcher() {
454 return stmt(unaryOperator(
455 hasOperatorName("++"),
456 hasUnaryOperand(ignoringParenImpCasts(hasPointerType()))
457 ).bind(OpTag));
458 }
459
getBaseStmt() const460 const UnaryOperator *getBaseStmt() const override { return Op; }
461
getClaimedVarUseSites() const462 DeclUseList getClaimedVarUseSites() const override {
463 SmallVector<const DeclRefExpr *, 2> Uses;
464 if (const auto *DRE =
465 dyn_cast<DeclRefExpr>(Op->getSubExpr()->IgnoreParenImpCasts())) {
466 Uses.push_back(DRE);
467 }
468
469 return std::move(Uses);
470 }
471 };
472
473 /// A decrement of a pointer-type value is unsafe as it may run the pointer
474 /// out of bounds.
475 class DecrementGadget : public WarningGadget {
476 static constexpr const char *const OpTag = "op";
477 const UnaryOperator *Op;
478
479 public:
DecrementGadget(const MatchFinder::MatchResult & Result)480 DecrementGadget(const MatchFinder::MatchResult &Result)
481 : WarningGadget(Kind::Decrement),
482 Op(Result.Nodes.getNodeAs<UnaryOperator>(OpTag)) {}
483
classof(const Gadget * G)484 static bool classof(const Gadget *G) {
485 return G->getKind() == Kind::Decrement;
486 }
487
matcher()488 static Matcher matcher() {
489 return stmt(unaryOperator(
490 hasOperatorName("--"),
491 hasUnaryOperand(ignoringParenImpCasts(hasPointerType()))
492 ).bind(OpTag));
493 }
494
getBaseStmt() const495 const UnaryOperator *getBaseStmt() const override { return Op; }
496
getClaimedVarUseSites() const497 DeclUseList getClaimedVarUseSites() const override {
498 if (const auto *DRE =
499 dyn_cast<DeclRefExpr>(Op->getSubExpr()->IgnoreParenImpCasts())) {
500 return {DRE};
501 }
502
503 return {};
504 }
505 };
506
507 /// Array subscript expressions on raw pointers as if they're arrays. Unsafe as
508 /// it doesn't have any bounds checks for the array.
509 class ArraySubscriptGadget : public WarningGadget {
510 static constexpr const char *const ArraySubscrTag = "ArraySubscript";
511 const ArraySubscriptExpr *ASE;
512
513 public:
ArraySubscriptGadget(const MatchFinder::MatchResult & Result)514 ArraySubscriptGadget(const MatchFinder::MatchResult &Result)
515 : WarningGadget(Kind::ArraySubscript),
516 ASE(Result.Nodes.getNodeAs<ArraySubscriptExpr>(ArraySubscrTag)) {}
517
classof(const Gadget * G)518 static bool classof(const Gadget *G) {
519 return G->getKind() == Kind::ArraySubscript;
520 }
521
matcher()522 static Matcher matcher() {
523 // FIXME: What if the index is integer literal 0? Should this be
524 // a safe gadget in this case?
525 // clang-format off
526 return stmt(arraySubscriptExpr(
527 hasBase(ignoringParenImpCasts(
528 anyOf(hasPointerType(), hasArrayType()))),
529 unless(hasIndex(
530 anyOf(integerLiteral(equals(0)), arrayInitIndexExpr())
531 )))
532 .bind(ArraySubscrTag));
533 // clang-format on
534 }
535
getBaseStmt() const536 const ArraySubscriptExpr *getBaseStmt() const override { return ASE; }
537
getClaimedVarUseSites() const538 DeclUseList getClaimedVarUseSites() const override {
539 if (const auto *DRE =
540 dyn_cast<DeclRefExpr>(ASE->getBase()->IgnoreParenImpCasts())) {
541 return {DRE};
542 }
543
544 return {};
545 }
546 };
547
548 /// A pointer arithmetic expression of one of the forms:
549 /// \code
550 /// ptr + n | n + ptr | ptr - n | ptr += n | ptr -= n
551 /// \endcode
552 class PointerArithmeticGadget : public WarningGadget {
553 static constexpr const char *const PointerArithmeticTag = "ptrAdd";
554 static constexpr const char *const PointerArithmeticPointerTag = "ptrAddPtr";
555 const BinaryOperator *PA; // pointer arithmetic expression
556 const Expr *Ptr; // the pointer expression in `PA`
557
558 public:
PointerArithmeticGadget(const MatchFinder::MatchResult & Result)559 PointerArithmeticGadget(const MatchFinder::MatchResult &Result)
560 : WarningGadget(Kind::PointerArithmetic),
561 PA(Result.Nodes.getNodeAs<BinaryOperator>(PointerArithmeticTag)),
562 Ptr(Result.Nodes.getNodeAs<Expr>(PointerArithmeticPointerTag)) {}
563
classof(const Gadget * G)564 static bool classof(const Gadget *G) {
565 return G->getKind() == Kind::PointerArithmetic;
566 }
567
matcher()568 static Matcher matcher() {
569 auto HasIntegerType = anyOf(hasType(isInteger()), hasType(enumType()));
570 auto PtrAtRight =
571 allOf(hasOperatorName("+"),
572 hasRHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag)),
573 hasLHS(HasIntegerType));
574 auto PtrAtLeft =
575 allOf(anyOf(hasOperatorName("+"), hasOperatorName("-"),
576 hasOperatorName("+="), hasOperatorName("-=")),
577 hasLHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag)),
578 hasRHS(HasIntegerType));
579
580 return stmt(binaryOperator(anyOf(PtrAtLeft, PtrAtRight))
581 .bind(PointerArithmeticTag));
582 }
583
getBaseStmt() const584 const Stmt *getBaseStmt() const override { return PA; }
585
getClaimedVarUseSites() const586 DeclUseList getClaimedVarUseSites() const override {
587 if (const auto *DRE = dyn_cast<DeclRefExpr>(Ptr->IgnoreParenImpCasts())) {
588 return {DRE};
589 }
590
591 return {};
592 }
593 // FIXME: pointer adding zero should be fine
594 // FIXME: this gadge will need a fix-it
595 };
596
597 /// A pointer initialization expression of the form:
598 /// \code
599 /// int *p = q;
600 /// \endcode
601 class PointerInitGadget : public FixableGadget {
602 private:
603 static constexpr const char *const PointerInitLHSTag = "ptrInitLHS";
604 static constexpr const char *const PointerInitRHSTag = "ptrInitRHS";
605 const VarDecl * PtrInitLHS; // the LHS pointer expression in `PI`
606 const DeclRefExpr * PtrInitRHS; // the RHS pointer expression in `PI`
607
608 public:
PointerInitGadget(const MatchFinder::MatchResult & Result)609 PointerInitGadget(const MatchFinder::MatchResult &Result)
610 : FixableGadget(Kind::PointerInit),
611 PtrInitLHS(Result.Nodes.getNodeAs<VarDecl>(PointerInitLHSTag)),
612 PtrInitRHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerInitRHSTag)) {}
613
classof(const Gadget * G)614 static bool classof(const Gadget *G) {
615 return G->getKind() == Kind::PointerInit;
616 }
617
matcher()618 static Matcher matcher() {
619 auto PtrInitStmt = declStmt(hasSingleDecl(varDecl(
620 hasInitializer(ignoringImpCasts(declRefExpr(
621 hasPointerType(),
622 toSupportedVariable()).
623 bind(PointerInitRHSTag)))).
624 bind(PointerInitLHSTag)));
625
626 return stmt(PtrInitStmt);
627 }
628
629 virtual std::optional<FixItList> getFixits(const Strategy &S) const override;
630
getBaseStmt() const631 virtual const Stmt *getBaseStmt() const override {
632 // FIXME: This needs to be the entire DeclStmt, assuming that this method
633 // makes sense at all on a FixableGadget.
634 return PtrInitRHS;
635 }
636
getClaimedVarUseSites() const637 virtual DeclUseList getClaimedVarUseSites() const override {
638 return DeclUseList{PtrInitRHS};
639 }
640
641 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>>
getStrategyImplications() const642 getStrategyImplications() const override {
643 return std::make_pair(PtrInitLHS,
644 cast<VarDecl>(PtrInitRHS->getDecl()));
645 }
646 };
647
648 /// A pointer assignment expression of the form:
649 /// \code
650 /// p = q;
651 /// \endcode
652 class PointerAssignmentGadget : public FixableGadget {
653 private:
654 static constexpr const char *const PointerAssignLHSTag = "ptrLHS";
655 static constexpr const char *const PointerAssignRHSTag = "ptrRHS";
656 const DeclRefExpr * PtrLHS; // the LHS pointer expression in `PA`
657 const DeclRefExpr * PtrRHS; // the RHS pointer expression in `PA`
658
659 public:
PointerAssignmentGadget(const MatchFinder::MatchResult & Result)660 PointerAssignmentGadget(const MatchFinder::MatchResult &Result)
661 : FixableGadget(Kind::PointerAssignment),
662 PtrLHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerAssignLHSTag)),
663 PtrRHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerAssignRHSTag)) {}
664
classof(const Gadget * G)665 static bool classof(const Gadget *G) {
666 return G->getKind() == Kind::PointerAssignment;
667 }
668
matcher()669 static Matcher matcher() {
670 auto PtrAssignExpr = binaryOperator(allOf(hasOperatorName("="),
671 hasRHS(ignoringParenImpCasts(declRefExpr(hasPointerType(),
672 toSupportedVariable()).
673 bind(PointerAssignRHSTag))),
674 hasLHS(declRefExpr(hasPointerType(),
675 toSupportedVariable()).
676 bind(PointerAssignLHSTag))));
677
678 return stmt(isInUnspecifiedUntypedContext(PtrAssignExpr));
679 }
680
681 virtual std::optional<FixItList> getFixits(const Strategy &S) const override;
682
getBaseStmt() const683 virtual const Stmt *getBaseStmt() const override {
684 // FIXME: This should be the binary operator, assuming that this method
685 // makes sense at all on a FixableGadget.
686 return PtrLHS;
687 }
688
getClaimedVarUseSites() const689 virtual DeclUseList getClaimedVarUseSites() const override {
690 return DeclUseList{PtrLHS, PtrRHS};
691 }
692
693 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>>
getStrategyImplications() const694 getStrategyImplications() const override {
695 return std::make_pair(cast<VarDecl>(PtrLHS->getDecl()),
696 cast<VarDecl>(PtrRHS->getDecl()));
697 }
698 };
699
700 /// A call of a function or method that performs unchecked buffer operations
701 /// over one of its pointer parameters.
702 class UnsafeBufferUsageAttrGadget : public WarningGadget {
703 constexpr static const char *const OpTag = "call_expr";
704 const CallExpr *Op;
705
706 public:
UnsafeBufferUsageAttrGadget(const MatchFinder::MatchResult & Result)707 UnsafeBufferUsageAttrGadget(const MatchFinder::MatchResult &Result)
708 : WarningGadget(Kind::UnsafeBufferUsageAttr),
709 Op(Result.Nodes.getNodeAs<CallExpr>(OpTag)) {}
710
classof(const Gadget * G)711 static bool classof(const Gadget *G) {
712 return G->getKind() == Kind::UnsafeBufferUsageAttr;
713 }
714
matcher()715 static Matcher matcher() {
716 return stmt(callExpr(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage))))
717 .bind(OpTag));
718 }
getBaseStmt() const719 const Stmt *getBaseStmt() const override { return Op; }
720
getClaimedVarUseSites() const721 DeclUseList getClaimedVarUseSites() const override { return {}; }
722 };
723
724 // Warning gadget for unsafe invocation of span::data method.
725 // Triggers when the pointer returned by the invocation is immediately
726 // cast to a larger type.
727
728 class DataInvocationGadget : public WarningGadget {
729 constexpr static const char *const OpTag = "data_invocation_expr";
730 const ExplicitCastExpr *Op;
731
732 public:
DataInvocationGadget(const MatchFinder::MatchResult & Result)733 DataInvocationGadget(const MatchFinder::MatchResult &Result)
734 : WarningGadget(Kind::DataInvocation),
735 Op(Result.Nodes.getNodeAs<ExplicitCastExpr>(OpTag)) {}
736
classof(const Gadget * G)737 static bool classof(const Gadget *G) {
738 return G->getKind() == Kind::DataInvocation;
739 }
740
matcher()741 static Matcher matcher() {
742 Matcher callExpr = cxxMemberCallExpr(
743 callee(cxxMethodDecl(hasName("data"), ofClass(hasName("std::span")))));
744 return stmt(
745 explicitCastExpr(anyOf(has(callExpr), has(parenExpr(has(callExpr)))))
746 .bind(OpTag));
747 }
getBaseStmt() const748 const Stmt *getBaseStmt() const override { return Op; }
749
getClaimedVarUseSites() const750 DeclUseList getClaimedVarUseSites() const override { return {}; }
751 };
752
753 // Represents expressions of the form `DRE[*]` in the Unspecified Lvalue
754 // Context (see `isInUnspecifiedLvalueContext`).
755 // Note here `[]` is the built-in subscript operator.
756 class ULCArraySubscriptGadget : public FixableGadget {
757 private:
758 static constexpr const char *const ULCArraySubscriptTag =
759 "ArraySubscriptUnderULC";
760 const ArraySubscriptExpr *Node;
761
762 public:
ULCArraySubscriptGadget(const MatchFinder::MatchResult & Result)763 ULCArraySubscriptGadget(const MatchFinder::MatchResult &Result)
764 : FixableGadget(Kind::ULCArraySubscript),
765 Node(Result.Nodes.getNodeAs<ArraySubscriptExpr>(ULCArraySubscriptTag)) {
766 assert(Node != nullptr && "Expecting a non-null matching result");
767 }
768
classof(const Gadget * G)769 static bool classof(const Gadget *G) {
770 return G->getKind() == Kind::ULCArraySubscript;
771 }
772
matcher()773 static Matcher matcher() {
774 auto ArrayOrPtr = anyOf(hasPointerType(), hasArrayType());
775 auto BaseIsArrayOrPtrDRE =
776 hasBase(ignoringParenImpCasts(declRefExpr(ArrayOrPtr,
777 toSupportedVariable())));
778 auto Target =
779 arraySubscriptExpr(BaseIsArrayOrPtrDRE).bind(ULCArraySubscriptTag);
780
781 return expr(isInUnspecifiedLvalueContext(Target));
782 }
783
784 virtual std::optional<FixItList> getFixits(const Strategy &S) const override;
785
getBaseStmt() const786 virtual const Stmt *getBaseStmt() const override { return Node; }
787
getClaimedVarUseSites() const788 virtual DeclUseList getClaimedVarUseSites() const override {
789 if (const auto *DRE =
790 dyn_cast<DeclRefExpr>(Node->getBase()->IgnoreImpCasts())) {
791 return {DRE};
792 }
793 return {};
794 }
795 };
796
797 // Fixable gadget to handle stand alone pointers of the form `UPC(DRE)` in the
798 // unspecified pointer context (isInUnspecifiedPointerContext). The gadget emits
799 // fixit of the form `UPC(DRE.data())`.
800 class UPCStandalonePointerGadget : public FixableGadget {
801 private:
802 static constexpr const char *const DeclRefExprTag = "StandalonePointer";
803 const DeclRefExpr *Node;
804
805 public:
UPCStandalonePointerGadget(const MatchFinder::MatchResult & Result)806 UPCStandalonePointerGadget(const MatchFinder::MatchResult &Result)
807 : FixableGadget(Kind::UPCStandalonePointer),
808 Node(Result.Nodes.getNodeAs<DeclRefExpr>(DeclRefExprTag)) {
809 assert(Node != nullptr && "Expecting a non-null matching result");
810 }
811
classof(const Gadget * G)812 static bool classof(const Gadget *G) {
813 return G->getKind() == Kind::UPCStandalonePointer;
814 }
815
matcher()816 static Matcher matcher() {
817 auto ArrayOrPtr = anyOf(hasPointerType(), hasArrayType());
818 auto target = expr(
819 ignoringParenImpCasts(declRefExpr(allOf(ArrayOrPtr,
820 toSupportedVariable())).bind(DeclRefExprTag)));
821 return stmt(isInUnspecifiedPointerContext(target));
822 }
823
824 virtual std::optional<FixItList> getFixits(const Strategy &S) const override;
825
getBaseStmt() const826 virtual const Stmt *getBaseStmt() const override { return Node; }
827
getClaimedVarUseSites() const828 virtual DeclUseList getClaimedVarUseSites() const override {
829 return {Node};
830 }
831 };
832
833 class PointerDereferenceGadget : public FixableGadget {
834 static constexpr const char *const BaseDeclRefExprTag = "BaseDRE";
835 static constexpr const char *const OperatorTag = "op";
836
837 const DeclRefExpr *BaseDeclRefExpr = nullptr;
838 const UnaryOperator *Op = nullptr;
839
840 public:
PointerDereferenceGadget(const MatchFinder::MatchResult & Result)841 PointerDereferenceGadget(const MatchFinder::MatchResult &Result)
842 : FixableGadget(Kind::PointerDereference),
843 BaseDeclRefExpr(
844 Result.Nodes.getNodeAs<DeclRefExpr>(BaseDeclRefExprTag)),
845 Op(Result.Nodes.getNodeAs<UnaryOperator>(OperatorTag)) {}
846
classof(const Gadget * G)847 static bool classof(const Gadget *G) {
848 return G->getKind() == Kind::PointerDereference;
849 }
850
matcher()851 static Matcher matcher() {
852 auto Target =
853 unaryOperator(
854 hasOperatorName("*"),
855 has(expr(ignoringParenImpCasts(
856 declRefExpr(toSupportedVariable()).bind(BaseDeclRefExprTag)))))
857 .bind(OperatorTag);
858
859 return expr(isInUnspecifiedLvalueContext(Target));
860 }
861
getClaimedVarUseSites() const862 DeclUseList getClaimedVarUseSites() const override {
863 return {BaseDeclRefExpr};
864 }
865
getBaseStmt() const866 virtual const Stmt *getBaseStmt() const final { return Op; }
867
868 virtual std::optional<FixItList> getFixits(const Strategy &S) const override;
869 };
870
871 // Represents expressions of the form `&DRE[any]` in the Unspecified Pointer
872 // Context (see `isInUnspecifiedPointerContext`).
873 // Note here `[]` is the built-in subscript operator.
874 class UPCAddressofArraySubscriptGadget : public FixableGadget {
875 private:
876 static constexpr const char *const UPCAddressofArraySubscriptTag =
877 "AddressofArraySubscriptUnderUPC";
878 const UnaryOperator *Node; // the `&DRE[any]` node
879
880 public:
UPCAddressofArraySubscriptGadget(const MatchFinder::MatchResult & Result)881 UPCAddressofArraySubscriptGadget(const MatchFinder::MatchResult &Result)
882 : FixableGadget(Kind::ULCArraySubscript),
883 Node(Result.Nodes.getNodeAs<UnaryOperator>(
884 UPCAddressofArraySubscriptTag)) {
885 assert(Node != nullptr && "Expecting a non-null matching result");
886 }
887
classof(const Gadget * G)888 static bool classof(const Gadget *G) {
889 return G->getKind() == Kind::UPCAddressofArraySubscript;
890 }
891
matcher()892 static Matcher matcher() {
893 return expr(isInUnspecifiedPointerContext(expr(ignoringImpCasts(
894 unaryOperator(hasOperatorName("&"),
895 hasUnaryOperand(arraySubscriptExpr(
896 hasBase(ignoringParenImpCasts(declRefExpr(
897 toSupportedVariable()))))))
898 .bind(UPCAddressofArraySubscriptTag)))));
899 }
900
901 virtual std::optional<FixItList> getFixits(const Strategy &) const override;
902
getBaseStmt() const903 virtual const Stmt *getBaseStmt() const override { return Node; }
904
getClaimedVarUseSites() const905 virtual DeclUseList getClaimedVarUseSites() const override {
906 const auto *ArraySubst = cast<ArraySubscriptExpr>(Node->getSubExpr());
907 const auto *DRE =
908 cast<DeclRefExpr>(ArraySubst->getBase()->IgnoreImpCasts());
909 return {DRE};
910 }
911 };
912 } // namespace
913
914 namespace {
915 // An auxiliary tracking facility for the fixit analysis. It helps connect
916 // declarations to its uses and make sure we've covered all uses with our
917 // analysis before we try to fix the declaration.
918 class DeclUseTracker {
919 using UseSetTy = SmallSet<const DeclRefExpr *, 16>;
920 using DefMapTy = DenseMap<const VarDecl *, const DeclStmt *>;
921
922 // Allocate on the heap for easier move.
923 std::unique_ptr<UseSetTy> Uses{std::make_unique<UseSetTy>()};
924 DefMapTy Defs{};
925
926 public:
927 DeclUseTracker() = default;
928 DeclUseTracker(const DeclUseTracker &) = delete; // Let's avoid copies.
929 DeclUseTracker &operator=(const DeclUseTracker &) = delete;
930 DeclUseTracker(DeclUseTracker &&) = default;
931 DeclUseTracker &operator=(DeclUseTracker &&) = default;
932
933 // Start tracking a freshly discovered DRE.
discoverUse(const DeclRefExpr * DRE)934 void discoverUse(const DeclRefExpr *DRE) { Uses->insert(DRE); }
935
936 // Stop tracking the DRE as it's been fully figured out.
claimUse(const DeclRefExpr * DRE)937 void claimUse(const DeclRefExpr *DRE) {
938 assert(Uses->count(DRE) &&
939 "DRE not found or claimed by multiple matchers!");
940 Uses->erase(DRE);
941 }
942
943 // A variable is unclaimed if at least one use is unclaimed.
hasUnclaimedUses(const VarDecl * VD) const944 bool hasUnclaimedUses(const VarDecl *VD) const {
945 // FIXME: Can this be less linear? Maybe maintain a map from VDs to DREs?
946 return any_of(*Uses, [VD](const DeclRefExpr *DRE) {
947 return DRE->getDecl()->getCanonicalDecl() == VD->getCanonicalDecl();
948 });
949 }
950
getUnclaimedUses(const VarDecl * VD) const951 UseSetTy getUnclaimedUses(const VarDecl *VD) const {
952 UseSetTy ReturnSet;
953 for (auto use : *Uses) {
954 if (use->getDecl()->getCanonicalDecl() == VD->getCanonicalDecl()) {
955 ReturnSet.insert(use);
956 }
957 }
958 return ReturnSet;
959 }
960
discoverDecl(const DeclStmt * DS)961 void discoverDecl(const DeclStmt *DS) {
962 for (const Decl *D : DS->decls()) {
963 if (const auto *VD = dyn_cast<VarDecl>(D)) {
964 // FIXME: Assertion temporarily disabled due to a bug in
965 // ASTMatcher internal behavior in presence of GNU
966 // statement-expressions. We need to properly investigate this
967 // because it can screw up our algorithm in other ways.
968 // assert(Defs.count(VD) == 0 && "Definition already discovered!");
969 Defs[VD] = DS;
970 }
971 }
972 }
973
lookupDecl(const VarDecl * VD) const974 const DeclStmt *lookupDecl(const VarDecl *VD) const {
975 return Defs.lookup(VD);
976 }
977 };
978 } // namespace
979
980 namespace {
981 // Strategy is a map from variables to the way we plan to emit fixes for
982 // these variables. It is figured out gradually by trying different fixes
983 // for different variables depending on gadgets in which these variables
984 // participate.
985 class Strategy {
986 public:
987 enum class Kind {
988 Wontfix, // We don't plan to emit a fixit for this variable.
989 Span, // We recommend replacing the variable with std::span.
990 Iterator, // We recommend replacing the variable with std::span::iterator.
991 Array, // We recommend replacing the variable with std::array.
992 Vector // We recommend replacing the variable with std::vector.
993 };
994
995 private:
996 using MapTy = llvm::DenseMap<const VarDecl *, Kind>;
997
998 MapTy Map;
999
1000 public:
1001 Strategy() = default;
1002 Strategy(const Strategy &) = delete; // Let's avoid copies.
1003 Strategy &operator=(const Strategy &) = delete;
1004 Strategy(Strategy &&) = default;
1005 Strategy &operator=(Strategy &&) = default;
1006
set(const VarDecl * VD,Kind K)1007 void set(const VarDecl *VD, Kind K) { Map[VD] = K; }
1008
lookup(const VarDecl * VD) const1009 Kind lookup(const VarDecl *VD) const {
1010 auto I = Map.find(VD);
1011 if (I == Map.end())
1012 return Kind::Wontfix;
1013
1014 return I->second;
1015 }
1016 };
1017 } // namespace
1018
1019
1020 // Representing a pointer type expression of the form `++Ptr` in an Unspecified
1021 // Pointer Context (UPC):
1022 class UPCPreIncrementGadget : public FixableGadget {
1023 private:
1024 static constexpr const char *const UPCPreIncrementTag =
1025 "PointerPreIncrementUnderUPC";
1026 const UnaryOperator *Node; // the `++Ptr` node
1027
1028 public:
UPCPreIncrementGadget(const MatchFinder::MatchResult & Result)1029 UPCPreIncrementGadget(const MatchFinder::MatchResult &Result)
1030 : FixableGadget(Kind::UPCPreIncrement),
1031 Node(Result.Nodes.getNodeAs<UnaryOperator>(UPCPreIncrementTag)) {
1032 assert(Node != nullptr && "Expecting a non-null matching result");
1033 }
1034
classof(const Gadget * G)1035 static bool classof(const Gadget *G) {
1036 return G->getKind() == Kind::UPCPreIncrement;
1037 }
1038
matcher()1039 static Matcher matcher() {
1040 // Note here we match `++Ptr` for any expression `Ptr` of pointer type.
1041 // Although currently we can only provide fix-its when `Ptr` is a DRE, we
1042 // can have the matcher be general, so long as `getClaimedVarUseSites` does
1043 // things right.
1044 return stmt(isInUnspecifiedPointerContext(expr(ignoringImpCasts(
1045 unaryOperator(isPreInc(),
1046 hasUnaryOperand(declRefExpr(
1047 toSupportedVariable()))
1048 ).bind(UPCPreIncrementTag)))));
1049 }
1050
1051 virtual std::optional<FixItList> getFixits(const Strategy &S) const override;
1052
getBaseStmt() const1053 virtual const Stmt *getBaseStmt() const override { return Node; }
1054
getClaimedVarUseSites() const1055 virtual DeclUseList getClaimedVarUseSites() const override {
1056 return {dyn_cast<DeclRefExpr>(Node->getSubExpr())};
1057 }
1058 };
1059
1060 // Representing a pointer type expression of the form `Ptr += n` in an
1061 // Unspecified Untyped Context (UUC):
1062 class UUCAddAssignGadget : public FixableGadget {
1063 private:
1064 static constexpr const char *const UUCAddAssignTag =
1065 "PointerAddAssignUnderUUC";
1066 static constexpr const char *const OffsetTag = "Offset";
1067
1068 const BinaryOperator *Node; // the `Ptr += n` node
1069 const Expr *Offset = nullptr;
1070
1071 public:
UUCAddAssignGadget(const MatchFinder::MatchResult & Result)1072 UUCAddAssignGadget(const MatchFinder::MatchResult &Result)
1073 : FixableGadget(Kind::UUCAddAssign),
1074 Node(Result.Nodes.getNodeAs<BinaryOperator>(UUCAddAssignTag)),
1075 Offset(Result.Nodes.getNodeAs<Expr>(OffsetTag)) {
1076 assert(Node != nullptr && "Expecting a non-null matching result");
1077 }
1078
classof(const Gadget * G)1079 static bool classof(const Gadget *G) {
1080 return G->getKind() == Kind::UUCAddAssign;
1081 }
1082
matcher()1083 static Matcher matcher() {
1084 return stmt(isInUnspecifiedUntypedContext(expr(ignoringImpCasts(
1085 binaryOperator(hasOperatorName("+="),
1086 hasLHS(declRefExpr(toSupportedVariable())),
1087 hasRHS(expr().bind(OffsetTag)))
1088 .bind(UUCAddAssignTag)))));
1089 }
1090
1091 virtual std::optional<FixItList> getFixits(const Strategy &S) const override;
1092
getBaseStmt() const1093 virtual const Stmt *getBaseStmt() const override { return Node; }
1094
getClaimedVarUseSites() const1095 virtual DeclUseList getClaimedVarUseSites() const override {
1096 return {dyn_cast<DeclRefExpr>(Node->getLHS())};
1097 }
1098 };
1099
1100 // Representing a fixable expression of the form `*(ptr + 123)` or `*(123 +
1101 // ptr)`:
1102 class DerefSimplePtrArithFixableGadget : public FixableGadget {
1103 static constexpr const char *const BaseDeclRefExprTag = "BaseDRE";
1104 static constexpr const char *const DerefOpTag = "DerefOp";
1105 static constexpr const char *const AddOpTag = "AddOp";
1106 static constexpr const char *const OffsetTag = "Offset";
1107
1108 const DeclRefExpr *BaseDeclRefExpr = nullptr;
1109 const UnaryOperator *DerefOp = nullptr;
1110 const BinaryOperator *AddOp = nullptr;
1111 const IntegerLiteral *Offset = nullptr;
1112
1113 public:
DerefSimplePtrArithFixableGadget(const MatchFinder::MatchResult & Result)1114 DerefSimplePtrArithFixableGadget(const MatchFinder::MatchResult &Result)
1115 : FixableGadget(Kind::DerefSimplePtrArithFixable),
1116 BaseDeclRefExpr(
1117 Result.Nodes.getNodeAs<DeclRefExpr>(BaseDeclRefExprTag)),
1118 DerefOp(Result.Nodes.getNodeAs<UnaryOperator>(DerefOpTag)),
1119 AddOp(Result.Nodes.getNodeAs<BinaryOperator>(AddOpTag)),
1120 Offset(Result.Nodes.getNodeAs<IntegerLiteral>(OffsetTag)) {}
1121
matcher()1122 static Matcher matcher() {
1123 // clang-format off
1124 auto ThePtr = expr(hasPointerType(),
1125 ignoringImpCasts(declRefExpr(toSupportedVariable()).
1126 bind(BaseDeclRefExprTag)));
1127 auto PlusOverPtrAndInteger = expr(anyOf(
1128 binaryOperator(hasOperatorName("+"), hasLHS(ThePtr),
1129 hasRHS(integerLiteral().bind(OffsetTag)))
1130 .bind(AddOpTag),
1131 binaryOperator(hasOperatorName("+"), hasRHS(ThePtr),
1132 hasLHS(integerLiteral().bind(OffsetTag)))
1133 .bind(AddOpTag)));
1134 return isInUnspecifiedLvalueContext(unaryOperator(
1135 hasOperatorName("*"),
1136 hasUnaryOperand(ignoringParens(PlusOverPtrAndInteger)))
1137 .bind(DerefOpTag));
1138 // clang-format on
1139 }
1140
1141 virtual std::optional<FixItList> getFixits(const Strategy &s) const final;
1142
1143 // TODO remove this method from FixableGadget interface
getBaseStmt() const1144 virtual const Stmt *getBaseStmt() const final { return nullptr; }
1145
getClaimedVarUseSites() const1146 virtual DeclUseList getClaimedVarUseSites() const final {
1147 return {BaseDeclRefExpr};
1148 }
1149 };
1150
1151 /// Scan the function and return a list of gadgets found with provided kits.
1152 static std::tuple<FixableGadgetList, WarningGadgetList, DeclUseTracker>
findGadgets(const Decl * D,const UnsafeBufferUsageHandler & Handler,bool EmitSuggestions)1153 findGadgets(const Decl *D, const UnsafeBufferUsageHandler &Handler,
1154 bool EmitSuggestions) {
1155
1156 struct GadgetFinderCallback : MatchFinder::MatchCallback {
1157 FixableGadgetList FixableGadgets;
1158 WarningGadgetList WarningGadgets;
1159 DeclUseTracker Tracker;
1160
1161 void run(const MatchFinder::MatchResult &Result) override {
1162 // In debug mode, assert that we've found exactly one gadget.
1163 // This helps us avoid conflicts in .bind() tags.
1164 #if NDEBUG
1165 #define NEXT return
1166 #else
1167 [[maybe_unused]] int numFound = 0;
1168 #define NEXT ++numFound
1169 #endif
1170
1171 if (const auto *DRE = Result.Nodes.getNodeAs<DeclRefExpr>("any_dre")) {
1172 Tracker.discoverUse(DRE);
1173 NEXT;
1174 }
1175
1176 if (const auto *DS = Result.Nodes.getNodeAs<DeclStmt>("any_ds")) {
1177 Tracker.discoverDecl(DS);
1178 NEXT;
1179 }
1180
1181 // Figure out which matcher we've found, and call the appropriate
1182 // subclass constructor.
1183 // FIXME: Can we do this more logarithmically?
1184 #define FIXABLE_GADGET(name) \
1185 if (Result.Nodes.getNodeAs<Stmt>(#name)) { \
1186 FixableGadgets.push_back(std::make_unique<name##Gadget>(Result)); \
1187 NEXT; \
1188 }
1189 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
1190 #define WARNING_GADGET(name) \
1191 if (Result.Nodes.getNodeAs<Stmt>(#name)) { \
1192 WarningGadgets.push_back(std::make_unique<name##Gadget>(Result)); \
1193 NEXT; \
1194 }
1195 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
1196
1197 assert(numFound >= 1 && "Gadgets not found in match result!");
1198 assert(numFound <= 1 && "Conflicting bind tags in gadgets!");
1199 }
1200 };
1201
1202 MatchFinder M;
1203 GadgetFinderCallback CB;
1204
1205 // clang-format off
1206 M.addMatcher(
1207 stmt(
1208 forEachDescendantEvaluatedStmt(stmt(anyOf(
1209 // Add Gadget::matcher() for every gadget in the registry.
1210 #define WARNING_GADGET(x) \
1211 allOf(x ## Gadget::matcher().bind(#x), \
1212 notInSafeBufferOptOut(&Handler)),
1213 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
1214 // Avoid a hanging comma.
1215 unless(stmt())
1216 )))
1217 ),
1218 &CB
1219 );
1220 // clang-format on
1221
1222 if (EmitSuggestions) {
1223 // clang-format off
1224 M.addMatcher(
1225 stmt(
1226 forEachDescendantStmt(stmt(eachOf(
1227 #define FIXABLE_GADGET(x) \
1228 x ## Gadget::matcher().bind(#x),
1229 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
1230 // In parallel, match all DeclRefExprs so that to find out
1231 // whether there are any uncovered by gadgets.
1232 declRefExpr(anyOf(hasPointerType(), hasArrayType()),
1233 to(anyOf(varDecl(), bindingDecl()))).bind("any_dre"),
1234 // Also match DeclStmts because we'll need them when fixing
1235 // their underlying VarDecls that otherwise don't have
1236 // any backreferences to DeclStmts.
1237 declStmt().bind("any_ds")
1238 )))
1239 ),
1240 &CB
1241 );
1242 // clang-format on
1243 }
1244
1245 M.match(*D->getBody(), D->getASTContext());
1246 return {std::move(CB.FixableGadgets), std::move(CB.WarningGadgets),
1247 std::move(CB.Tracker)};
1248 }
1249
1250 // Compares AST nodes by source locations.
1251 template <typename NodeTy> struct CompareNode {
operator ()CompareNode1252 bool operator()(const NodeTy *N1, const NodeTy *N2) const {
1253 return N1->getBeginLoc().getRawEncoding() <
1254 N2->getBeginLoc().getRawEncoding();
1255 }
1256 };
1257
1258 struct WarningGadgetSets {
1259 std::map<const VarDecl *, std::set<const WarningGadget *>,
1260 // To keep keys sorted by their locations in the map so that the
1261 // order is deterministic:
1262 CompareNode<VarDecl>>
1263 byVar;
1264 // These Gadgets are not related to pointer variables (e. g. temporaries).
1265 llvm::SmallVector<const WarningGadget *, 16> noVar;
1266 };
1267
1268 static WarningGadgetSets
groupWarningGadgetsByVar(const WarningGadgetList & AllUnsafeOperations)1269 groupWarningGadgetsByVar(const WarningGadgetList &AllUnsafeOperations) {
1270 WarningGadgetSets result;
1271 // If some gadgets cover more than one
1272 // variable, they'll appear more than once in the map.
1273 for (auto &G : AllUnsafeOperations) {
1274 DeclUseList ClaimedVarUseSites = G->getClaimedVarUseSites();
1275
1276 bool AssociatedWithVarDecl = false;
1277 for (const DeclRefExpr *DRE : ClaimedVarUseSites) {
1278 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
1279 result.byVar[VD].insert(G.get());
1280 AssociatedWithVarDecl = true;
1281 }
1282 }
1283
1284 if (!AssociatedWithVarDecl) {
1285 result.noVar.push_back(G.get());
1286 continue;
1287 }
1288 }
1289 return result;
1290 }
1291
1292 struct FixableGadgetSets {
1293 std::map<const VarDecl *, std::set<const FixableGadget *>,
1294 // To keep keys sorted by their locations in the map so that the
1295 // order is deterministic:
1296 CompareNode<VarDecl>>
1297 byVar;
1298 };
1299
1300 static FixableGadgetSets
groupFixablesByVar(FixableGadgetList && AllFixableOperations)1301 groupFixablesByVar(FixableGadgetList &&AllFixableOperations) {
1302 FixableGadgetSets FixablesForUnsafeVars;
1303 for (auto &F : AllFixableOperations) {
1304 DeclUseList DREs = F->getClaimedVarUseSites();
1305
1306 for (const DeclRefExpr *DRE : DREs) {
1307 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
1308 FixablesForUnsafeVars.byVar[VD].insert(F.get());
1309 }
1310 }
1311 }
1312 return FixablesForUnsafeVars;
1313 }
1314
anyConflict(const SmallVectorImpl<FixItHint> & FixIts,const SourceManager & SM)1315 bool clang::internal::anyConflict(const SmallVectorImpl<FixItHint> &FixIts,
1316 const SourceManager &SM) {
1317 // A simple interval overlap detection algorithm. Sorts all ranges by their
1318 // begin location then finds the first overlap in one pass.
1319 std::vector<const FixItHint *> All; // a copy of `FixIts`
1320
1321 for (const FixItHint &H : FixIts)
1322 All.push_back(&H);
1323 std::sort(All.begin(), All.end(),
1324 [&SM](const FixItHint *H1, const FixItHint *H2) {
1325 return SM.isBeforeInTranslationUnit(H1->RemoveRange.getBegin(),
1326 H2->RemoveRange.getBegin());
1327 });
1328
1329 const FixItHint *CurrHint = nullptr;
1330
1331 for (const FixItHint *Hint : All) {
1332 if (!CurrHint ||
1333 SM.isBeforeInTranslationUnit(CurrHint->RemoveRange.getEnd(),
1334 Hint->RemoveRange.getBegin())) {
1335 // Either to initialize `CurrHint` or `CurrHint` does not
1336 // overlap with `Hint`:
1337 CurrHint = Hint;
1338 } else
1339 // In case `Hint` overlaps the `CurrHint`, we found at least one
1340 // conflict:
1341 return true;
1342 }
1343 return false;
1344 }
1345
1346 std::optional<FixItList>
getFixits(const Strategy & S) const1347 PointerAssignmentGadget::getFixits(const Strategy &S) const {
1348 const auto *LeftVD = cast<VarDecl>(PtrLHS->getDecl());
1349 const auto *RightVD = cast<VarDecl>(PtrRHS->getDecl());
1350 switch (S.lookup(LeftVD)) {
1351 case Strategy::Kind::Span:
1352 if (S.lookup(RightVD) == Strategy::Kind::Span)
1353 return FixItList{};
1354 return std::nullopt;
1355 case Strategy::Kind::Wontfix:
1356 return std::nullopt;
1357 case Strategy::Kind::Iterator:
1358 case Strategy::Kind::Array:
1359 case Strategy::Kind::Vector:
1360 llvm_unreachable("unsupported strategies for FixableGadgets");
1361 }
1362 return std::nullopt;
1363 }
1364
1365 std::optional<FixItList>
getFixits(const Strategy & S) const1366 PointerInitGadget::getFixits(const Strategy &S) const {
1367 const auto *LeftVD = PtrInitLHS;
1368 const auto *RightVD = cast<VarDecl>(PtrInitRHS->getDecl());
1369 switch (S.lookup(LeftVD)) {
1370 case Strategy::Kind::Span:
1371 if (S.lookup(RightVD) == Strategy::Kind::Span)
1372 return FixItList{};
1373 return std::nullopt;
1374 case Strategy::Kind::Wontfix:
1375 return std::nullopt;
1376 case Strategy::Kind::Iterator:
1377 case Strategy::Kind::Array:
1378 case Strategy::Kind::Vector:
1379 llvm_unreachable("unsupported strategies for FixableGadgets");
1380 }
1381 return std::nullopt;
1382 }
1383
isNonNegativeIntegerExpr(const Expr * Expr,const VarDecl * VD,const ASTContext & Ctx)1384 static bool isNonNegativeIntegerExpr(const Expr *Expr, const VarDecl *VD,
1385 const ASTContext &Ctx) {
1386 if (auto ConstVal = Expr->getIntegerConstantExpr(Ctx)) {
1387 if (ConstVal->isNegative())
1388 return false;
1389 } else if (!Expr->getType()->isUnsignedIntegerType())
1390 return false;
1391 return true;
1392 }
1393
1394 std::optional<FixItList>
getFixits(const Strategy & S) const1395 ULCArraySubscriptGadget::getFixits(const Strategy &S) const {
1396 if (const auto *DRE =
1397 dyn_cast<DeclRefExpr>(Node->getBase()->IgnoreImpCasts()))
1398 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
1399 switch (S.lookup(VD)) {
1400 case Strategy::Kind::Span: {
1401
1402 // If the index has a negative constant value, we give up as no valid
1403 // fix-it can be generated:
1404 const ASTContext &Ctx = // FIXME: we need ASTContext to be passed in!
1405 VD->getASTContext();
1406 if (!isNonNegativeIntegerExpr(Node->getIdx(), VD, Ctx))
1407 return std::nullopt;
1408 // no-op is a good fix-it, otherwise
1409 return FixItList{};
1410 }
1411 case Strategy::Kind::Wontfix:
1412 case Strategy::Kind::Iterator:
1413 case Strategy::Kind::Array:
1414 case Strategy::Kind::Vector:
1415 llvm_unreachable("unsupported strategies for FixableGadgets");
1416 }
1417 }
1418 return std::nullopt;
1419 }
1420
1421 static std::optional<FixItList> // forward declaration
1422 fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node);
1423
1424 std::optional<FixItList>
getFixits(const Strategy & S) const1425 UPCAddressofArraySubscriptGadget::getFixits(const Strategy &S) const {
1426 auto DREs = getClaimedVarUseSites();
1427 const auto *VD = cast<VarDecl>(DREs.front()->getDecl());
1428
1429 switch (S.lookup(VD)) {
1430 case Strategy::Kind::Span:
1431 return fixUPCAddressofArraySubscriptWithSpan(Node);
1432 case Strategy::Kind::Wontfix:
1433 case Strategy::Kind::Iterator:
1434 case Strategy::Kind::Array:
1435 case Strategy::Kind::Vector:
1436 llvm_unreachable("unsupported strategies for FixableGadgets");
1437 }
1438 return std::nullopt; // something went wrong, no fix-it
1439 }
1440
1441 // FIXME: this function should be customizable through format
getEndOfLine()1442 static StringRef getEndOfLine() {
1443 static const char *const EOL = "\n";
1444 return EOL;
1445 }
1446
1447 // Returns the text indicating that the user needs to provide input there:
getUserFillPlaceHolder(StringRef HintTextToUser="placeholder")1448 std::string getUserFillPlaceHolder(StringRef HintTextToUser = "placeholder") {
1449 std::string s = std::string("<# ");
1450 s += HintTextToUser;
1451 s += " #>";
1452 return s;
1453 }
1454
1455 // Return the text representation of the given `APInt Val`:
getAPIntText(APInt Val)1456 static std::string getAPIntText(APInt Val) {
1457 SmallVector<char> Txt;
1458 Val.toString(Txt, 10, true);
1459 // APInt::toString does not add '\0' to the end of the string for us:
1460 Txt.push_back('\0');
1461 return Txt.data();
1462 }
1463
1464 // Return the source location of the last character of the AST `Node`.
1465 template <typename NodeTy>
1466 static std::optional<SourceLocation>
getEndCharLoc(const NodeTy * Node,const SourceManager & SM,const LangOptions & LangOpts)1467 getEndCharLoc(const NodeTy *Node, const SourceManager &SM,
1468 const LangOptions &LangOpts) {
1469 unsigned TkLen = Lexer::MeasureTokenLength(Node->getEndLoc(), SM, LangOpts);
1470 SourceLocation Loc = Node->getEndLoc().getLocWithOffset(TkLen - 1);
1471
1472 if (Loc.isValid())
1473 return Loc;
1474
1475 return std::nullopt;
1476 }
1477
1478 // Return the source location just past the last character of the AST `Node`.
1479 template <typename NodeTy>
getPastLoc(const NodeTy * Node,const SourceManager & SM,const LangOptions & LangOpts)1480 static std::optional<SourceLocation> getPastLoc(const NodeTy *Node,
1481 const SourceManager &SM,
1482 const LangOptions &LangOpts) {
1483 SourceLocation Loc =
1484 Lexer::getLocForEndOfToken(Node->getEndLoc(), 0, SM, LangOpts);
1485 if (Loc.isValid())
1486 return Loc;
1487 return std::nullopt;
1488 }
1489
1490 // Return text representation of an `Expr`.
getExprText(const Expr * E,const SourceManager & SM,const LangOptions & LangOpts)1491 static std::optional<StringRef> getExprText(const Expr *E,
1492 const SourceManager &SM,
1493 const LangOptions &LangOpts) {
1494 std::optional<SourceLocation> LastCharLoc = getPastLoc(E, SM, LangOpts);
1495
1496 if (LastCharLoc)
1497 return Lexer::getSourceText(
1498 CharSourceRange::getCharRange(E->getBeginLoc(), *LastCharLoc), SM,
1499 LangOpts);
1500
1501 return std::nullopt;
1502 }
1503
1504 // Returns the literal text in `SourceRange SR`, if `SR` is a valid range.
getRangeText(SourceRange SR,const SourceManager & SM,const LangOptions & LangOpts)1505 static std::optional<StringRef> getRangeText(SourceRange SR,
1506 const SourceManager &SM,
1507 const LangOptions &LangOpts) {
1508 bool Invalid = false;
1509 CharSourceRange CSR = CharSourceRange::getCharRange(SR);
1510 StringRef Text = Lexer::getSourceText(CSR, SM, LangOpts, &Invalid);
1511
1512 if (!Invalid)
1513 return Text;
1514 return std::nullopt;
1515 }
1516
1517 // Returns the begin location of the identifier of the given variable
1518 // declaration.
getVarDeclIdentifierLoc(const VarDecl * VD)1519 static SourceLocation getVarDeclIdentifierLoc(const VarDecl *VD) {
1520 // According to the implementation of `VarDecl`, `VD->getLocation()` actually
1521 // returns the begin location of the identifier of the declaration:
1522 return VD->getLocation();
1523 }
1524
1525 // Returns the literal text of the identifier of the given variable declaration.
1526 static std::optional<StringRef>
getVarDeclIdentifierText(const VarDecl * VD,const SourceManager & SM,const LangOptions & LangOpts)1527 getVarDeclIdentifierText(const VarDecl *VD, const SourceManager &SM,
1528 const LangOptions &LangOpts) {
1529 SourceLocation ParmIdentBeginLoc = getVarDeclIdentifierLoc(VD);
1530 SourceLocation ParmIdentEndLoc =
1531 Lexer::getLocForEndOfToken(ParmIdentBeginLoc, 0, SM, LangOpts);
1532
1533 if (ParmIdentEndLoc.isMacroID() &&
1534 !Lexer::isAtEndOfMacroExpansion(ParmIdentEndLoc, SM, LangOpts))
1535 return std::nullopt;
1536 return getRangeText({ParmIdentBeginLoc, ParmIdentEndLoc}, SM, LangOpts);
1537 }
1538
1539 // We cannot fix a variable declaration if it has some other specifiers than the
1540 // type specifier. Because the source ranges of those specifiers could overlap
1541 // with the source range that is being replaced using fix-its. Especially when
1542 // we often cannot obtain accurate source ranges of cv-qualified type
1543 // specifiers.
1544 // FIXME: also deal with type attributes
hasUnsupportedSpecifiers(const VarDecl * VD,const SourceManager & SM)1545 static bool hasUnsupportedSpecifiers(const VarDecl *VD,
1546 const SourceManager &SM) {
1547 // AttrRangeOverlapping: true if at least one attribute of `VD` overlaps the
1548 // source range of `VD`:
1549 bool AttrRangeOverlapping = llvm::any_of(VD->attrs(), [&](Attr *At) -> bool {
1550 return !(SM.isBeforeInTranslationUnit(At->getRange().getEnd(),
1551 VD->getBeginLoc())) &&
1552 !(SM.isBeforeInTranslationUnit(VD->getEndLoc(),
1553 At->getRange().getBegin()));
1554 });
1555 return VD->isInlineSpecified() || VD->isConstexpr() ||
1556 VD->hasConstantInitialization() || !VD->hasLocalStorage() ||
1557 AttrRangeOverlapping;
1558 }
1559
1560 // Returns the `SourceRange` of `D`. The reason why this function exists is
1561 // that `D->getSourceRange()` may return a range where the end location is the
1562 // starting location of the last token. The end location of the source range
1563 // returned by this function is the last location of the last token.
getSourceRangeToTokenEnd(const Decl * D,const SourceManager & SM,const LangOptions & LangOpts)1564 static SourceRange getSourceRangeToTokenEnd(const Decl *D,
1565 const SourceManager &SM,
1566 const LangOptions &LangOpts) {
1567 SourceLocation Begin = D->getBeginLoc();
1568 SourceLocation
1569 End = // `D->getEndLoc` should always return the starting location of the
1570 // last token, so we should get the end of the token
1571 Lexer::getLocForEndOfToken(D->getEndLoc(), 0, SM, LangOpts);
1572
1573 return SourceRange(Begin, End);
1574 }
1575
1576 // Returns the text of the pointee type of `T` from a `VarDecl` of a pointer
1577 // type. The text is obtained through from `TypeLoc`s. Since `TypeLoc` does not
1578 // have source ranges of qualifiers ( The `QualifiedTypeLoc` looks hacky too me
1579 // :( ), `Qualifiers` of the pointee type is returned separately through the
1580 // output parameter `QualifiersToAppend`.
1581 static std::optional<std::string>
getPointeeTypeText(const VarDecl * VD,const SourceManager & SM,const LangOptions & LangOpts,std::optional<Qualifiers> * QualifiersToAppend)1582 getPointeeTypeText(const VarDecl *VD, const SourceManager &SM,
1583 const LangOptions &LangOpts,
1584 std::optional<Qualifiers> *QualifiersToAppend) {
1585 QualType Ty = VD->getType();
1586 QualType PteTy;
1587
1588 assert(Ty->isPointerType() && !Ty->isFunctionPointerType() &&
1589 "Expecting a VarDecl of type of pointer to object type");
1590 PteTy = Ty->getPointeeType();
1591
1592 TypeLoc TyLoc = VD->getTypeSourceInfo()->getTypeLoc().getUnqualifiedLoc();
1593 TypeLoc PteTyLoc;
1594
1595 // We only deal with the cases that we know `TypeLoc::getNextTypeLoc` returns
1596 // the `TypeLoc` of the pointee type:
1597 switch (TyLoc.getTypeLocClass()) {
1598 case TypeLoc::ConstantArray:
1599 case TypeLoc::IncompleteArray:
1600 case TypeLoc::VariableArray:
1601 case TypeLoc::DependentSizedArray:
1602 case TypeLoc::Decayed:
1603 assert(isa<ParmVarDecl>(VD) && "An array type shall not be treated as a "
1604 "pointer type unless it decays.");
1605 PteTyLoc = TyLoc.getNextTypeLoc();
1606 break;
1607 case TypeLoc::Pointer:
1608 PteTyLoc = TyLoc.castAs<PointerTypeLoc>().getPointeeLoc();
1609 break;
1610 default:
1611 return std::nullopt;
1612 }
1613 if (PteTyLoc.isNull())
1614 // Sometimes we cannot get a useful `TypeLoc` for the pointee type, e.g.,
1615 // when the pointer type is `auto`.
1616 return std::nullopt;
1617
1618 SourceLocation IdentLoc = getVarDeclIdentifierLoc(VD);
1619
1620 if (!(IdentLoc.isValid() && PteTyLoc.getSourceRange().isValid())) {
1621 // We are expecting these locations to be valid. But in some cases, they are
1622 // not all valid. It is a Clang bug to me and we are not responsible for
1623 // fixing it. So we will just give up for now when it happens.
1624 return std::nullopt;
1625 }
1626
1627 // Note that TypeLoc.getEndLoc() returns the begin location of the last token:
1628 SourceLocation PteEndOfTokenLoc =
1629 Lexer::getLocForEndOfToken(PteTyLoc.getEndLoc(), 0, SM, LangOpts);
1630
1631 if (!PteEndOfTokenLoc.isValid())
1632 // Sometimes we cannot get the end location of the pointee type, e.g., when
1633 // there are macros involved.
1634 return std::nullopt;
1635 if (!SM.isBeforeInTranslationUnit(PteEndOfTokenLoc, IdentLoc)) {
1636 // We only deal with the cases where the source text of the pointee type
1637 // appears on the left-hand side of the variable identifier completely,
1638 // including the following forms:
1639 // `T ident`,
1640 // `T ident[]`, where `T` is any type.
1641 // Examples of excluded cases are `T (*ident)[]` or `T ident[][n]`.
1642 return std::nullopt;
1643 }
1644 if (PteTy.hasQualifiers()) {
1645 // TypeLoc does not provide source ranges for qualifiers (it says it's
1646 // intentional but seems fishy to me), so we cannot get the full text
1647 // `PteTy` via source ranges.
1648 *QualifiersToAppend = PteTy.getQualifiers();
1649 }
1650 return getRangeText({PteTyLoc.getBeginLoc(), PteEndOfTokenLoc}, SM, LangOpts)
1651 ->str();
1652 }
1653
1654 // Returns the text of the name (with qualifiers) of a `FunctionDecl`.
getFunNameText(const FunctionDecl * FD,const SourceManager & SM,const LangOptions & LangOpts)1655 static std::optional<StringRef> getFunNameText(const FunctionDecl *FD,
1656 const SourceManager &SM,
1657 const LangOptions &LangOpts) {
1658 SourceLocation BeginLoc = FD->getQualifier()
1659 ? FD->getQualifierLoc().getBeginLoc()
1660 : FD->getNameInfo().getBeginLoc();
1661 // Note that `FD->getNameInfo().getEndLoc()` returns the begin location of the
1662 // last token:
1663 SourceLocation EndLoc = Lexer::getLocForEndOfToken(
1664 FD->getNameInfo().getEndLoc(), 0, SM, LangOpts);
1665 SourceRange NameRange{BeginLoc, EndLoc};
1666
1667 return getRangeText(NameRange, SM, LangOpts);
1668 }
1669
1670 // Returns the text representing a `std::span` type where the element type is
1671 // represented by `EltTyText`.
1672 //
1673 // Note the optional parameter `Qualifiers`: one needs to pass qualifiers
1674 // explicitly if the element type needs to be qualified.
1675 static std::string
getSpanTypeText(StringRef EltTyText,std::optional<Qualifiers> Quals=std::nullopt)1676 getSpanTypeText(StringRef EltTyText,
1677 std::optional<Qualifiers> Quals = std::nullopt) {
1678 const char *const SpanOpen = "std::span<";
1679
1680 if (Quals)
1681 return SpanOpen + EltTyText.str() + ' ' + Quals->getAsString() + '>';
1682 return SpanOpen + EltTyText.str() + '>';
1683 }
1684
1685 std::optional<FixItList>
getFixits(const Strategy & s) const1686 DerefSimplePtrArithFixableGadget::getFixits(const Strategy &s) const {
1687 const VarDecl *VD = dyn_cast<VarDecl>(BaseDeclRefExpr->getDecl());
1688
1689 if (VD && s.lookup(VD) == Strategy::Kind::Span) {
1690 ASTContext &Ctx = VD->getASTContext();
1691 // std::span can't represent elements before its begin()
1692 if (auto ConstVal = Offset->getIntegerConstantExpr(Ctx))
1693 if (ConstVal->isNegative())
1694 return std::nullopt;
1695
1696 // note that the expr may (oddly) has multiple layers of parens
1697 // example:
1698 // *((..(pointer + 123)..))
1699 // goal:
1700 // pointer[123]
1701 // Fix-It:
1702 // remove '*('
1703 // replace ' + ' with '['
1704 // replace ')' with ']'
1705
1706 // example:
1707 // *((..(123 + pointer)..))
1708 // goal:
1709 // 123[pointer]
1710 // Fix-It:
1711 // remove '*('
1712 // replace ' + ' with '['
1713 // replace ')' with ']'
1714
1715 const Expr *LHS = AddOp->getLHS(), *RHS = AddOp->getRHS();
1716 const SourceManager &SM = Ctx.getSourceManager();
1717 const LangOptions &LangOpts = Ctx.getLangOpts();
1718 CharSourceRange StarWithTrailWhitespace =
1719 clang::CharSourceRange::getCharRange(DerefOp->getOperatorLoc(),
1720 LHS->getBeginLoc());
1721
1722 std::optional<SourceLocation> LHSLocation = getPastLoc(LHS, SM, LangOpts);
1723 if (!LHSLocation)
1724 return std::nullopt;
1725
1726 CharSourceRange PlusWithSurroundingWhitespace =
1727 clang::CharSourceRange::getCharRange(*LHSLocation, RHS->getBeginLoc());
1728
1729 std::optional<SourceLocation> AddOpLocation =
1730 getPastLoc(AddOp, SM, LangOpts);
1731 std::optional<SourceLocation> DerefOpLocation =
1732 getPastLoc(DerefOp, SM, LangOpts);
1733
1734 if (!AddOpLocation || !DerefOpLocation)
1735 return std::nullopt;
1736
1737 CharSourceRange ClosingParenWithPrecWhitespace =
1738 clang::CharSourceRange::getCharRange(*AddOpLocation, *DerefOpLocation);
1739
1740 return FixItList{
1741 {FixItHint::CreateRemoval(StarWithTrailWhitespace),
1742 FixItHint::CreateReplacement(PlusWithSurroundingWhitespace, "["),
1743 FixItHint::CreateReplacement(ClosingParenWithPrecWhitespace, "]")}};
1744 }
1745 return std::nullopt; // something wrong or unsupported, give up
1746 }
1747
1748 std::optional<FixItList>
getFixits(const Strategy & S) const1749 PointerDereferenceGadget::getFixits(const Strategy &S) const {
1750 const VarDecl *VD = cast<VarDecl>(BaseDeclRefExpr->getDecl());
1751 switch (S.lookup(VD)) {
1752 case Strategy::Kind::Span: {
1753 ASTContext &Ctx = VD->getASTContext();
1754 SourceManager &SM = Ctx.getSourceManager();
1755 // Required changes: *(ptr); => (ptr[0]); and *ptr; => ptr[0]
1756 // Deletes the *operand
1757 CharSourceRange derefRange = clang::CharSourceRange::getCharRange(
1758 Op->getBeginLoc(), Op->getBeginLoc().getLocWithOffset(1));
1759 // Inserts the [0]
1760 if (auto LocPastOperand =
1761 getPastLoc(BaseDeclRefExpr, SM, Ctx.getLangOpts())) {
1762 return FixItList{{FixItHint::CreateRemoval(derefRange),
1763 FixItHint::CreateInsertion(*LocPastOperand, "[0]")}};
1764 }
1765 break;
1766 }
1767 case Strategy::Kind::Iterator:
1768 case Strategy::Kind::Array:
1769 case Strategy::Kind::Vector:
1770 llvm_unreachable("Strategy not implemented yet!");
1771 case Strategy::Kind::Wontfix:
1772 llvm_unreachable("Invalid strategy!");
1773 }
1774
1775 return std::nullopt;
1776 }
1777
1778 // Generates fix-its replacing an expression of the form UPC(DRE) with
1779 // `DRE.data()`
getFixits(const Strategy & S) const1780 std::optional<FixItList> UPCStandalonePointerGadget::getFixits(const Strategy &S)
1781 const {
1782 const auto VD = cast<VarDecl>(Node->getDecl());
1783 switch (S.lookup(VD)) {
1784 case Strategy::Kind::Span: {
1785 ASTContext &Ctx = VD->getASTContext();
1786 SourceManager &SM = Ctx.getSourceManager();
1787 // Inserts the .data() after the DRE
1788 std::optional<SourceLocation> EndOfOperand =
1789 getPastLoc(Node, SM, Ctx.getLangOpts());
1790
1791 if (EndOfOperand)
1792 return FixItList{{FixItHint::CreateInsertion(
1793 *EndOfOperand, ".data()")}};
1794 // FIXME: Points inside a macro expansion.
1795 break;
1796 }
1797 case Strategy::Kind::Wontfix:
1798 case Strategy::Kind::Iterator:
1799 case Strategy::Kind::Array:
1800 case Strategy::Kind::Vector:
1801 llvm_unreachable("unsupported strategies for FixableGadgets");
1802 }
1803
1804 return std::nullopt;
1805 }
1806
1807 // Generates fix-its replacing an expression of the form `&DRE[e]` with
1808 // `&DRE.data()[e]`:
1809 static std::optional<FixItList>
fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator * Node)1810 fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node) {
1811 const auto *ArraySub = cast<ArraySubscriptExpr>(Node->getSubExpr());
1812 const auto *DRE = cast<DeclRefExpr>(ArraySub->getBase()->IgnoreImpCasts());
1813 // FIXME: this `getASTContext` call is costly, we should pass the
1814 // ASTContext in:
1815 const ASTContext &Ctx = DRE->getDecl()->getASTContext();
1816 const Expr *Idx = ArraySub->getIdx();
1817 const SourceManager &SM = Ctx.getSourceManager();
1818 const LangOptions &LangOpts = Ctx.getLangOpts();
1819 std::stringstream SS;
1820 bool IdxIsLitZero = false;
1821
1822 if (auto ICE = Idx->getIntegerConstantExpr(Ctx))
1823 if ((*ICE).isZero())
1824 IdxIsLitZero = true;
1825 std::optional<StringRef> DreString = getExprText(DRE, SM, LangOpts);
1826 if (!DreString)
1827 return std::nullopt;
1828
1829 if (IdxIsLitZero) {
1830 // If the index is literal zero, we produce the most concise fix-it:
1831 SS << (*DreString).str() << ".data()";
1832 } else {
1833 std::optional<StringRef> IndexString = getExprText(Idx, SM, LangOpts);
1834 if (!IndexString)
1835 return std::nullopt;
1836
1837 SS << "&" << (*DreString).str() << ".data()"
1838 << "[" << (*IndexString).str() << "]";
1839 }
1840 return FixItList{
1841 FixItHint::CreateReplacement(Node->getSourceRange(), SS.str())};
1842 }
1843
1844 std::optional<FixItList>
getFixits(const Strategy & S) const1845 UUCAddAssignGadget::getFixits(const Strategy &S) const {
1846 DeclUseList DREs = getClaimedVarUseSites();
1847
1848 if (DREs.size() != 1)
1849 return std::nullopt; // In cases of `Ptr += n` where `Ptr` is not a DRE, we
1850 // give up
1851 if (const VarDecl *VD = dyn_cast<VarDecl>(DREs.front()->getDecl())) {
1852 if (S.lookup(VD) == Strategy::Kind::Span) {
1853 FixItList Fixes;
1854
1855 const Stmt *AddAssignNode = getBaseStmt();
1856 StringRef varName = VD->getName();
1857 const ASTContext &Ctx = VD->getASTContext();
1858
1859 if (!isNonNegativeIntegerExpr(Offset, VD, Ctx))
1860 return std::nullopt;
1861
1862 // To transform UUC(p += n) to UUC(p = p.subspan(..)):
1863 bool NotParenExpr =
1864 (Offset->IgnoreParens()->getBeginLoc() == Offset->getBeginLoc());
1865 std::string SS = varName.str() + " = " + varName.str() + ".subspan";
1866 if (NotParenExpr)
1867 SS += "(";
1868
1869 std::optional<SourceLocation> AddAssignLocation = getEndCharLoc(
1870 AddAssignNode, Ctx.getSourceManager(), Ctx.getLangOpts());
1871 if (!AddAssignLocation)
1872 return std::nullopt;
1873
1874 Fixes.push_back(FixItHint::CreateReplacement(
1875 SourceRange(AddAssignNode->getBeginLoc(), Node->getOperatorLoc()),
1876 SS));
1877 if (NotParenExpr)
1878 Fixes.push_back(FixItHint::CreateInsertion(
1879 Offset->getEndLoc().getLocWithOffset(1), ")"));
1880 return Fixes;
1881 }
1882 }
1883 return std::nullopt; // Not in the cases that we can handle for now, give up.
1884 }
1885
getFixits(const Strategy & S) const1886 std::optional<FixItList> UPCPreIncrementGadget::getFixits(const Strategy &S) const {
1887 DeclUseList DREs = getClaimedVarUseSites();
1888
1889 if (DREs.size() != 1)
1890 return std::nullopt; // In cases of `++Ptr` where `Ptr` is not a DRE, we
1891 // give up
1892 if (const VarDecl *VD = dyn_cast<VarDecl>(DREs.front()->getDecl())) {
1893 if (S.lookup(VD) == Strategy::Kind::Span) {
1894 FixItList Fixes;
1895 std::stringstream SS;
1896 const Stmt *PreIncNode = getBaseStmt();
1897 StringRef varName = VD->getName();
1898 const ASTContext &Ctx = VD->getASTContext();
1899
1900 // To transform UPC(++p) to UPC((p = p.subspan(1)).data()):
1901 SS << "(" << varName.data() << " = " << varName.data()
1902 << ".subspan(1)).data()";
1903 std::optional<SourceLocation> PreIncLocation =
1904 getEndCharLoc(PreIncNode, Ctx.getSourceManager(), Ctx.getLangOpts());
1905 if (!PreIncLocation)
1906 return std::nullopt;
1907
1908 Fixes.push_back(FixItHint::CreateReplacement(
1909 SourceRange(PreIncNode->getBeginLoc(), *PreIncLocation), SS.str()));
1910 return Fixes;
1911 }
1912 }
1913 return std::nullopt; // Not in the cases that we can handle for now, give up.
1914 }
1915
1916
1917 // For a non-null initializer `Init` of `T *` type, this function returns
1918 // `FixItHint`s producing a list initializer `{Init, S}` as a part of a fix-it
1919 // to output stream.
1920 // In many cases, this function cannot figure out the actual extent `S`. It
1921 // then will use a place holder to replace `S` to ask users to fill `S` in. The
1922 // initializer shall be used to initialize a variable of type `std::span<T>`.
1923 //
1924 // FIXME: Support multi-level pointers
1925 //
1926 // Parameters:
1927 // `Init` a pointer to the initializer expression
1928 // `Ctx` a reference to the ASTContext
1929 static FixItList
FixVarInitializerWithSpan(const Expr * Init,ASTContext & Ctx,const StringRef UserFillPlaceHolder)1930 FixVarInitializerWithSpan(const Expr *Init, ASTContext &Ctx,
1931 const StringRef UserFillPlaceHolder) {
1932 const SourceManager &SM = Ctx.getSourceManager();
1933 const LangOptions &LangOpts = Ctx.getLangOpts();
1934
1935 // If `Init` has a constant value that is (or equivalent to) a
1936 // NULL pointer, we use the default constructor to initialize the span
1937 // object, i.e., a `std:span` variable declaration with no initializer.
1938 // So the fix-it is just to remove the initializer.
1939 if (Init->isNullPointerConstant(Ctx,
1940 // FIXME: Why does this function not ask for `const ASTContext
1941 // &`? It should. Maybe worth an NFC patch later.
1942 Expr::NullPointerConstantValueDependence::
1943 NPC_ValueDependentIsNotNull)) {
1944 std::optional<SourceLocation> InitLocation =
1945 getEndCharLoc(Init, SM, LangOpts);
1946 if (!InitLocation)
1947 return {};
1948
1949 SourceRange SR(Init->getBeginLoc(), *InitLocation);
1950
1951 return {FixItHint::CreateRemoval(SR)};
1952 }
1953
1954 FixItList FixIts{};
1955 std::string ExtentText = UserFillPlaceHolder.data();
1956 StringRef One = "1";
1957
1958 // Insert `{` before `Init`:
1959 FixIts.push_back(FixItHint::CreateInsertion(Init->getBeginLoc(), "{"));
1960 // Try to get the data extent. Break into different cases:
1961 if (auto CxxNew = dyn_cast<CXXNewExpr>(Init->IgnoreImpCasts())) {
1962 // In cases `Init` is `new T[n]` and there is no explicit cast over
1963 // `Init`, we know that `Init` must evaluates to a pointer to `n` objects
1964 // of `T`. So the extent is `n` unless `n` has side effects. Similar but
1965 // simpler for the case where `Init` is `new T`.
1966 if (const Expr *Ext = CxxNew->getArraySize().value_or(nullptr)) {
1967 if (!Ext->HasSideEffects(Ctx)) {
1968 std::optional<StringRef> ExtentString = getExprText(Ext, SM, LangOpts);
1969 if (!ExtentString)
1970 return {};
1971 ExtentText = *ExtentString;
1972 }
1973 } else if (!CxxNew->isArray())
1974 // Although the initializer is not allocating a buffer, the pointer
1975 // variable could still be used in buffer access operations.
1976 ExtentText = One;
1977 } else if (const auto *CArrTy = Ctx.getAsConstantArrayType(
1978 Init->IgnoreImpCasts()->getType())) {
1979 // In cases `Init` is of an array type after stripping off implicit casts,
1980 // the extent is the array size. Note that if the array size is not a
1981 // constant, we cannot use it as the extent.
1982 ExtentText = getAPIntText(CArrTy->getSize());
1983 } else {
1984 // In cases `Init` is of the form `&Var` after stripping of implicit
1985 // casts, where `&` is the built-in operator, the extent is 1.
1986 if (auto AddrOfExpr = dyn_cast<UnaryOperator>(Init->IgnoreImpCasts()))
1987 if (AddrOfExpr->getOpcode() == UnaryOperatorKind::UO_AddrOf &&
1988 isa_and_present<DeclRefExpr>(AddrOfExpr->getSubExpr()))
1989 ExtentText = One;
1990 // TODO: we can handle more cases, e.g., `&a[0]`, `&a`, `std::addressof`,
1991 // and explicit casting, etc. etc.
1992 }
1993
1994 SmallString<32> StrBuffer{};
1995 std::optional<SourceLocation> LocPassInit = getPastLoc(Init, SM, LangOpts);
1996
1997 if (!LocPassInit)
1998 return {};
1999
2000 StrBuffer.append(", ");
2001 StrBuffer.append(ExtentText);
2002 StrBuffer.append("}");
2003 FixIts.push_back(FixItHint::CreateInsertion(*LocPassInit, StrBuffer.str()));
2004 return FixIts;
2005 }
2006
2007 #ifndef NDEBUG
2008 #define DEBUG_NOTE_DECL_FAIL(D, Msg) \
2009 Handler.addDebugNoteForVar((D), (D)->getBeginLoc(), "failed to produce fixit for declaration '" + (D)->getNameAsString() + "'" + (Msg))
2010 #else
2011 #define DEBUG_NOTE_DECL_FAIL(D, Msg)
2012 #endif
2013
2014 // For the given variable declaration with a pointer-to-T type, returns the text
2015 // `std::span<T>`. If it is unable to generate the text, returns
2016 // `std::nullopt`.
createSpanTypeForVarDecl(const VarDecl * VD,const ASTContext & Ctx)2017 static std::optional<std::string> createSpanTypeForVarDecl(const VarDecl *VD,
2018 const ASTContext &Ctx) {
2019 assert(VD->getType()->isPointerType());
2020
2021 std::optional<Qualifiers> PteTyQualifiers = std::nullopt;
2022 std::optional<std::string> PteTyText = getPointeeTypeText(
2023 VD, Ctx.getSourceManager(), Ctx.getLangOpts(), &PteTyQualifiers);
2024
2025 if (!PteTyText)
2026 return std::nullopt;
2027
2028 std::string SpanTyText = "std::span<";
2029
2030 SpanTyText.append(*PteTyText);
2031 // Append qualifiers to span element type if any:
2032 if (PteTyQualifiers) {
2033 SpanTyText.append(" ");
2034 SpanTyText.append(PteTyQualifiers->getAsString());
2035 }
2036 SpanTyText.append(">");
2037 return SpanTyText;
2038 }
2039
2040 // For a `VarDecl` of the form `T * var (= Init)?`, this
2041 // function generates fix-its that
2042 // 1) replace `T * var` with `std::span<T> var`; and
2043 // 2) change `Init` accordingly to a span constructor, if it exists.
2044 //
2045 // FIXME: support Multi-level pointers
2046 //
2047 // Parameters:
2048 // `D` a pointer the variable declaration node
2049 // `Ctx` a reference to the ASTContext
2050 // `UserFillPlaceHolder` the user-input placeholder text
2051 // Returns:
2052 // the non-empty fix-it list, if fix-its are successfuly generated; empty
2053 // list otherwise.
fixLocalVarDeclWithSpan(const VarDecl * D,ASTContext & Ctx,const StringRef UserFillPlaceHolder,UnsafeBufferUsageHandler & Handler)2054 static FixItList fixLocalVarDeclWithSpan(const VarDecl *D, ASTContext &Ctx,
2055 const StringRef UserFillPlaceHolder,
2056 UnsafeBufferUsageHandler &Handler) {
2057 if (hasUnsupportedSpecifiers(D, Ctx.getSourceManager()))
2058 return {};
2059
2060 FixItList FixIts{};
2061 std::optional<std::string> SpanTyText = createSpanTypeForVarDecl(D, Ctx);
2062
2063 if (!SpanTyText) {
2064 DEBUG_NOTE_DECL_FAIL(D, " : failed to generate 'std::span' type");
2065 return {};
2066 }
2067
2068 // Will hold the text for `std::span<T> Ident`:
2069 std::stringstream SS;
2070
2071 SS << *SpanTyText;
2072 // Append qualifiers to the type of `D`, if any:
2073 if (D->getType().hasQualifiers())
2074 SS << " " << D->getType().getQualifiers().getAsString();
2075
2076 // The end of the range of the original source that will be replaced
2077 // by `std::span<T> ident`:
2078 SourceLocation EndLocForReplacement = D->getEndLoc();
2079 std::optional<StringRef> IdentText =
2080 getVarDeclIdentifierText(D, Ctx.getSourceManager(), Ctx.getLangOpts());
2081
2082 if (!IdentText) {
2083 DEBUG_NOTE_DECL_FAIL(D, " : failed to locate the identifier");
2084 return {};
2085 }
2086 // Fix the initializer if it exists:
2087 if (const Expr *Init = D->getInit()) {
2088 FixItList InitFixIts =
2089 FixVarInitializerWithSpan(Init, Ctx, UserFillPlaceHolder);
2090 if (InitFixIts.empty())
2091 return {};
2092 FixIts.insert(FixIts.end(), std::make_move_iterator(InitFixIts.begin()),
2093 std::make_move_iterator(InitFixIts.end()));
2094 // If the declaration has the form `T *ident = init`, we want to replace
2095 // `T *ident = ` with `std::span<T> ident`:
2096 EndLocForReplacement = Init->getBeginLoc().getLocWithOffset(-1);
2097 }
2098 SS << " " << IdentText->str();
2099 if (!EndLocForReplacement.isValid()) {
2100 DEBUG_NOTE_DECL_FAIL(D, " : failed to locate the end of the declaration");
2101 return {};
2102 }
2103 FixIts.push_back(FixItHint::CreateReplacement(
2104 SourceRange(D->getBeginLoc(), EndLocForReplacement), SS.str()));
2105 return FixIts;
2106 }
2107
hasConflictingOverload(const FunctionDecl * FD)2108 static bool hasConflictingOverload(const FunctionDecl *FD) {
2109 return !FD->getDeclContext()->lookup(FD->getDeclName()).isSingleResult();
2110 }
2111
2112 // For a `FunctionDecl`, whose `ParmVarDecl`s are being changed to have new
2113 // types, this function produces fix-its to make the change self-contained. Let
2114 // 'F' be the entity defined by the original `FunctionDecl` and "NewF" be the
2115 // entity defined by the `FunctionDecl` after the change to the parameters.
2116 // Fix-its produced by this function are
2117 // 1. Add the `[[clang::unsafe_buffer_usage]]` attribute to each declaration
2118 // of 'F';
2119 // 2. Create a declaration of "NewF" next to each declaration of `F`;
2120 // 3. Create a definition of "F" (as its' original definition is now belongs
2121 // to "NewF") next to its original definition. The body of the creating
2122 // definition calls to "NewF".
2123 //
2124 // Example:
2125 //
2126 // void f(int *p); // original declaration
2127 // void f(int *p) { // original definition
2128 // p[5];
2129 // }
2130 //
2131 // To change the parameter `p` to be of `std::span<int>` type, we
2132 // also add overloads:
2133 //
2134 // [[clang::unsafe_buffer_usage]] void f(int *p); // original decl
2135 // void f(std::span<int> p); // added overload decl
2136 // void f(std::span<int> p) { // original def where param is changed
2137 // p[5];
2138 // }
2139 // [[clang::unsafe_buffer_usage]] void f(int *p) { // added def
2140 // return f(std::span(p, <# size #>));
2141 // }
2142 //
2143 static std::optional<FixItList>
createOverloadsForFixedParams(const Strategy & S,const FunctionDecl * FD,const ASTContext & Ctx,UnsafeBufferUsageHandler & Handler)2144 createOverloadsForFixedParams(const Strategy &S, const FunctionDecl *FD,
2145 const ASTContext &Ctx,
2146 UnsafeBufferUsageHandler &Handler) {
2147 // FIXME: need to make this conflict checking better:
2148 if (hasConflictingOverload(FD))
2149 return std::nullopt;
2150
2151 const SourceManager &SM = Ctx.getSourceManager();
2152 const LangOptions &LangOpts = Ctx.getLangOpts();
2153 const unsigned NumParms = FD->getNumParams();
2154 std::vector<std::string> NewTysTexts(NumParms);
2155 std::vector<bool> ParmsMask(NumParms, false);
2156 bool AtLeastOneParmToFix = false;
2157
2158 for (unsigned i = 0; i < NumParms; i++) {
2159 const ParmVarDecl *PVD = FD->getParamDecl(i);
2160
2161 if (S.lookup(PVD) == Strategy::Kind::Wontfix)
2162 continue;
2163 if (S.lookup(PVD) != Strategy::Kind::Span)
2164 // Not supported, not suppose to happen:
2165 return std::nullopt;
2166
2167 std::optional<Qualifiers> PteTyQuals = std::nullopt;
2168 std::optional<std::string> PteTyText =
2169 getPointeeTypeText(PVD, SM, LangOpts, &PteTyQuals);
2170
2171 if (!PteTyText)
2172 // something wrong in obtaining the text of the pointee type, give up
2173 return std::nullopt;
2174 // FIXME: whether we should create std::span type depends on the Strategy.
2175 NewTysTexts[i] = getSpanTypeText(*PteTyText, PteTyQuals);
2176 ParmsMask[i] = true;
2177 AtLeastOneParmToFix = true;
2178 }
2179 if (!AtLeastOneParmToFix)
2180 // No need to create function overloads:
2181 return {};
2182 // FIXME Respect indentation of the original code.
2183
2184 // A lambda that creates the text representation of a function declaration
2185 // with the new type signatures:
2186 const auto NewOverloadSignatureCreator =
2187 [&SM, &LangOpts, &NewTysTexts,
2188 &ParmsMask](const FunctionDecl *FD) -> std::optional<std::string> {
2189 std::stringstream SS;
2190
2191 SS << ";";
2192 SS << getEndOfLine().str();
2193 // Append: ret-type func-name "("
2194 if (auto Prefix = getRangeText(
2195 SourceRange(FD->getBeginLoc(), (*FD->param_begin())->getBeginLoc()),
2196 SM, LangOpts))
2197 SS << Prefix->str();
2198 else
2199 return std::nullopt; // give up
2200 // Append: parameter-type-list
2201 const unsigned NumParms = FD->getNumParams();
2202
2203 for (unsigned i = 0; i < NumParms; i++) {
2204 const ParmVarDecl *Parm = FD->getParamDecl(i);
2205
2206 if (Parm->isImplicit())
2207 continue;
2208 if (ParmsMask[i]) {
2209 // This `i`-th parameter will be fixed with `NewTysTexts[i]` being its
2210 // new type:
2211 SS << NewTysTexts[i];
2212 // print parameter name if provided:
2213 if (IdentifierInfo *II = Parm->getIdentifier())
2214 SS << ' ' << II->getName().str();
2215 } else if (auto ParmTypeText = getRangeText(
2216 getSourceRangeToTokenEnd(Parm, SM, LangOpts),
2217 SM, LangOpts)) {
2218 // print the whole `Parm` without modification:
2219 SS << ParmTypeText->str();
2220 } else
2221 return std::nullopt; // something wrong, give up
2222 if (i != NumParms - 1)
2223 SS << ", ";
2224 }
2225 SS << ")";
2226 return SS.str();
2227 };
2228
2229 // A lambda that creates the text representation of a function definition with
2230 // the original signature:
2231 const auto OldOverloadDefCreator =
2232 [&Handler, &SM, &LangOpts, &NewTysTexts,
2233 &ParmsMask](const FunctionDecl *FD) -> std::optional<std::string> {
2234 std::stringstream SS;
2235
2236 SS << getEndOfLine().str();
2237 // Append: attr-name ret-type func-name "(" param-list ")" "{"
2238 if (auto FDPrefix = getRangeText(
2239 SourceRange(FD->getBeginLoc(), FD->getBody()->getBeginLoc()), SM,
2240 LangOpts))
2241 SS << Handler.getUnsafeBufferUsageAttributeTextAt(FD->getBeginLoc(), " ")
2242 << FDPrefix->str() << "{";
2243 else
2244 return std::nullopt;
2245 // Append: "return" func-name "("
2246 if (auto FunQualName = getFunNameText(FD, SM, LangOpts))
2247 SS << "return " << FunQualName->str() << "(";
2248 else
2249 return std::nullopt;
2250
2251 // Append: arg-list
2252 const unsigned NumParms = FD->getNumParams();
2253 for (unsigned i = 0; i < NumParms; i++) {
2254 const ParmVarDecl *Parm = FD->getParamDecl(i);
2255
2256 if (Parm->isImplicit())
2257 continue;
2258 // FIXME: If a parameter has no name, it is unused in the
2259 // definition. So we could just leave it as it is.
2260 if (!Parm->getIdentifier())
2261 // If a parameter of a function definition has no name:
2262 return std::nullopt;
2263 if (ParmsMask[i])
2264 // This is our spanified paramter!
2265 SS << NewTysTexts[i] << "(" << Parm->getIdentifier()->getName().str()
2266 << ", " << getUserFillPlaceHolder("size") << ")";
2267 else
2268 SS << Parm->getIdentifier()->getName().str();
2269 if (i != NumParms - 1)
2270 SS << ", ";
2271 }
2272 // finish call and the body
2273 SS << ");}" << getEndOfLine().str();
2274 // FIXME: 80-char line formatting?
2275 return SS.str();
2276 };
2277
2278 FixItList FixIts{};
2279 for (FunctionDecl *FReDecl : FD->redecls()) {
2280 std::optional<SourceLocation> Loc = getPastLoc(FReDecl, SM, LangOpts);
2281
2282 if (!Loc)
2283 return {};
2284 if (FReDecl->isThisDeclarationADefinition()) {
2285 assert(FReDecl == FD && "inconsistent function definition");
2286 // Inserts a definition with the old signature to the end of
2287 // `FReDecl`:
2288 if (auto OldOverloadDef = OldOverloadDefCreator(FReDecl))
2289 FixIts.emplace_back(FixItHint::CreateInsertion(*Loc, *OldOverloadDef));
2290 else
2291 return {}; // give up
2292 } else {
2293 // Adds the unsafe-buffer attribute (if not already there) to `FReDecl`:
2294 if (!FReDecl->hasAttr<UnsafeBufferUsageAttr>()) {
2295 FixIts.emplace_back(FixItHint::CreateInsertion(
2296 FReDecl->getBeginLoc(), Handler.getUnsafeBufferUsageAttributeTextAt(
2297 FReDecl->getBeginLoc(), " ")));
2298 }
2299 // Inserts a declaration with the new signature to the end of `FReDecl`:
2300 if (auto NewOverloadDecl = NewOverloadSignatureCreator(FReDecl))
2301 FixIts.emplace_back(FixItHint::CreateInsertion(*Loc, *NewOverloadDecl));
2302 else
2303 return {};
2304 }
2305 }
2306 return FixIts;
2307 }
2308
2309 // To fix a `ParmVarDecl` to be of `std::span` type.
fixParamWithSpan(const ParmVarDecl * PVD,const ASTContext & Ctx,UnsafeBufferUsageHandler & Handler)2310 static FixItList fixParamWithSpan(const ParmVarDecl *PVD, const ASTContext &Ctx,
2311 UnsafeBufferUsageHandler &Handler) {
2312 if (hasUnsupportedSpecifiers(PVD, Ctx.getSourceManager())) {
2313 DEBUG_NOTE_DECL_FAIL(PVD, " : has unsupport specifier(s)");
2314 return {};
2315 }
2316 if (PVD->hasDefaultArg()) {
2317 // FIXME: generate fix-its for default values:
2318 DEBUG_NOTE_DECL_FAIL(PVD, " : has default arg");
2319 return {};
2320 }
2321
2322 std::optional<Qualifiers> PteTyQualifiers = std::nullopt;
2323 std::optional<std::string> PteTyText = getPointeeTypeText(
2324 PVD, Ctx.getSourceManager(), Ctx.getLangOpts(), &PteTyQualifiers);
2325
2326 if (!PteTyText) {
2327 DEBUG_NOTE_DECL_FAIL(PVD, " : invalid pointee type");
2328 return {};
2329 }
2330
2331 std::optional<StringRef> PVDNameText = PVD->getIdentifier()->getName();
2332
2333 if (!PVDNameText) {
2334 DEBUG_NOTE_DECL_FAIL(PVD, " : invalid identifier name");
2335 return {};
2336 }
2337
2338 std::stringstream SS;
2339 std::optional<std::string> SpanTyText = createSpanTypeForVarDecl(PVD, Ctx);
2340
2341 if (PteTyQualifiers)
2342 // Append qualifiers if they exist:
2343 SS << getSpanTypeText(*PteTyText, PteTyQualifiers);
2344 else
2345 SS << getSpanTypeText(*PteTyText);
2346 // Append qualifiers to the type of the parameter:
2347 if (PVD->getType().hasQualifiers())
2348 SS << ' ' << PVD->getType().getQualifiers().getAsString();
2349 // Append parameter's name:
2350 SS << ' ' << PVDNameText->str();
2351 // Add replacement fix-it:
2352 return {FixItHint::CreateReplacement(PVD->getSourceRange(), SS.str())};
2353 }
2354
fixVariableWithSpan(const VarDecl * VD,const DeclUseTracker & Tracker,ASTContext & Ctx,UnsafeBufferUsageHandler & Handler)2355 static FixItList fixVariableWithSpan(const VarDecl *VD,
2356 const DeclUseTracker &Tracker,
2357 ASTContext &Ctx,
2358 UnsafeBufferUsageHandler &Handler) {
2359 const DeclStmt *DS = Tracker.lookupDecl(VD);
2360 if (!DS) {
2361 DEBUG_NOTE_DECL_FAIL(VD, " : variables declared this way not implemented yet");
2362 return {};
2363 }
2364 if (!DS->isSingleDecl()) {
2365 // FIXME: to support handling multiple `VarDecl`s in a single `DeclStmt`
2366 DEBUG_NOTE_DECL_FAIL(VD, " : multiple VarDecls");
2367 return {};
2368 }
2369 // Currently DS is an unused variable but we'll need it when
2370 // non-single decls are implemented, where the pointee type name
2371 // and the '*' are spread around the place.
2372 (void)DS;
2373
2374 // FIXME: handle cases where DS has multiple declarations
2375 return fixLocalVarDeclWithSpan(VD, Ctx, getUserFillPlaceHolder(), Handler);
2376 }
2377
2378 // TODO: we should be consistent to use `std::nullopt` to represent no-fix due
2379 // to any unexpected problem.
2380 static FixItList
fixVariable(const VarDecl * VD,Strategy::Kind K,const Decl * D,const DeclUseTracker & Tracker,ASTContext & Ctx,UnsafeBufferUsageHandler & Handler)2381 fixVariable(const VarDecl *VD, Strategy::Kind K,
2382 /* The function decl under analysis */ const Decl *D,
2383 const DeclUseTracker &Tracker, ASTContext &Ctx,
2384 UnsafeBufferUsageHandler &Handler) {
2385 if (const auto *PVD = dyn_cast<ParmVarDecl>(VD)) {
2386 auto *FD = dyn_cast<clang::FunctionDecl>(PVD->getDeclContext());
2387 if (!FD || FD != D) {
2388 // `FD != D` means that `PVD` belongs to a function that is not being
2389 // analyzed currently. Thus `FD` may not be complete.
2390 DEBUG_NOTE_DECL_FAIL(VD, " : function not currently analyzed");
2391 return {};
2392 }
2393
2394 // TODO If function has a try block we can't change params unless we check
2395 // also its catch block for their use.
2396 // FIXME We might support static class methods, some select methods,
2397 // operators and possibly lamdas.
2398 if (FD->isMain() || FD->isConstexpr() ||
2399 FD->getTemplatedKind() != FunctionDecl::TemplatedKind::TK_NonTemplate ||
2400 FD->isVariadic() ||
2401 // also covers call-operator of lamdas
2402 isa<CXXMethodDecl>(FD) ||
2403 // skip when the function body is a try-block
2404 (FD->hasBody() && isa<CXXTryStmt>(FD->getBody())) ||
2405 FD->isOverloadedOperator()) {
2406 DEBUG_NOTE_DECL_FAIL(VD, " : unsupported function decl");
2407 return {}; // TODO test all these cases
2408 }
2409 }
2410
2411 switch (K) {
2412 case Strategy::Kind::Span: {
2413 if (VD->getType()->isPointerType()) {
2414 if (const auto *PVD = dyn_cast<ParmVarDecl>(VD))
2415 return fixParamWithSpan(PVD, Ctx, Handler);
2416
2417 if (VD->isLocalVarDecl())
2418 return fixVariableWithSpan(VD, Tracker, Ctx, Handler);
2419 }
2420 DEBUG_NOTE_DECL_FAIL(VD, " : not a pointer");
2421 return {};
2422 }
2423 case Strategy::Kind::Iterator:
2424 case Strategy::Kind::Array:
2425 case Strategy::Kind::Vector:
2426 llvm_unreachable("Strategy not implemented yet!");
2427 case Strategy::Kind::Wontfix:
2428 llvm_unreachable("Invalid strategy!");
2429 }
2430 llvm_unreachable("Unknown strategy!");
2431 }
2432
2433 // Returns true iff there exists a `FixItHint` 'h' in `FixIts` such that the
2434 // `RemoveRange` of 'h' overlaps with a macro use.
overlapWithMacro(const FixItList & FixIts)2435 static bool overlapWithMacro(const FixItList &FixIts) {
2436 // FIXME: For now we only check if the range (or the first token) is (part of)
2437 // a macro expansion. Ideally, we want to check for all tokens in the range.
2438 return llvm::any_of(FixIts, [](const FixItHint &Hint) {
2439 auto Range = Hint.RemoveRange;
2440 if (Range.getBegin().isMacroID() || Range.getEnd().isMacroID())
2441 // If the range (or the first token) is (part of) a macro expansion:
2442 return true;
2443 return false;
2444 });
2445 }
2446
2447 // Returns true iff `VD` is a parameter of the declaration `D`:
isParameterOf(const VarDecl * VD,const Decl * D)2448 static bool isParameterOf(const VarDecl *VD, const Decl *D) {
2449 return isa<ParmVarDecl>(VD) &&
2450 VD->getDeclContext() == dyn_cast<DeclContext>(D);
2451 }
2452
2453 // Erases variables in `FixItsForVariable`, if such a variable has an unfixable
2454 // group mate. A variable `v` is unfixable iff `FixItsForVariable` does not
2455 // contain `v`.
eraseVarsForUnfixableGroupMates(std::map<const VarDecl *,FixItList> & FixItsForVariable,const VariableGroupsManager & VarGrpMgr)2456 static void eraseVarsForUnfixableGroupMates(
2457 std::map<const VarDecl *, FixItList> &FixItsForVariable,
2458 const VariableGroupsManager &VarGrpMgr) {
2459 // Variables will be removed from `FixItsForVariable`:
2460 SmallVector<const VarDecl *, 8> ToErase;
2461
2462 for (const auto &[VD, Ignore] : FixItsForVariable) {
2463 VarGrpRef Grp = VarGrpMgr.getGroupOfVar(VD);
2464 if (llvm::any_of(Grp,
2465 [&FixItsForVariable](const VarDecl *GrpMember) -> bool {
2466 return !FixItsForVariable.count(GrpMember);
2467 })) {
2468 // At least one group member cannot be fixed, so we have to erase the
2469 // whole group:
2470 for (const VarDecl *Member : Grp)
2471 ToErase.push_back(Member);
2472 }
2473 }
2474 for (auto *VarToErase : ToErase)
2475 FixItsForVariable.erase(VarToErase);
2476 }
2477
2478 // Returns the fix-its that create bounds-safe function overloads for the
2479 // function `D`, if `D`'s parameters will be changed to safe-types through
2480 // fix-its in `FixItsForVariable`.
2481 //
2482 // NOTE: In case `D`'s parameters will be changed but bounds-safe function
2483 // overloads cannot created, the whole group that contains the parameters will
2484 // be erased from `FixItsForVariable`.
createFunctionOverloadsForParms(std::map<const VarDecl *,FixItList> & FixItsForVariable,const VariableGroupsManager & VarGrpMgr,const FunctionDecl * FD,const Strategy & S,ASTContext & Ctx,UnsafeBufferUsageHandler & Handler)2485 static FixItList createFunctionOverloadsForParms(
2486 std::map<const VarDecl *, FixItList> &FixItsForVariable /* mutable */,
2487 const VariableGroupsManager &VarGrpMgr, const FunctionDecl *FD,
2488 const Strategy &S, ASTContext &Ctx, UnsafeBufferUsageHandler &Handler) {
2489 FixItList FixItsSharedByParms{};
2490
2491 std::optional<FixItList> OverloadFixes =
2492 createOverloadsForFixedParams(S, FD, Ctx, Handler);
2493
2494 if (OverloadFixes) {
2495 FixItsSharedByParms.append(*OverloadFixes);
2496 } else {
2497 // Something wrong in generating `OverloadFixes`, need to remove the
2498 // whole group, where parameters are in, from `FixItsForVariable` (Note
2499 // that all parameters should be in the same group):
2500 for (auto *Member : VarGrpMgr.getGroupOfParms())
2501 FixItsForVariable.erase(Member);
2502 }
2503 return FixItsSharedByParms;
2504 }
2505
2506 // Constructs self-contained fix-its for each variable in `FixablesForAllVars`.
2507 static std::map<const VarDecl *, FixItList>
getFixIts(FixableGadgetSets & FixablesForAllVars,const Strategy & S,ASTContext & Ctx,const Decl * D,const DeclUseTracker & Tracker,UnsafeBufferUsageHandler & Handler,const VariableGroupsManager & VarGrpMgr)2508 getFixIts(FixableGadgetSets &FixablesForAllVars, const Strategy &S,
2509 ASTContext &Ctx,
2510 /* The function decl under analysis */ const Decl *D,
2511 const DeclUseTracker &Tracker, UnsafeBufferUsageHandler &Handler,
2512 const VariableGroupsManager &VarGrpMgr) {
2513 // `FixItsForVariable` will map each variable to a set of fix-its directly
2514 // associated to the variable itself. Fix-its of distinct variables in
2515 // `FixItsForVariable` are disjoint.
2516 std::map<const VarDecl *, FixItList> FixItsForVariable;
2517
2518 // Populate `FixItsForVariable` with fix-its directly associated with each
2519 // variable. Fix-its directly associated to a variable 'v' are the ones
2520 // produced by the `FixableGadget`s whose claimed variable is 'v'.
2521 for (const auto &[VD, Fixables] : FixablesForAllVars.byVar) {
2522 FixItsForVariable[VD] =
2523 fixVariable(VD, S.lookup(VD), D, Tracker, Ctx, Handler);
2524 // If we fail to produce Fix-It for the declaration we have to skip the
2525 // variable entirely.
2526 if (FixItsForVariable[VD].empty()) {
2527 FixItsForVariable.erase(VD);
2528 continue;
2529 }
2530 for (const auto &F : Fixables) {
2531 std::optional<FixItList> Fixits = F->getFixits(S);
2532
2533 if (Fixits) {
2534 FixItsForVariable[VD].insert(FixItsForVariable[VD].end(),
2535 Fixits->begin(), Fixits->end());
2536 continue;
2537 }
2538 #ifndef NDEBUG
2539 Handler.addDebugNoteForVar(
2540 VD, F->getBaseStmt()->getBeginLoc(),
2541 ("gadget '" + F->getDebugName() + "' refused to produce a fix")
2542 .str());
2543 #endif
2544 FixItsForVariable.erase(VD);
2545 break;
2546 }
2547 }
2548
2549 // `FixItsForVariable` now contains only variables that can be
2550 // fixed. A variable can be fixed if its' declaration and all Fixables
2551 // associated to it can all be fixed.
2552
2553 // To further remove from `FixItsForVariable` variables whose group mates
2554 // cannot be fixed...
2555 eraseVarsForUnfixableGroupMates(FixItsForVariable, VarGrpMgr);
2556 // Now `FixItsForVariable` gets further reduced: a variable is in
2557 // `FixItsForVariable` iff it can be fixed and all its group mates can be
2558 // fixed.
2559
2560 // Fix-its of bounds-safe overloads of `D` are shared by parameters of `D`.
2561 // That is, when fixing multiple parameters in one step, these fix-its will
2562 // be applied only once (instead of being applied per parameter).
2563 FixItList FixItsSharedByParms{};
2564
2565 if (auto *FD = dyn_cast<FunctionDecl>(D))
2566 FixItsSharedByParms = createFunctionOverloadsForParms(
2567 FixItsForVariable, VarGrpMgr, FD, S, Ctx, Handler);
2568
2569 // The map that maps each variable `v` to fix-its for the whole group where
2570 // `v` is in:
2571 std::map<const VarDecl *, FixItList> FinalFixItsForVariable{
2572 FixItsForVariable};
2573
2574 for (auto &[Var, Ignore] : FixItsForVariable) {
2575 bool AnyParm = false;
2576 const auto VarGroupForVD = VarGrpMgr.getGroupOfVar(Var, &AnyParm);
2577
2578 for (const VarDecl *GrpMate : VarGroupForVD) {
2579 if (Var == GrpMate)
2580 continue;
2581 if (FixItsForVariable.count(GrpMate))
2582 FinalFixItsForVariable[Var].append(FixItsForVariable[GrpMate]);
2583 }
2584 if (AnyParm) {
2585 // This assertion should never fail. Otherwise we have a bug.
2586 assert(!FixItsSharedByParms.empty() &&
2587 "Should not try to fix a parameter that does not belong to a "
2588 "FunctionDecl");
2589 FinalFixItsForVariable[Var].append(FixItsSharedByParms);
2590 }
2591 }
2592 // Fix-its that will be applied in one step shall NOT:
2593 // 1. overlap with macros or/and templates; or
2594 // 2. conflict with each other.
2595 // Otherwise, the fix-its will be dropped.
2596 for (auto Iter = FinalFixItsForVariable.begin();
2597 Iter != FinalFixItsForVariable.end();)
2598 if (overlapWithMacro(Iter->second) ||
2599 clang::internal::anyConflict(Iter->second, Ctx.getSourceManager())) {
2600 Iter = FinalFixItsForVariable.erase(Iter);
2601 } else
2602 Iter++;
2603 return FinalFixItsForVariable;
2604 }
2605
2606 template <typename VarDeclIterTy>
2607 static Strategy
getNaiveStrategy(llvm::iterator_range<VarDeclIterTy> UnsafeVars)2608 getNaiveStrategy(llvm::iterator_range<VarDeclIterTy> UnsafeVars) {
2609 Strategy S;
2610 for (const VarDecl *VD : UnsafeVars) {
2611 S.set(VD, Strategy::Kind::Span);
2612 }
2613 return S;
2614 }
2615
2616 // Manages variable groups:
2617 class VariableGroupsManagerImpl : public VariableGroupsManager {
2618 const std::vector<VarGrpTy> Groups;
2619 const std::map<const VarDecl *, unsigned> &VarGrpMap;
2620 const llvm::SetVector<const VarDecl *> &GrpsUnionForParms;
2621
2622 public:
VariableGroupsManagerImpl(const std::vector<VarGrpTy> & Groups,const std::map<const VarDecl *,unsigned> & VarGrpMap,const llvm::SetVector<const VarDecl * > & GrpsUnionForParms)2623 VariableGroupsManagerImpl(
2624 const std::vector<VarGrpTy> &Groups,
2625 const std::map<const VarDecl *, unsigned> &VarGrpMap,
2626 const llvm::SetVector<const VarDecl *> &GrpsUnionForParms)
2627 : Groups(Groups), VarGrpMap(VarGrpMap),
2628 GrpsUnionForParms(GrpsUnionForParms) {}
2629
getGroupOfVar(const VarDecl * Var,bool * HasParm) const2630 VarGrpRef getGroupOfVar(const VarDecl *Var, bool *HasParm) const override {
2631 if (GrpsUnionForParms.contains(Var)) {
2632 if (HasParm)
2633 *HasParm = true;
2634 return GrpsUnionForParms.getArrayRef();
2635 }
2636 if (HasParm)
2637 *HasParm = false;
2638
2639 auto It = VarGrpMap.find(Var);
2640
2641 if (It == VarGrpMap.end())
2642 return std::nullopt;
2643 return Groups[It->second];
2644 }
2645
getGroupOfParms() const2646 VarGrpRef getGroupOfParms() const override {
2647 return GrpsUnionForParms.getArrayRef();
2648 }
2649 };
2650
checkUnsafeBufferUsage(const Decl * D,UnsafeBufferUsageHandler & Handler,bool EmitSuggestions)2651 void clang::checkUnsafeBufferUsage(const Decl *D,
2652 UnsafeBufferUsageHandler &Handler,
2653 bool EmitSuggestions) {
2654 #ifndef NDEBUG
2655 Handler.clearDebugNotes();
2656 #endif
2657
2658 assert(D && D->getBody());
2659 // We do not want to visit a Lambda expression defined inside a method independently.
2660 // Instead, it should be visited along with the outer method.
2661 // FIXME: do we want to do the same thing for `BlockDecl`s?
2662 if (const auto *fd = dyn_cast<CXXMethodDecl>(D)) {
2663 if (fd->getParent()->isLambda() && fd->getParent()->isLocalClass())
2664 return;
2665 }
2666
2667 // Do not emit fixit suggestions for functions declared in an
2668 // extern "C" block.
2669 if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
2670 for (FunctionDecl *FReDecl : FD->redecls()) {
2671 if (FReDecl->isExternC()) {
2672 EmitSuggestions = false;
2673 break;
2674 }
2675 }
2676 }
2677
2678 WarningGadgetSets UnsafeOps;
2679 FixableGadgetSets FixablesForAllVars;
2680
2681 auto [FixableGadgets, WarningGadgets, Tracker] =
2682 findGadgets(D, Handler, EmitSuggestions);
2683
2684 if (!EmitSuggestions) {
2685 // Our job is very easy without suggestions. Just warn about
2686 // every problematic operation and consider it done. No need to deal
2687 // with fixable gadgets, no need to group operations by variable.
2688 for (const auto &G : WarningGadgets) {
2689 Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/false,
2690 D->getASTContext());
2691 }
2692
2693 // This return guarantees that most of the machine doesn't run when
2694 // suggestions aren't requested.
2695 assert(FixableGadgets.size() == 0 &&
2696 "Fixable gadgets found but suggestions not requested!");
2697 return;
2698 }
2699
2700 // If no `WarningGadget`s ever matched, there is no unsafe operations in the
2701 // function under the analysis. No need to fix any Fixables.
2702 if (!WarningGadgets.empty()) {
2703 // Gadgets "claim" variables they're responsible for. Once this loop
2704 // finishes, the tracker will only track DREs that weren't claimed by any
2705 // gadgets, i.e. not understood by the analysis.
2706 for (const auto &G : FixableGadgets) {
2707 for (const auto *DRE : G->getClaimedVarUseSites()) {
2708 Tracker.claimUse(DRE);
2709 }
2710 }
2711 }
2712
2713 // If no `WarningGadget`s ever matched, there is no unsafe operations in the
2714 // function under the analysis. Thus, it early returns here as there is
2715 // nothing needs to be fixed.
2716 //
2717 // Note this claim is based on the assumption that there is no unsafe
2718 // variable whose declaration is invisible from the analyzing function.
2719 // Otherwise, we need to consider if the uses of those unsafe varuables needs
2720 // fix.
2721 // So far, we are not fixing any global variables or class members. And,
2722 // lambdas will be analyzed along with the enclosing function. So this early
2723 // return is correct for now.
2724 if (WarningGadgets.empty())
2725 return;
2726
2727 UnsafeOps = groupWarningGadgetsByVar(std::move(WarningGadgets));
2728 FixablesForAllVars = groupFixablesByVar(std::move(FixableGadgets));
2729
2730 std::map<const VarDecl *, FixItList> FixItsForVariableGroup;
2731
2732 // Filter out non-local vars and vars with unclaimed DeclRefExpr-s.
2733 for (auto it = FixablesForAllVars.byVar.cbegin();
2734 it != FixablesForAllVars.byVar.cend();) {
2735 // FIXME: need to deal with global variables later
2736 if ((!it->first->isLocalVarDecl() && !isa<ParmVarDecl>(it->first))) {
2737 #ifndef NDEBUG
2738 Handler.addDebugNoteForVar(
2739 it->first, it->first->getBeginLoc(),
2740 ("failed to produce fixit for '" + it->first->getNameAsString() +
2741 "' : neither local nor a parameter"));
2742 #endif
2743 it = FixablesForAllVars.byVar.erase(it);
2744 } else if (it->first->getType().getCanonicalType()->isReferenceType()) {
2745 #ifndef NDEBUG
2746 Handler.addDebugNoteForVar(it->first, it->first->getBeginLoc(),
2747 ("failed to produce fixit for '" +
2748 it->first->getNameAsString() +
2749 "' : has a reference type"));
2750 #endif
2751 it = FixablesForAllVars.byVar.erase(it);
2752 } else if (Tracker.hasUnclaimedUses(it->first)) {
2753 #ifndef NDEBUG
2754 auto AllUnclaimed = Tracker.getUnclaimedUses(it->first);
2755 for (auto UnclaimedDRE : AllUnclaimed) {
2756 std::string UnclaimedUseTrace =
2757 getDREAncestorString(UnclaimedDRE, D->getASTContext());
2758
2759 Handler.addDebugNoteForVar(
2760 it->first, UnclaimedDRE->getBeginLoc(),
2761 ("failed to produce fixit for '" + it->first->getNameAsString() +
2762 "' : has an unclaimed use\nThe unclaimed DRE trace: " +
2763 UnclaimedUseTrace));
2764 }
2765 #endif
2766 it = FixablesForAllVars.byVar.erase(it);
2767 } else if (it->first->isInitCapture()) {
2768 #ifndef NDEBUG
2769 Handler.addDebugNoteForVar(
2770 it->first, it->first->getBeginLoc(),
2771 ("failed to produce fixit for '" + it->first->getNameAsString() +
2772 "' : init capture"));
2773 #endif
2774 it = FixablesForAllVars.byVar.erase(it);
2775 }else {
2776 ++it;
2777 }
2778 }
2779
2780 // Fixpoint iteration for pointer assignments
2781 using DepMapTy = DenseMap<const VarDecl *, llvm::SetVector<const VarDecl *>>;
2782 DepMapTy DependenciesMap{};
2783 DepMapTy PtrAssignmentGraph{};
2784
2785 for (auto it : FixablesForAllVars.byVar) {
2786 for (const FixableGadget *fixable : it.second) {
2787 std::optional<std::pair<const VarDecl *, const VarDecl *>> ImplPair =
2788 fixable->getStrategyImplications();
2789 if (ImplPair) {
2790 std::pair<const VarDecl *, const VarDecl *> Impl = std::move(*ImplPair);
2791 PtrAssignmentGraph[Impl.first].insert(Impl.second);
2792 }
2793 }
2794 }
2795
2796 /*
2797 The following code does a BFS traversal of the `PtrAssignmentGraph`
2798 considering all unsafe vars as starting nodes and constructs an undirected
2799 graph `DependenciesMap`. Constructing the `DependenciesMap` in this manner
2800 elimiates all variables that are unreachable from any unsafe var. In other
2801 words, this removes all dependencies that don't include any unsafe variable
2802 and consequently don't need any fixit generation.
2803 Note: A careful reader would observe that the code traverses
2804 `PtrAssignmentGraph` using `CurrentVar` but adds edges between `Var` and
2805 `Adj` and not between `CurrentVar` and `Adj`. Both approaches would
2806 achieve the same result but the one used here dramatically cuts the
2807 amount of hoops the second part of the algorithm needs to jump, given that
2808 a lot of these connections become "direct". The reader is advised not to
2809 imagine how the graph is transformed because of using `Var` instead of
2810 `CurrentVar`. The reader can continue reading as if `CurrentVar` was used,
2811 and think about why it's equivalent later.
2812 */
2813 std::set<const VarDecl *> VisitedVarsDirected{};
2814 for (const auto &[Var, ignore] : UnsafeOps.byVar) {
2815 if (VisitedVarsDirected.find(Var) == VisitedVarsDirected.end()) {
2816
2817 std::queue<const VarDecl*> QueueDirected{};
2818 QueueDirected.push(Var);
2819 while(!QueueDirected.empty()) {
2820 const VarDecl* CurrentVar = QueueDirected.front();
2821 QueueDirected.pop();
2822 VisitedVarsDirected.insert(CurrentVar);
2823 auto AdjacentNodes = PtrAssignmentGraph[CurrentVar];
2824 for (const VarDecl *Adj : AdjacentNodes) {
2825 if (VisitedVarsDirected.find(Adj) == VisitedVarsDirected.end()) {
2826 QueueDirected.push(Adj);
2827 }
2828 DependenciesMap[Var].insert(Adj);
2829 DependenciesMap[Adj].insert(Var);
2830 }
2831 }
2832 }
2833 }
2834
2835 // `Groups` stores the set of Connected Components in the graph.
2836 std::vector<VarGrpTy> Groups;
2837 // `VarGrpMap` maps variables that need fix to the groups (indexes) that the
2838 // variables belong to. Group indexes refer to the elements in `Groups`.
2839 // `VarGrpMap` is complete in that every variable that needs fix is in it.
2840 std::map<const VarDecl *, unsigned> VarGrpMap;
2841 // The union group over the ones in "Groups" that contain parameters of `D`:
2842 llvm::SetVector<const VarDecl *>
2843 GrpsUnionForParms; // these variables need to be fixed in one step
2844
2845 // Group Connected Components for Unsafe Vars
2846 // (Dependencies based on pointer assignments)
2847 std::set<const VarDecl *> VisitedVars{};
2848 for (const auto &[Var, ignore] : UnsafeOps.byVar) {
2849 if (VisitedVars.find(Var) == VisitedVars.end()) {
2850 VarGrpTy &VarGroup = Groups.emplace_back();
2851 std::queue<const VarDecl*> Queue{};
2852
2853 Queue.push(Var);
2854 while(!Queue.empty()) {
2855 const VarDecl* CurrentVar = Queue.front();
2856 Queue.pop();
2857 VisitedVars.insert(CurrentVar);
2858 VarGroup.push_back(CurrentVar);
2859 auto AdjacentNodes = DependenciesMap[CurrentVar];
2860 for (const VarDecl *Adj : AdjacentNodes) {
2861 if (VisitedVars.find(Adj) == VisitedVars.end()) {
2862 Queue.push(Adj);
2863 }
2864 }
2865 }
2866
2867 bool HasParm = false;
2868 unsigned GrpIdx = Groups.size() - 1;
2869
2870 for (const VarDecl *V : VarGroup) {
2871 VarGrpMap[V] = GrpIdx;
2872 if (!HasParm && isParameterOf(V, D))
2873 HasParm = true;
2874 }
2875 if (HasParm)
2876 GrpsUnionForParms.insert(VarGroup.begin(), VarGroup.end());
2877 }
2878 }
2879
2880 // Remove a `FixableGadget` if the associated variable is not in the graph
2881 // computed above. We do not want to generate fix-its for such variables,
2882 // since they are neither warned nor reachable from a warned one.
2883 //
2884 // Note a variable is not warned if it is not directly used in any unsafe
2885 // operation. A variable `v` is NOT reachable from an unsafe variable, if it
2886 // does not exist another variable `u` such that `u` is warned and fixing `u`
2887 // (transitively) implicates fixing `v`.
2888 //
2889 // For example,
2890 // ```
2891 // void f(int * p) {
2892 // int * a = p; *p = 0;
2893 // }
2894 // ```
2895 // `*p = 0` is a fixable gadget associated with a variable `p` that is neither
2896 // warned nor reachable from a warned one. If we add `a[5] = 0` to the end of
2897 // the function above, `p` becomes reachable from a warned variable.
2898 for (auto I = FixablesForAllVars.byVar.begin();
2899 I != FixablesForAllVars.byVar.end();) {
2900 // Note `VisitedVars` contain all the variables in the graph:
2901 if (!VisitedVars.count((*I).first)) {
2902 // no such var in graph:
2903 I = FixablesForAllVars.byVar.erase(I);
2904 } else
2905 ++I;
2906 }
2907
2908 // We assign strategies to variables that are 1) in the graph and 2) can be
2909 // fixed. Other variables have the default "Won't fix" strategy.
2910 Strategy NaiveStrategy = getNaiveStrategy(llvm::make_filter_range(
2911 VisitedVars, [&FixablesForAllVars](const VarDecl *V) {
2912 // If a warned variable has no "Fixable", it is considered unfixable:
2913 return FixablesForAllVars.byVar.count(V);
2914 }));
2915 VariableGroupsManagerImpl VarGrpMgr(Groups, VarGrpMap, GrpsUnionForParms);
2916
2917 if (isa<NamedDecl>(D))
2918 // The only case where `D` is not a `NamedDecl` is when `D` is a
2919 // `BlockDecl`. Let's not fix variables in blocks for now
2920 FixItsForVariableGroup =
2921 getFixIts(FixablesForAllVars, NaiveStrategy, D->getASTContext(), D,
2922 Tracker, Handler, VarGrpMgr);
2923
2924 for (const auto &G : UnsafeOps.noVar) {
2925 Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/false,
2926 D->getASTContext());
2927 }
2928
2929 for (const auto &[VD, WarningGadgets] : UnsafeOps.byVar) {
2930 auto FixItsIt = FixItsForVariableGroup.find(VD);
2931 Handler.handleUnsafeVariableGroup(VD, VarGrpMgr,
2932 FixItsIt != FixItsForVariableGroup.end()
2933 ? std::move(FixItsIt->second)
2934 : FixItList{},
2935 D);
2936 for (const auto &G : WarningGadgets) {
2937 Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/true,
2938 D->getASTContext());
2939 }
2940 }
2941 }
2942