1 //=== InnerPointerChecker.cpp -------------------------------------*- C++ -*--//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This file defines a check that marks raw pointers to a C++ container's
// inner buffer as released when the container object is destroyed or is
// used in a way that may invalidate those pointers (e.g. a mutating member
// function call). This information can be used by MallocChecker to detect
// use-after-free problems.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AllocationState.h"
16 #include "InterCheckerAPI.h"
17 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
18 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
19 #include "clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h"
20 #include "clang/StaticAnalyzer/Core/Checker.h"
21 #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
22 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
23 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
24 
25 using namespace clang;
26 using namespace ento;
27 
// Associate container objects with a set of raw pointer symbols.
//
// RawPtrMap is keyed by the memory region of a container object; each value
// is the set (PtrSet) of raw pointer symbols that were obtained from that
// object and are currently tracked by this checker. PtrSet values are built
// through the set factory retrieved from the program state manager.
REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(PtrSet, SymbolRef)
REGISTER_MAP_WITH_PROGRAMSTATE(RawPtrMap, const MemRegion *, PtrSet)
31 
32 
33 namespace {
34 
35 class InnerPointerChecker
36     : public Checker<check::DeadSymbols, check::PostCall> {
37 
38   CallDescription AppendFn, AssignFn, AddressofFn, ClearFn, CStrFn, DataFn,
39       DataMemberFn, EraseFn, InsertFn, PopBackFn, PushBackFn, ReplaceFn,
40       ReserveFn, ResizeFn, ShrinkToFitFn, SwapFn;
41 
42 public:
43   class InnerPointerBRVisitor : public BugReporterVisitor {
44     SymbolRef PtrToBuf;
45 
46   public:
47     InnerPointerBRVisitor(SymbolRef Sym) : PtrToBuf(Sym) {}
48 
49     static void *getTag() {
50       static int Tag = 0;
51       return &Tag;
52     }
53 
54     void Profile(llvm::FoldingSetNodeID &ID) const override {
55       ID.AddPointer(getTag());
56     }
57 
58     virtual PathDiagnosticPieceRef
59     VisitNode(const ExplodedNode *N, BugReporterContext &BRC,
60               PathSensitiveBugReport &BR) override;
61 
62     // FIXME: Scan the map once in the visitor's constructor and do a direct
63     // lookup by region.
64     bool isSymbolTracked(ProgramStateRef State, SymbolRef Sym) {
65       RawPtrMapTy Map = State->get<RawPtrMap>();
66       for (const auto &Entry : Map) {
67         if (Entry.second.contains(Sym))
68           return true;
69       }
70       return false;
71     }
72   };
73 
74   InnerPointerChecker()
75       : AppendFn({"std", "basic_string", "append"}),
76         AssignFn({"std", "basic_string", "assign"}),
77         AddressofFn({"std", "addressof"}),
78         ClearFn({"std", "basic_string", "clear"}),
79         CStrFn({"std", "basic_string", "c_str"}), DataFn({"std", "data"}, 1),
80         DataMemberFn({"std", "basic_string", "data"}),
81         EraseFn({"std", "basic_string", "erase"}),
82         InsertFn({"std", "basic_string", "insert"}),
83         PopBackFn({"std", "basic_string", "pop_back"}),
84         PushBackFn({"std", "basic_string", "push_back"}),
85         ReplaceFn({"std", "basic_string", "replace"}),
86         ReserveFn({"std", "basic_string", "reserve"}),
87         ResizeFn({"std", "basic_string", "resize"}),
88         ShrinkToFitFn({"std", "basic_string", "shrink_to_fit"}),
89         SwapFn({"std", "basic_string", "swap"}) {}
90 
91   /// Check whether the called member function potentially invalidates
92   /// pointers referring to the container object's inner buffer.
93   bool isInvalidatingMemberFunction(const CallEvent &Call) const;
94 
95   /// Check whether the called function returns a raw inner pointer.
96   bool isInnerPointerAccessFunction(const CallEvent &Call) const;
97 
98   /// Mark pointer symbols associated with the given memory region released
99   /// in the program state.
100   void markPtrSymbolsReleased(const CallEvent &Call, ProgramStateRef State,
101                               const MemRegion *ObjRegion,
102                               CheckerContext &C) const;
103 
104   /// Standard library functions that take a non-const `basic_string` argument by
105   /// reference may invalidate its inner pointers. Check for these cases and
106   /// mark the pointers released.
107   void checkFunctionArguments(const CallEvent &Call, ProgramStateRef State,
108                               CheckerContext &C) const;
109 
110   /// Record the connection between raw pointers referring to a container
111   /// object's inner buffer and the object's memory region in the program state.
112   /// Mark potentially invalidated pointers released.
113   void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
114 
115   /// Clean up the program state map.
116   void checkDeadSymbols(SymbolReaper &SymReaper, CheckerContext &C) const;
117 };
118 
119 } // end anonymous namespace
120 
121 bool InnerPointerChecker::isInvalidatingMemberFunction(
122         const CallEvent &Call) const {
123   if (const auto *MemOpCall = dyn_cast<CXXMemberOperatorCall>(&Call)) {
124     OverloadedOperatorKind Opc = MemOpCall->getOriginExpr()->getOperator();
125     if (Opc == OO_Equal || Opc == OO_PlusEqual)
126       return true;
127     return false;
128   }
129   return (isa<CXXDestructorCall>(Call) || Call.isCalled(AppendFn) ||
130           Call.isCalled(AssignFn) || Call.isCalled(ClearFn) ||
131           Call.isCalled(EraseFn) || Call.isCalled(InsertFn) ||
132           Call.isCalled(PopBackFn) || Call.isCalled(PushBackFn) ||
133           Call.isCalled(ReplaceFn) || Call.isCalled(ReserveFn) ||
134           Call.isCalled(ResizeFn) || Call.isCalled(ShrinkToFitFn) ||
135           Call.isCalled(SwapFn));
136 }
137 
138 bool InnerPointerChecker::isInnerPointerAccessFunction(
139     const CallEvent &Call) const {
140   return (Call.isCalled(CStrFn) || Call.isCalled(DataFn) ||
141           Call.isCalled(DataMemberFn));
142 }
143 
144 void InnerPointerChecker::markPtrSymbolsReleased(const CallEvent &Call,
145                                                  ProgramStateRef State,
146                                                  const MemRegion *MR,
147                                                  CheckerContext &C) const {
148   if (const PtrSet *PS = State->get<RawPtrMap>(MR)) {
149     const Expr *Origin = Call.getOriginExpr();
150     for (const auto Symbol : *PS) {
151       // NOTE: `Origin` may be null, and will be stored so in the symbol's
152       // `RefState` in MallocChecker's `RegionState` program state map.
153       State = allocation_state::markReleased(State, Symbol, Origin);
154     }
155     State = State->remove<RawPtrMap>(MR);
156     C.addTransition(State);
157     return;
158   }
159 }
160 
161 void InnerPointerChecker::checkFunctionArguments(const CallEvent &Call,
162                                                  ProgramStateRef State,
163                                                  CheckerContext &C) const {
164   if (const auto *FC = dyn_cast<AnyFunctionCall>(&Call)) {
165     const FunctionDecl *FD = FC->getDecl();
166     if (!FD || !FD->isInStdNamespace())
167       return;
168 
169     for (unsigned I = 0, E = FD->getNumParams(); I != E; ++I) {
170       QualType ParamTy = FD->getParamDecl(I)->getType();
171       if (!ParamTy->isReferenceType() ||
172           ParamTy->getPointeeType().isConstQualified())
173         continue;
174 
175       // In case of member operator calls, `this` is counted as an
176       // argument but not as a parameter.
177       bool isaMemberOpCall = isa<CXXMemberOperatorCall>(FC);
178       unsigned ArgI = isaMemberOpCall ? I+1 : I;
179 
180       SVal Arg = FC->getArgSVal(ArgI);
181       const auto *ArgRegion =
182           dyn_cast_or_null<TypedValueRegion>(Arg.getAsRegion());
183       if (!ArgRegion)
184         continue;
185 
186       // std::addressof function accepts a non-const reference as an argument,
187       // but doesn't modify it.
188       if (Call.isCalled(AddressofFn))
189         continue;
190 
191       markPtrSymbolsReleased(Call, State, ArgRegion, C);
192     }
193   }
194 }
195 
196 // [string.require]
197 //
198 // "References, pointers, and iterators referring to the elements of a
199 // basic_string sequence may be invalidated by the following uses of that
200 // basic_string object:
201 //
202 // -- As an argument to any standard library function taking a reference
203 // to non-const basic_string as an argument. For example, as an argument to
204 // non-member functions swap(), operator>>(), and getline(), or as an argument
205 // to basic_string::swap().
206 //
207 // -- Calling non-const member functions, except operator[], at, front, back,
208 // begin, rbegin, end, and rend."
209 
210 void InnerPointerChecker::checkPostCall(const CallEvent &Call,
211                                         CheckerContext &C) const {
212   ProgramStateRef State = C.getState();
213 
214   // TODO: Do we need these to be typed?
215   const TypedValueRegion *ObjRegion = nullptr;
216 
217   if (const auto *ICall = dyn_cast<CXXInstanceCall>(&Call)) {
218     ObjRegion = dyn_cast_or_null<TypedValueRegion>(
219         ICall->getCXXThisVal().getAsRegion());
220 
221     // Check [string.require] / second point.
222     if (isInvalidatingMemberFunction(Call)) {
223       markPtrSymbolsReleased(Call, State, ObjRegion, C);
224       return;
225     }
226   }
227 
228   if (isInnerPointerAccessFunction(Call)) {
229 
230     if (isa<SimpleFunctionCall>(Call)) {
231       // NOTE: As of now, we only have one free access function: std::data.
232       //       If we add more functions like this in the list, hardcoded
233       //       argument index should be changed.
234       ObjRegion =
235           dyn_cast_or_null<TypedValueRegion>(Call.getArgSVal(0).getAsRegion());
236     }
237 
238     if (!ObjRegion)
239       return;
240 
241     SVal RawPtr = Call.getReturnValue();
242     if (SymbolRef Sym = RawPtr.getAsSymbol(/*IncludeBaseRegions=*/true)) {
243       // Start tracking this raw pointer by adding it to the set of symbols
244       // associated with this container object in the program state map.
245 
246       PtrSet::Factory &F = State->getStateManager().get_context<PtrSet>();
247       const PtrSet *SetPtr = State->get<RawPtrMap>(ObjRegion);
248       PtrSet Set = SetPtr ? *SetPtr : F.getEmptySet();
249       assert(C.wasInlined || !Set.contains(Sym));
250       Set = F.add(Set, Sym);
251 
252       State = State->set<RawPtrMap>(ObjRegion, Set);
253       C.addTransition(State);
254     }
255 
256     return;
257   }
258 
259   // Check [string.require] / first point.
260   checkFunctionArguments(Call, State, C);
261 }
262 
263 void InnerPointerChecker::checkDeadSymbols(SymbolReaper &SymReaper,
264                                            CheckerContext &C) const {
265   ProgramStateRef State = C.getState();
266   PtrSet::Factory &F = State->getStateManager().get_context<PtrSet>();
267   RawPtrMapTy RPM = State->get<RawPtrMap>();
268   for (const auto &Entry : RPM) {
269     if (!SymReaper.isLiveRegion(Entry.first)) {
270       // Due to incomplete destructor support, some dead regions might
271       // remain in the program state map. Clean them up.
272       State = State->remove<RawPtrMap>(Entry.first);
273     }
274     if (const PtrSet *OldSet = State->get<RawPtrMap>(Entry.first)) {
275       PtrSet CleanedUpSet = *OldSet;
276       for (const auto Symbol : Entry.second) {
277         if (!SymReaper.isLive(Symbol))
278           CleanedUpSet = F.remove(CleanedUpSet, Symbol);
279       }
280       State = CleanedUpSet.isEmpty()
281                   ? State->remove<RawPtrMap>(Entry.first)
282                   : State->set<RawPtrMap>(Entry.first, CleanedUpSet);
283     }
284   }
285   C.addTransition(State);
286 }
287 
288 namespace clang {
289 namespace ento {
290 namespace allocation_state {
291 
292 std::unique_ptr<BugReporterVisitor> getInnerPointerBRVisitor(SymbolRef Sym) {
293   return std::make_unique<InnerPointerChecker::InnerPointerBRVisitor>(Sym);
294 }
295 
296 const MemRegion *getContainerObjRegion(ProgramStateRef State, SymbolRef Sym) {
297   RawPtrMapTy Map = State->get<RawPtrMap>();
298   for (const auto &Entry : Map) {
299     if (Entry.second.contains(Sym)) {
300       return Entry.first;
301     }
302   }
303   return nullptr;
304 }
305 
306 } // end namespace allocation_state
307 } // end namespace ento
308 } // end namespace clang
309 
310 PathDiagnosticPieceRef InnerPointerChecker::InnerPointerBRVisitor::VisitNode(
311     const ExplodedNode *N, BugReporterContext &BRC, PathSensitiveBugReport &) {
312   if (!isSymbolTracked(N->getState(), PtrToBuf) ||
313       isSymbolTracked(N->getFirstPred()->getState(), PtrToBuf))
314     return nullptr;
315 
316   const Stmt *S = N->getStmtForDiagnostics();
317   if (!S)
318     return nullptr;
319 
320   const MemRegion *ObjRegion =
321       allocation_state::getContainerObjRegion(N->getState(), PtrToBuf);
322   const auto *TypedRegion = cast<TypedValueRegion>(ObjRegion);
323   QualType ObjTy = TypedRegion->getValueType();
324 
325   SmallString<256> Buf;
326   llvm::raw_svector_ostream OS(Buf);
327   OS << "Pointer to inner buffer of '" << ObjTy.getAsString()
328      << "' obtained here";
329   PathDiagnosticLocation Pos(S, BRC.getSourceManager(),
330                              N->getLocationContext());
331   return std::make_shared<PathDiagnosticEventPiece>(Pos, OS.str(), true);
332 }
333 
334 void ento::registerInnerPointerChecker(CheckerManager &Mgr) {
335   registerInnerPointerCheckerAux(Mgr);
336   Mgr.registerChecker<InnerPointerChecker>();
337 }
338 
339 bool ento::shouldRegisterInnerPointerChecker(const CheckerManager &mgr) {
340   return true;
341 }
342