1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This checker defines the attack surface for generic taint propagation.
10 //
11 // The taint information produced by it might be useful to other checkers. For
12 // example, checkers should report errors which involve tainted data more
13 // aggressively, even if the involved symbols are under constrained.
14 //
15 //===----------------------------------------------------------------------===//
16 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
17 #include "clang/AST/Attr.h"
18 #include "clang/Basic/Builtins.h"
19 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
20 #include "clang/StaticAnalyzer/Core/Checker.h"
21 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
22 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
23 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
24 #include <climits>
25 #include <initializer_list>
26 #include <utility>
27 
28 using namespace clang;
29 using namespace ento;
30 
31 namespace {
32 class GenericTaintChecker
33     : public Checker<check::PostStmt<CallExpr>, check::PreStmt<CallExpr>> {
34 public:
35   static void *getTag() {
36     static int Tag;
37     return &Tag;
38   }
39 
40   void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
41 
42   void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
43 
44 private:
45   static const unsigned InvalidArgIndex = UINT_MAX;
46   /// Denotes the return vale.
47   static const unsigned ReturnValueIndex = UINT_MAX - 1;
48 
49   mutable std::unique_ptr<BugType> BT;
50   void initBugType() const {
51     if (!BT)
52       BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
53   }
54 
55   /// Catch taint related bugs. Check if tainted data is passed to a
56   /// system call etc.
57   bool checkPre(const CallExpr *CE, CheckerContext &C) const;
58 
59   /// Add taint sources on a pre-visit.
60   void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
61 
62   /// Propagate taint generated at pre-visit.
63   bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
64 
65   /// Add taint sources on a post visit.
66   void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
67 
68   /// Check if the region the expression evaluates to is the standard input,
69   /// and thus, is tainted.
70   static bool isStdin(const Expr *E, CheckerContext &C);
71 
72   /// Given a pointer argument, return the value it points to.
73   static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg);
74 
75   /// Functions defining the attack surface.
76   using FnCheck = ProgramStateRef (GenericTaintChecker::*)(
77       const CallExpr *, CheckerContext &C) const;
78   ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const;
79   ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const;
80   ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const;
81 
82   /// Taint the scanned input if the file is tainted.
83   ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const;
84 
85   /// Check for CWE-134: Uncontrolled Format String.
86   static const char MsgUncontrolledFormatString[];
87   bool checkUncontrolledFormatString(const CallExpr *CE,
88                                      CheckerContext &C) const;
89 
90   /// Check for:
91   /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
92   /// CWE-78, "Failure to Sanitize Data into an OS Command"
93   static const char MsgSanitizeSystemArgs[];
94   bool checkSystemCall(const CallExpr *CE, StringRef Name,
95                        CheckerContext &C) const;
96 
97   /// Check if tainted data is used as a buffer size ins strn.. functions,
98   /// and allocators.
99   static const char MsgTaintedBufferSize[];
100   bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
101                               CheckerContext &C) const;
102 
103   /// Generate a report if the expression is tainted or points to tainted data.
104   bool generateReportIfTainted(const Expr *E, const char Msg[],
105                                CheckerContext &C) const;
106 
107   using ArgVector = SmallVector<unsigned, 2>;
108 
109   /// A struct used to specify taint propagation rules for a function.
110   ///
111   /// If any of the possible taint source arguments is tainted, all of the
112   /// destination arguments should also be tainted. Use InvalidArgIndex in the
113   /// src list to specify that all of the arguments can introduce taint. Use
114   /// InvalidArgIndex in the dst arguments to signify that all the non-const
115   /// pointer and reference arguments might be tainted on return. If
116   /// ReturnValueIndex is added to the dst list, the return value will be
117   /// tainted.
118   struct TaintPropagationRule {
119     enum class VariadicType { None, Src, Dst };
120 
121     /// List of arguments which can be taint sources and should be checked.
122     ArgVector SrcArgs;
123     /// List of arguments which should be tainted on function return.
124     ArgVector DstArgs;
125     /// Index for the first variadic parameter if exist.
126     unsigned VariadicIndex;
127     /// Show when a function has variadic parameters. If it has, it marks all
128     /// of them as source or destination.
129     VariadicType VarType;
130 
131     TaintPropagationRule()
132         : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None) {}
133 
134     TaintPropagationRule(std::initializer_list<unsigned> &&Src,
135                          std::initializer_list<unsigned> &&Dst,
136                          VariadicType Var = VariadicType::None,
137                          unsigned VarIndex = InvalidArgIndex)
138         : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)),
139           VariadicIndex(VarIndex), VarType(Var) {}
140 
141     /// Get the propagation rule for a given function.
142     static TaintPropagationRule
143     getTaintPropagationRule(const FunctionDecl *FDecl, StringRef Name,
144                             CheckerContext &C);
145 
146     void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
147     void addDstArg(unsigned A) { DstArgs.push_back(A); }
148 
149     bool isNull() const {
150       return SrcArgs.empty() && DstArgs.empty() &&
151              VariadicType::None == VarType;
152     }
153 
154     bool isDestinationArgument(unsigned ArgNum) const {
155       return (llvm::find(DstArgs, ArgNum) != DstArgs.end());
156     }
157 
158     static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State,
159                                            CheckerContext &C) {
160       if (State->isTainted(E, C.getLocationContext()) || isStdin(E, C))
161         return true;
162 
163       if (!E->getType().getTypePtr()->isPointerType())
164         return false;
165 
166       Optional<SVal> V = getPointedToSVal(C, E);
167       return (V && State->isTainted(*V));
168     }
169 
170     /// Pre-process a function which propagates taint according to the
171     /// taint rule.
172     ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
173   };
174 };
175 
176 const unsigned GenericTaintChecker::ReturnValueIndex;
177 const unsigned GenericTaintChecker::InvalidArgIndex;
178 
179 const char GenericTaintChecker::MsgUncontrolledFormatString[] =
180     "Untrusted data is used as a format string "
181     "(CWE-134: Uncontrolled Format String)";
182 
183 const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
184     "Untrusted data is passed to a system call "
185     "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
186 
187 const char GenericTaintChecker::MsgTaintedBufferSize[] =
188     "Untrusted data is used to specify the buffer size "
189     "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
190     "for character data and the null terminator)";
191 
192 } // end of anonymous namespace
193 
/// A program-state set used to hand information from the pre-visit of a call
/// over to its post-visit. Each element is an unsigned integer: either
/// ReturnValueIndex, or the index of a pointer/reference argument whose
/// pointed-to data should be tainted on return. Entries are added while the
/// call is pre-visited (TaintPropagationRule::process, preFscanf) and are
/// consumed and removed again in propagateFromPre.
REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
199 
200 GenericTaintChecker::TaintPropagationRule
201 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
202     const FunctionDecl *FDecl, StringRef Name, CheckerContext &C) {
203   // TODO: Currently, we might lose precision here: we always mark a return
204   // value as tainted even if it's just a pointer, pointing to tainted data.
205 
206   // Check for exact name match for functions without builtin substitutes.
207   TaintPropagationRule Rule =
208       llvm::StringSwitch<TaintPropagationRule>(Name)
209           .Case("atoi", TaintPropagationRule({0}, {ReturnValueIndex}))
210           .Case("atol", TaintPropagationRule({0}, {ReturnValueIndex}))
211           .Case("atoll", TaintPropagationRule({0}, {ReturnValueIndex}))
212           .Case("getc", TaintPropagationRule({0}, {ReturnValueIndex}))
213           .Case("fgetc", TaintPropagationRule({0}, {ReturnValueIndex}))
214           .Case("getc_unlocked", TaintPropagationRule({0}, {ReturnValueIndex}))
215           .Case("getw", TaintPropagationRule({0}, {ReturnValueIndex}))
216           .Case("toupper", TaintPropagationRule({0}, {ReturnValueIndex}))
217           .Case("tolower", TaintPropagationRule({0}, {ReturnValueIndex}))
218           .Case("strchr", TaintPropagationRule({0}, {ReturnValueIndex}))
219           .Case("strrchr", TaintPropagationRule({0}, {ReturnValueIndex}))
220           .Case("read", TaintPropagationRule({0, 2}, {1, ReturnValueIndex}))
221           .Case("pread",
222                 TaintPropagationRule({0, 1, 2, 3}, {1, ReturnValueIndex}))
223           .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex}))
224           .Case("fgets", TaintPropagationRule({2}, {0, ReturnValueIndex}))
225           .Case("getline", TaintPropagationRule({2}, {0}))
226           .Case("getdelim", TaintPropagationRule({3}, {0}))
227           .Case("fgetln", TaintPropagationRule({0}, {ReturnValueIndex}))
228           .Default(TaintPropagationRule());
229 
230   if (!Rule.isNull())
231     return Rule;
232 
233   // Check if it's one of the memory setting/copying functions.
234   // This check is specialized but faster then calling isCLibraryFunction.
235   unsigned BId = 0;
236   if ((BId = FDecl->getMemoryFunctionKind()))
237     switch (BId) {
238     case Builtin::BImemcpy:
239     case Builtin::BImemmove:
240     case Builtin::BIstrncpy:
241     case Builtin::BIstrncat:
242       return TaintPropagationRule({1, 2}, {0, ReturnValueIndex});
243     case Builtin::BIstrlcpy:
244     case Builtin::BIstrlcat:
245       return TaintPropagationRule({1, 2}, {0});
246     case Builtin::BIstrndup:
247       return TaintPropagationRule({0, 1}, {ReturnValueIndex});
248 
249     default:
250       break;
251     };
252 
253   // Process all other functions which could be defined as builtins.
254   if (Rule.isNull()) {
255     if (C.isCLibraryFunction(FDecl, "snprintf"))
256       return TaintPropagationRule({1}, {0, ReturnValueIndex}, VariadicType::Src,
257                                   3);
258     else if (C.isCLibraryFunction(FDecl, "sprintf"))
259       return TaintPropagationRule({}, {0, ReturnValueIndex}, VariadicType::Src,
260                                   2);
261     else if (C.isCLibraryFunction(FDecl, "strcpy") ||
262              C.isCLibraryFunction(FDecl, "stpcpy") ||
263              C.isCLibraryFunction(FDecl, "strcat"))
264       return TaintPropagationRule({1}, {0, ReturnValueIndex});
265     else if (C.isCLibraryFunction(FDecl, "bcopy"))
266       return TaintPropagationRule({0, 2}, {1});
267     else if (C.isCLibraryFunction(FDecl, "strdup") ||
268              C.isCLibraryFunction(FDecl, "strdupa"))
269       return TaintPropagationRule({0}, {ReturnValueIndex});
270     else if (C.isCLibraryFunction(FDecl, "wcsdup"))
271       return TaintPropagationRule({0}, {ReturnValueIndex});
272   }
273 
274   // Skipping the following functions, since they might be used for cleansing
275   // or smart memory copy:
276   // - memccpy - copying until hitting a special character.
277 
278   return TaintPropagationRule();
279 }
280 
281 void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
282                                        CheckerContext &C) const {
283   // Check for errors first.
284   if (checkPre(CE, C))
285     return;
286 
287   // Add taint second.
288   addSourcesPre(CE, C);
289 }
290 
291 void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
292                                         CheckerContext &C) const {
293   if (propagateFromPre(CE, C))
294     return;
295   addSourcesPost(CE, C);
296 }
297 
298 void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
299                                         CheckerContext &C) const {
300   ProgramStateRef State = nullptr;
301   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
302   if (!FDecl || FDecl->getKind() != Decl::Function)
303     return;
304 
305   StringRef Name = C.getCalleeName(FDecl);
306   if (Name.empty())
307     return;
308 
309   // First, try generating a propagation rule for this function.
310   TaintPropagationRule Rule =
311       TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
312   if (!Rule.isNull()) {
313     State = Rule.process(CE, C);
314     if (!State)
315       return;
316     C.addTransition(State);
317     return;
318   }
319 
320   // Otherwise, check if we have custom pre-processing implemented.
321   FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
322                              .Case("fscanf", &GenericTaintChecker::preFscanf)
323                              .Default(nullptr);
324   // Check and evaluate the call.
325   if (evalFunction)
326     State = (this->*evalFunction)(CE, C);
327   if (!State)
328     return;
329   C.addTransition(State);
330 }
331 
332 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
333                                            CheckerContext &C) const {
334   ProgramStateRef State = C.getState();
335 
336   // Depending on what was tainted at pre-visit, we determined a set of
337   // arguments which should be tainted after the function returns. These are
338   // stored in the state as TaintArgsOnPostVisit set.
339   TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
340   if (TaintArgs.isEmpty())
341     return false;
342 
343   for (unsigned ArgNum : TaintArgs) {
344     // Special handling for the tainted return value.
345     if (ArgNum == ReturnValueIndex) {
346       State = State->addTaint(CE, C.getLocationContext());
347       continue;
348     }
349 
350     // The arguments are pointer arguments. The data they are pointing at is
351     // tainted after the call.
352     if (CE->getNumArgs() < (ArgNum + 1))
353       return false;
354     const Expr *Arg = CE->getArg(ArgNum);
355     Optional<SVal> V = getPointedToSVal(C, Arg);
356     if (V)
357       State = State->addTaint(*V);
358   }
359 
360   // Clear up the taint info from the state.
361   State = State->remove<TaintArgsOnPostVisit>();
362 
363   if (State != C.getState()) {
364     C.addTransition(State);
365     return true;
366   }
367   return false;
368 }
369 
370 void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
371                                          CheckerContext &C) const {
372   // Define the attack surface.
373   // Set the evaluation function by switching on the callee name.
374   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
375   if (!FDecl || FDecl->getKind() != Decl::Function)
376     return;
377 
378   StringRef Name = C.getCalleeName(FDecl);
379   if (Name.empty())
380     return;
381   FnCheck evalFunction =
382       llvm::StringSwitch<FnCheck>(Name)
383           .Case("scanf", &GenericTaintChecker::postScanf)
384           // TODO: Add support for vfscanf & family.
385           .Case("getchar", &GenericTaintChecker::postRetTaint)
386           .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint)
387           .Case("getenv", &GenericTaintChecker::postRetTaint)
388           .Case("fopen", &GenericTaintChecker::postRetTaint)
389           .Case("fdopen", &GenericTaintChecker::postRetTaint)
390           .Case("freopen", &GenericTaintChecker::postRetTaint)
391           .Case("getch", &GenericTaintChecker::postRetTaint)
392           .Case("wgetch", &GenericTaintChecker::postRetTaint)
393           .Case("socket", &GenericTaintChecker::postSocket)
394           .Default(nullptr);
395 
396   // If the callee isn't defined, it is not of security concern.
397   // Check and evaluate the call.
398   ProgramStateRef State = nullptr;
399   if (evalFunction)
400     State = (this->*evalFunction)(CE, C);
401   if (!State)
402     return;
403 
404   C.addTransition(State);
405 }
406 
407 bool GenericTaintChecker::checkPre(const CallExpr *CE,
408                                    CheckerContext &C) const {
409 
410   if (checkUncontrolledFormatString(CE, C))
411     return true;
412 
413   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
414   if (!FDecl || FDecl->getKind() != Decl::Function)
415     return false;
416 
417   StringRef Name = C.getCalleeName(FDecl);
418   if (Name.empty())
419     return false;
420 
421   if (checkSystemCall(CE, Name, C))
422     return true;
423 
424   if (checkTaintedBufferSize(CE, FDecl, C))
425     return true;
426 
427   return false;
428 }
429 
430 Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C,
431                                                      const Expr *Arg) {
432   ProgramStateRef State = C.getState();
433   SVal AddrVal = C.getSVal(Arg->IgnoreParens());
434   if (AddrVal.isUnknownOrUndef())
435     return None;
436 
437   Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
438   if (!AddrLoc)
439     return None;
440 
441   QualType ArgTy = Arg->getType().getCanonicalType();
442   if (!ArgTy->isPointerType())
443     return None;
444 
445   QualType ValTy = ArgTy->getPointeeType();
446 
447   // Do not dereference void pointers. Treat them as byte pointers instead.
448   // FIXME: we might want to consider more than just the first byte.
449   if (ValTy->isVoidType())
450     ValTy = C.getASTContext().CharTy;
451 
452   return State->getSVal(*AddrLoc, ValTy);
453 }
454 
455 ProgramStateRef
456 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
457                                                    CheckerContext &C) const {
458   ProgramStateRef State = C.getState();
459 
460   // Check for taint in arguments.
461   bool IsTainted = true;
462   for (unsigned ArgNum : SrcArgs) {
463     if (ArgNum >= CE->getNumArgs())
464       return State;
465     if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
466       break;
467   }
468 
469   // Check for taint in variadic arguments.
470   if (!IsTainted && VariadicType::Src == VarType) {
471     // Check if any of the arguments is tainted
472     for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) {
473       if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
474         break;
475     }
476   }
477 
478   if (!IsTainted)
479     return State;
480 
481   // Mark the arguments which should be tainted after the function returns.
482   for (unsigned ArgNum : DstArgs) {
483     // Should mark the return value?
484     if (ArgNum == ReturnValueIndex) {
485       State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
486       continue;
487     }
488 
489     // Mark the given argument.
490     assert(ArgNum < CE->getNumArgs());
491     State = State->add<TaintArgsOnPostVisit>(ArgNum);
492   }
493 
494   // Mark all variadic arguments tainted if present.
495   if (VariadicType::Dst == VarType) {
496     // For all pointer and references that were passed in:
497     //   If they are not pointing to const data, mark data as tainted.
498     //   TODO: So far we are just going one level down; ideally we'd need to
499     //         recurse here.
500     for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) {
501       const Expr *Arg = CE->getArg(i);
502       // Process pointer argument.
503       const Type *ArgTy = Arg->getType().getTypePtr();
504       QualType PType = ArgTy->getPointeeType();
505       if ((!PType.isNull() && !PType.isConstQualified()) ||
506           (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
507         State = State->add<TaintArgsOnPostVisit>(i);
508     }
509   }
510 
511   return State;
512 }
513 
514 // If argument 0 (file descriptor) is tainted, all arguments except for arg 0
515 // and arg 1 should get taint.
516 ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE,
517                                                CheckerContext &C) const {
518   assert(CE->getNumArgs() >= 2);
519   ProgramStateRef State = C.getState();
520 
521   // Check is the file descriptor is tainted.
522   if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
523       isStdin(CE->getArg(0), C)) {
524     // All arguments except for the first two should get taint.
525     for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
526       State = State->add<TaintArgsOnPostVisit>(i);
527     return State;
528   }
529 
530   return nullptr;
531 }
532 
533 // If argument 0(protocol domain) is network, the return value should get taint.
534 ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE,
535                                                 CheckerContext &C) const {
536   ProgramStateRef State = C.getState();
537   if (CE->getNumArgs() < 3)
538     return State;
539 
540   SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
541   StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
542   // White list the internal communication protocols.
543   if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
544       DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
545     return State;
546   State = State->addTaint(CE, C.getLocationContext());
547   return State;
548 }
549 
550 ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE,
551                                                CheckerContext &C) const {
552   ProgramStateRef State = C.getState();
553   if (CE->getNumArgs() < 2)
554     return State;
555 
556   // All arguments except for the very first one should get taint.
557   for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
558     // The arguments are pointer arguments. The data they are pointing at is
559     // tainted after the call.
560     const Expr *Arg = CE->getArg(i);
561     Optional<SVal> V = getPointedToSVal(C, Arg);
562     if (V)
563       State = State->addTaint(*V);
564   }
565   return State;
566 }
567 
568 ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE,
569                                                   CheckerContext &C) const {
570   return C.getState()->addTaint(CE, C.getLocationContext());
571 }
572 
573 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
574   ProgramStateRef State = C.getState();
575   SVal Val = C.getSVal(E);
576 
577   // stdin is a pointer, so it would be a region.
578   const MemRegion *MemReg = Val.getAsRegion();
579 
580   // The region should be symbolic, we do not know it's value.
581   const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
582   if (!SymReg)
583     return false;
584 
585   // Get it's symbol and find the declaration region it's pointing to.
586   const SymbolRegionValue *Sm =
587       dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
588   if (!Sm)
589     return false;
590   const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
591   if (!DeclReg)
592     return false;
593 
594   // This region corresponds to a declaration, find out if it's a global/extern
595   // variable named stdin with the proper type.
596   if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
597     D = D->getCanonicalDecl();
598     if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) {
599       const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr());
600       if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
601                        C.getASTContext().getFILEType().getCanonicalType())
602         return true;
603     }
604   }
605   return false;
606 }
607 
608 static bool getPrintfFormatArgumentNum(const CallExpr *CE,
609                                        const CheckerContext &C,
610                                        unsigned int &ArgNum) {
611   // Find if the function contains a format string argument.
612   // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
613   // vsnprintf, syslog, custom annotated functions.
614   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
615   if (!FDecl)
616     return false;
617   for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
618     ArgNum = Format->getFormatIdx() - 1;
619     if ((Format->getType()->getName() == "printf") && CE->getNumArgs() > ArgNum)
620       return true;
621   }
622 
623   // Or if a function is named setproctitle (this is a heuristic).
624   if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
625     ArgNum = 0;
626     return true;
627   }
628 
629   return false;
630 }
631 
632 bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
633                                                   const char Msg[],
634                                                   CheckerContext &C) const {
635   assert(E);
636 
637   // Check for taint.
638   ProgramStateRef State = C.getState();
639   Optional<SVal> PointedToSVal = getPointedToSVal(C, E);
640   SVal TaintedSVal;
641   if (PointedToSVal && State->isTainted(*PointedToSVal))
642     TaintedSVal = *PointedToSVal;
643   else if (State->isTainted(E, C.getLocationContext()))
644     TaintedSVal = C.getSVal(E);
645   else
646     return false;
647 
648   // Generate diagnostic.
649   if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
650     initBugType();
651     auto report = llvm::make_unique<BugReport>(*BT, Msg, N);
652     report->addRange(E->getSourceRange());
653     report->addVisitor(llvm::make_unique<TaintBugVisitor>(TaintedSVal));
654     C.emitReport(std::move(report));
655     return true;
656   }
657   return false;
658 }
659 
660 bool GenericTaintChecker::checkUncontrolledFormatString(
661     const CallExpr *CE, CheckerContext &C) const {
662   // Check if the function contains a format string argument.
663   unsigned int ArgNum = 0;
664   if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
665     return false;
666 
667   // If either the format string content or the pointer itself are tainted,
668   // warn.
669   return generateReportIfTainted(CE->getArg(ArgNum),
670                                  MsgUncontrolledFormatString, C);
671 }
672 
673 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name,
674                                           CheckerContext &C) const {
675   // TODO: It might make sense to run this check on demand. In some cases,
676   // we should check if the environment has been cleansed here. We also might
677   // need to know if the user was reset before these calls(seteuid).
678   unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
679                         .Case("system", 0)
680                         .Case("popen", 0)
681                         .Case("execl", 0)
682                         .Case("execle", 0)
683                         .Case("execlp", 0)
684                         .Case("execv", 0)
685                         .Case("execvp", 0)
686                         .Case("execvP", 0)
687                         .Case("execve", 0)
688                         .Case("dlopen", 0)
689                         .Default(UINT_MAX);
690 
691   if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
692     return false;
693 
694   return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
695 }
696 
697 // TODO: Should this check be a part of the CString checker?
698 // If yes, should taint be a global setting?
699 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
700                                                  const FunctionDecl *FDecl,
701                                                  CheckerContext &C) const {
702   // If the function has a buffer size argument, set ArgNum.
703   unsigned ArgNum = InvalidArgIndex;
704   unsigned BId = 0;
705   if ((BId = FDecl->getMemoryFunctionKind()))
706     switch (BId) {
707     case Builtin::BImemcpy:
708     case Builtin::BImemmove:
709     case Builtin::BIstrncpy:
710       ArgNum = 2;
711       break;
712     case Builtin::BIstrndup:
713       ArgNum = 1;
714       break;
715     default:
716       break;
717     };
718 
719   if (ArgNum == InvalidArgIndex) {
720     if (C.isCLibraryFunction(FDecl, "malloc") ||
721         C.isCLibraryFunction(FDecl, "calloc") ||
722         C.isCLibraryFunction(FDecl, "alloca"))
723       ArgNum = 0;
724     else if (C.isCLibraryFunction(FDecl, "memccpy"))
725       ArgNum = 3;
726     else if (C.isCLibraryFunction(FDecl, "realloc"))
727       ArgNum = 1;
728     else if (C.isCLibraryFunction(FDecl, "bcopy"))
729       ArgNum = 2;
730   }
731 
732   return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
733          generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
734 }
735 
/// Registers GenericTaintChecker with the given checker manager.
void ento::registerGenericTaintChecker(CheckerManager &mgr) {
  mgr.registerChecker<GenericTaintChecker>();
}
739 
/// Always returns true; the language options in \p LO are not consulted.
bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) {
  return true;
}
743