1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This checker defines the attack surface for generic taint propagation.
10 //
11 // The taint information produced by it might be useful to other checkers. For
12 // example, checkers should report errors which involve tainted data more
13 // aggressively, even if the involved symbols are under constrained.
14 //
15 //===----------------------------------------------------------------------===//
16 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
17 #include "clang/AST/Attr.h"
18 #include "clang/Basic/Builtins.h"
19 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
20 #include "clang/StaticAnalyzer/Core/Checker.h"
21 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
22 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
23 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
24 #include <climits>
25 #include <initializer_list>
26 #include <utility>
27 
28 using namespace clang;
29 using namespace ento;
30 
31 namespace {
32 class GenericTaintChecker
33     : public Checker<check::PostStmt<CallExpr>, check::PreStmt<CallExpr>> {
34 public:
35   static void *getTag() {
36     static int Tag;
37     return &Tag;
38   }
39 
40   void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
41 
42   void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
43 
44 private:
45   static const unsigned InvalidArgIndex = UINT_MAX;
46   /// Denotes the return vale.
47   static const unsigned ReturnValueIndex = UINT_MAX - 1;
48 
49   mutable std::unique_ptr<BugType> BT;
50   void initBugType() const {
51     if (!BT)
52       BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
53   }
54 
55   /// Catch taint related bugs. Check if tainted data is passed to a
56   /// system call etc.
57   bool checkPre(const CallExpr *CE, CheckerContext &C) const;
58 
59   /// Add taint sources on a pre-visit.
60   void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
61 
62   /// Propagate taint generated at pre-visit.
63   bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
64 
65   /// Check if the region the expression evaluates to is the standard input,
66   /// and thus, is tainted.
67   static bool isStdin(const Expr *E, CheckerContext &C);
68 
69   /// Given a pointer argument, return the value it points to.
70   static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg);
71 
72   /// Check for CWE-134: Uncontrolled Format String.
73   static const char MsgUncontrolledFormatString[];
74   bool checkUncontrolledFormatString(const CallExpr *CE,
75                                      CheckerContext &C) const;
76 
77   /// Check for:
78   /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
79   /// CWE-78, "Failure to Sanitize Data into an OS Command"
80   static const char MsgSanitizeSystemArgs[];
81   bool checkSystemCall(const CallExpr *CE, StringRef Name,
82                        CheckerContext &C) const;
83 
84   /// Check if tainted data is used as a buffer size ins strn.. functions,
85   /// and allocators.
86   static const char MsgTaintedBufferSize[];
87   bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
88                               CheckerContext &C) const;
89 
90   /// Generate a report if the expression is tainted or points to tainted data.
91   bool generateReportIfTainted(const Expr *E, const char Msg[],
92                                CheckerContext &C) const;
93 
94   using ArgVector = SmallVector<unsigned, 2>;
95 
96   /// A struct used to specify taint propagation rules for a function.
97   ///
98   /// If any of the possible taint source arguments is tainted, all of the
99   /// destination arguments should also be tainted. Use InvalidArgIndex in the
100   /// src list to specify that all of the arguments can introduce taint. Use
101   /// InvalidArgIndex in the dst arguments to signify that all the non-const
102   /// pointer and reference arguments might be tainted on return. If
103   /// ReturnValueIndex is added to the dst list, the return value will be
104   /// tainted.
105   struct TaintPropagationRule {
106     enum class VariadicType { None, Src, Dst };
107 
108     using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *,
109                                          CheckerContext &C);
110 
111     /// List of arguments which can be taint sources and should be checked.
112     ArgVector SrcArgs;
113     /// List of arguments which should be tainted on function return.
114     ArgVector DstArgs;
115     /// Index for the first variadic parameter if exist.
116     unsigned VariadicIndex;
117     /// Show when a function has variadic parameters. If it has, it marks all
118     /// of them as source or destination.
119     VariadicType VarType;
120     /// Special function for tainted source determination. If defined, it can
121     /// override the default behavior.
122     PropagationFuncType PropagationFunc;
123 
124     TaintPropagationRule()
125         : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
126           PropagationFunc(nullptr) {}
127 
128     TaintPropagationRule(std::initializer_list<unsigned> &&Src,
129                          std::initializer_list<unsigned> &&Dst,
130                          VariadicType Var = VariadicType::None,
131                          unsigned VarIndex = InvalidArgIndex,
132                          PropagationFuncType Func = nullptr)
133         : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)),
134           VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {}
135 
136     /// Get the propagation rule for a given function.
137     static TaintPropagationRule
138     getTaintPropagationRule(const FunctionDecl *FDecl, StringRef Name,
139                             CheckerContext &C);
140 
141     void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
142     void addDstArg(unsigned A) { DstArgs.push_back(A); }
143 
144     bool isNull() const {
145       return SrcArgs.empty() && DstArgs.empty() &&
146              VariadicType::None == VarType;
147     }
148 
149     bool isDestinationArgument(unsigned ArgNum) const {
150       return (llvm::find(DstArgs, ArgNum) != DstArgs.end());
151     }
152 
153     static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State,
154                                            CheckerContext &C) {
155       if (State->isTainted(E, C.getLocationContext()) || isStdin(E, C))
156         return true;
157 
158       if (!E->getType().getTypePtr()->isPointerType())
159         return false;
160 
161       Optional<SVal> V = getPointedToSVal(C, E);
162       return (V && State->isTainted(*V));
163     }
164 
165     /// Pre-process a function which propagates taint according to the
166     /// taint rule.
167     ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
168 
169     // Functions for custom taintedness propagation.
170     static bool postSocket(bool IsTainted, const CallExpr *CE,
171                            CheckerContext &C);
172   };
173 };
174 
175 const unsigned GenericTaintChecker::ReturnValueIndex;
176 const unsigned GenericTaintChecker::InvalidArgIndex;
177 
178 const char GenericTaintChecker::MsgUncontrolledFormatString[] =
179     "Untrusted data is used as a format string "
180     "(CWE-134: Uncontrolled Format String)";
181 
182 const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
183     "Untrusted data is passed to a system call "
184     "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
185 
186 const char GenericTaintChecker::MsgTaintedBufferSize[] =
187     "Untrusted data is used to specify the buffer size "
188     "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
189     "for character data and the null terminator)";
190 
191 } // end of anonymous namespace
192 
193 /// A set which is used to pass information from call pre-visit instruction
194 /// to the call post-visit. The values are unsigned integers, which are either
195 /// ReturnValueIndex, or indexes of the pointer/reference argument, which
196 /// points to data, which should be tainted on return.
197 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
198 
199 GenericTaintChecker::TaintPropagationRule
200 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
201     const FunctionDecl *FDecl, StringRef Name, CheckerContext &C) {
202   // TODO: Currently, we might lose precision here: we always mark a return
203   // value as tainted even if it's just a pointer, pointing to tainted data.
204 
205   // Check for exact name match for functions without builtin substitutes.
206   TaintPropagationRule Rule =
207       llvm::StringSwitch<TaintPropagationRule>(Name)
208           // Source functions
209           // TODO: Add support for vfscanf & family.
210           .Case("fdopen", TaintPropagationRule({}, {ReturnValueIndex}))
211           .Case("fopen", TaintPropagationRule({}, {ReturnValueIndex}))
212           .Case("freopen", TaintPropagationRule({}, {ReturnValueIndex}))
213           .Case("getch", TaintPropagationRule({}, {ReturnValueIndex}))
214           .Case("getchar", TaintPropagationRule({}, {ReturnValueIndex}))
215           .Case("getchar_unlocked", TaintPropagationRule({}, {ReturnValueIndex}))
216           .Case("getenv", TaintPropagationRule({}, {ReturnValueIndex}))
217           .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex}))
218           .Case("scanf", TaintPropagationRule({}, {}, VariadicType::Dst, 1))
219           .Case("socket",
220                 TaintPropagationRule({}, {ReturnValueIndex}, VariadicType::None,
221                                      InvalidArgIndex,
222                                      &TaintPropagationRule::postSocket))
223           .Case("wgetch", TaintPropagationRule({}, {ReturnValueIndex}))
224           // Propagating functions
225           .Case("atoi", TaintPropagationRule({0}, {ReturnValueIndex}))
226           .Case("atol", TaintPropagationRule({0}, {ReturnValueIndex}))
227           .Case("atoll", TaintPropagationRule({0}, {ReturnValueIndex}))
228           .Case("fgetc", TaintPropagationRule({0}, {ReturnValueIndex}))
229           .Case("fgetln", TaintPropagationRule({0}, {ReturnValueIndex}))
230           .Case("fgets", TaintPropagationRule({2}, {0, ReturnValueIndex}))
231           .Case("fscanf", TaintPropagationRule({0}, {}, VariadicType::Dst, 2))
232           .Case("getc", TaintPropagationRule({0}, {ReturnValueIndex}))
233           .Case("getc_unlocked", TaintPropagationRule({0}, {ReturnValueIndex}))
234           .Case("getdelim", TaintPropagationRule({3}, {0}))
235           .Case("getline", TaintPropagationRule({2}, {0}))
236           .Case("getw", TaintPropagationRule({0}, {ReturnValueIndex}))
237           .Case("pread",
238                 TaintPropagationRule({0, 1, 2, 3}, {1, ReturnValueIndex}))
239           .Case("read", TaintPropagationRule({0, 2}, {1, ReturnValueIndex}))
240           .Case("strchr", TaintPropagationRule({0}, {ReturnValueIndex}))
241           .Case("strrchr", TaintPropagationRule({0}, {ReturnValueIndex}))
242           .Case("tolower", TaintPropagationRule({0}, {ReturnValueIndex}))
243           .Case("toupper", TaintPropagationRule({0}, {ReturnValueIndex}))
244           .Default(TaintPropagationRule());
245 
246   if (!Rule.isNull())
247     return Rule;
248 
249   // Check if it's one of the memory setting/copying functions.
250   // This check is specialized but faster then calling isCLibraryFunction.
251   unsigned BId = 0;
252   if ((BId = FDecl->getMemoryFunctionKind()))
253     switch (BId) {
254     case Builtin::BImemcpy:
255     case Builtin::BImemmove:
256     case Builtin::BIstrncpy:
257     case Builtin::BIstrncat:
258       return TaintPropagationRule({1, 2}, {0, ReturnValueIndex});
259     case Builtin::BIstrlcpy:
260     case Builtin::BIstrlcat:
261       return TaintPropagationRule({1, 2}, {0});
262     case Builtin::BIstrndup:
263       return TaintPropagationRule({0, 1}, {ReturnValueIndex});
264 
265     default:
266       break;
267     };
268 
269   // Process all other functions which could be defined as builtins.
270   if (Rule.isNull()) {
271     if (C.isCLibraryFunction(FDecl, "snprintf"))
272       return TaintPropagationRule({1}, {0, ReturnValueIndex}, VariadicType::Src,
273                                   3);
274     else if (C.isCLibraryFunction(FDecl, "sprintf"))
275       return TaintPropagationRule({}, {0, ReturnValueIndex}, VariadicType::Src,
276                                   2);
277     else if (C.isCLibraryFunction(FDecl, "strcpy") ||
278              C.isCLibraryFunction(FDecl, "stpcpy") ||
279              C.isCLibraryFunction(FDecl, "strcat"))
280       return TaintPropagationRule({1}, {0, ReturnValueIndex});
281     else if (C.isCLibraryFunction(FDecl, "bcopy"))
282       return TaintPropagationRule({0, 2}, {1});
283     else if (C.isCLibraryFunction(FDecl, "strdup") ||
284              C.isCLibraryFunction(FDecl, "strdupa"))
285       return TaintPropagationRule({0}, {ReturnValueIndex});
286     else if (C.isCLibraryFunction(FDecl, "wcsdup"))
287       return TaintPropagationRule({0}, {ReturnValueIndex});
288   }
289 
290   // Skipping the following functions, since they might be used for cleansing
291   // or smart memory copy:
292   // - memccpy - copying until hitting a special character.
293 
294   return TaintPropagationRule();
295 }
296 
297 void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
298                                        CheckerContext &C) const {
299   // Check for taintedness related errors first: system call, uncontrolled
300   // format string, tainted buffer size.
301   if (checkPre(CE, C))
302     return;
303 
304   // Marks the function's arguments and/or return value tainted if it present in
305   // the list.
306   addSourcesPre(CE, C);
307 }
308 
309 void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
310                                         CheckerContext &C) const {
311   // Set the marked values as tainted. The return value only accessible from
312   // checkPostStmt.
313   propagateFromPre(CE, C);
314 }
315 
316 void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
317                                         CheckerContext &C) const {
318   ProgramStateRef State = nullptr;
319   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
320   if (!FDecl || FDecl->getKind() != Decl::Function)
321     return;
322 
323   StringRef Name = C.getCalleeName(FDecl);
324   if (Name.empty())
325     return;
326 
327   // First, try generating a propagation rule for this function.
328   TaintPropagationRule Rule =
329       TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
330   if (!Rule.isNull()) {
331     State = Rule.process(CE, C);
332     if (!State)
333       return;
334     C.addTransition(State);
335     return;
336   }
337 
338   if (!State)
339     return;
340   C.addTransition(State);
341 }
342 
343 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
344                                            CheckerContext &C) const {
345   ProgramStateRef State = C.getState();
346 
347   // Depending on what was tainted at pre-visit, we determined a set of
348   // arguments which should be tainted after the function returns. These are
349   // stored in the state as TaintArgsOnPostVisit set.
350   TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
351   if (TaintArgs.isEmpty())
352     return false;
353 
354   for (unsigned ArgNum : TaintArgs) {
355     // Special handling for the tainted return value.
356     if (ArgNum == ReturnValueIndex) {
357       State = State->addTaint(CE, C.getLocationContext());
358       continue;
359     }
360 
361     // The arguments are pointer arguments. The data they are pointing at is
362     // tainted after the call.
363     if (CE->getNumArgs() < (ArgNum + 1))
364       return false;
365     const Expr *Arg = CE->getArg(ArgNum);
366     Optional<SVal> V = getPointedToSVal(C, Arg);
367     if (V)
368       State = State->addTaint(*V);
369   }
370 
371   // Clear up the taint info from the state.
372   State = State->remove<TaintArgsOnPostVisit>();
373 
374   if (State != C.getState()) {
375     C.addTransition(State);
376     return true;
377   }
378   return false;
379 }
380 
381 bool GenericTaintChecker::checkPre(const CallExpr *CE,
382                                    CheckerContext &C) const {
383 
384   if (checkUncontrolledFormatString(CE, C))
385     return true;
386 
387   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
388   if (!FDecl || FDecl->getKind() != Decl::Function)
389     return false;
390 
391   StringRef Name = C.getCalleeName(FDecl);
392   if (Name.empty())
393     return false;
394 
395   if (checkSystemCall(CE, Name, C))
396     return true;
397 
398   if (checkTaintedBufferSize(CE, FDecl, C))
399     return true;
400 
401   return false;
402 }
403 
404 Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C,
405                                                      const Expr *Arg) {
406   ProgramStateRef State = C.getState();
407   SVal AddrVal = C.getSVal(Arg->IgnoreParens());
408   if (AddrVal.isUnknownOrUndef())
409     return None;
410 
411   Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
412   if (!AddrLoc)
413     return None;
414 
415   QualType ArgTy = Arg->getType().getCanonicalType();
416   if (!ArgTy->isPointerType())
417     return None;
418 
419   QualType ValTy = ArgTy->getPointeeType();
420 
421   // Do not dereference void pointers. Treat them as byte pointers instead.
422   // FIXME: we might want to consider more than just the first byte.
423   if (ValTy->isVoidType())
424     ValTy = C.getASTContext().CharTy;
425 
426   return State->getSVal(*AddrLoc, ValTy);
427 }
428 
429 ProgramStateRef
430 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
431                                                    CheckerContext &C) const {
432   ProgramStateRef State = C.getState();
433 
434   // Check for taint in arguments.
435   bool IsTainted = true;
436   for (unsigned ArgNum : SrcArgs) {
437     if (ArgNum >= CE->getNumArgs())
438       return State;
439     if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
440       break;
441   }
442 
443   // Check for taint in variadic arguments.
444   if (!IsTainted && VariadicType::Src == VarType) {
445     // Check if any of the arguments is tainted
446     for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) {
447       if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
448         break;
449     }
450   }
451 
452   if (PropagationFunc)
453     IsTainted = PropagationFunc(IsTainted, CE, C);
454 
455   if (!IsTainted)
456     return State;
457 
458   // Mark the arguments which should be tainted after the function returns.
459   for (unsigned ArgNum : DstArgs) {
460     // Should mark the return value?
461     if (ArgNum == ReturnValueIndex) {
462       State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
463       continue;
464     }
465 
466     // Mark the given argument.
467     assert(ArgNum < CE->getNumArgs());
468     State = State->add<TaintArgsOnPostVisit>(ArgNum);
469   }
470 
471   // Mark all variadic arguments tainted if present.
472   if (VariadicType::Dst == VarType) {
473     // For all pointer and references that were passed in:
474     //   If they are not pointing to const data, mark data as tainted.
475     //   TODO: So far we are just going one level down; ideally we'd need to
476     //         recurse here.
477     for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) {
478       const Expr *Arg = CE->getArg(i);
479       // Process pointer argument.
480       const Type *ArgTy = Arg->getType().getTypePtr();
481       QualType PType = ArgTy->getPointeeType();
482       if ((!PType.isNull() && !PType.isConstQualified()) ||
483           (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
484         State = State->add<TaintArgsOnPostVisit>(i);
485     }
486   }
487 
488   return State;
489 }
490 
491 // If argument 0(protocol domain) is network, the return value should get taint.
492 bool GenericTaintChecker::TaintPropagationRule::postSocket(bool /*IsTainted*/,
493                                                            const CallExpr *CE,
494                                                            CheckerContext &C) {
495   SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
496   StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
497   // White list the internal communication protocols.
498   if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
499       DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
500     return false;
501 
502   return true;
503 }
504 
505 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
506   ProgramStateRef State = C.getState();
507   SVal Val = C.getSVal(E);
508 
509   // stdin is a pointer, so it would be a region.
510   const MemRegion *MemReg = Val.getAsRegion();
511 
512   // The region should be symbolic, we do not know it's value.
513   const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
514   if (!SymReg)
515     return false;
516 
517   // Get it's symbol and find the declaration region it's pointing to.
518   const SymbolRegionValue *Sm =
519       dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
520   if (!Sm)
521     return false;
522   const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
523   if (!DeclReg)
524     return false;
525 
526   // This region corresponds to a declaration, find out if it's a global/extern
527   // variable named stdin with the proper type.
528   if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
529     D = D->getCanonicalDecl();
530     if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) {
531       const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr());
532       if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
533                        C.getASTContext().getFILEType().getCanonicalType())
534         return true;
535     }
536   }
537   return false;
538 }
539 
540 static bool getPrintfFormatArgumentNum(const CallExpr *CE,
541                                        const CheckerContext &C,
542                                        unsigned int &ArgNum) {
543   // Find if the function contains a format string argument.
544   // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
545   // vsnprintf, syslog, custom annotated functions.
546   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
547   if (!FDecl)
548     return false;
549   for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
550     ArgNum = Format->getFormatIdx() - 1;
551     if ((Format->getType()->getName() == "printf") && CE->getNumArgs() > ArgNum)
552       return true;
553   }
554 
555   // Or if a function is named setproctitle (this is a heuristic).
556   if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
557     ArgNum = 0;
558     return true;
559   }
560 
561   return false;
562 }
563 
564 bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
565                                                   const char Msg[],
566                                                   CheckerContext &C) const {
567   assert(E);
568 
569   // Check for taint.
570   ProgramStateRef State = C.getState();
571   Optional<SVal> PointedToSVal = getPointedToSVal(C, E);
572   SVal TaintedSVal;
573   if (PointedToSVal && State->isTainted(*PointedToSVal))
574     TaintedSVal = *PointedToSVal;
575   else if (State->isTainted(E, C.getLocationContext()))
576     TaintedSVal = C.getSVal(E);
577   else
578     return false;
579 
580   // Generate diagnostic.
581   if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
582     initBugType();
583     auto report = llvm::make_unique<BugReport>(*BT, Msg, N);
584     report->addRange(E->getSourceRange());
585     report->addVisitor(llvm::make_unique<TaintBugVisitor>(TaintedSVal));
586     C.emitReport(std::move(report));
587     return true;
588   }
589   return false;
590 }
591 
592 bool GenericTaintChecker::checkUncontrolledFormatString(
593     const CallExpr *CE, CheckerContext &C) const {
594   // Check if the function contains a format string argument.
595   unsigned int ArgNum = 0;
596   if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
597     return false;
598 
599   // If either the format string content or the pointer itself are tainted,
600   // warn.
601   return generateReportIfTainted(CE->getArg(ArgNum),
602                                  MsgUncontrolledFormatString, C);
603 }
604 
605 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name,
606                                           CheckerContext &C) const {
607   // TODO: It might make sense to run this check on demand. In some cases,
608   // we should check if the environment has been cleansed here. We also might
609   // need to know if the user was reset before these calls(seteuid).
610   unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
611                         .Case("system", 0)
612                         .Case("popen", 0)
613                         .Case("execl", 0)
614                         .Case("execle", 0)
615                         .Case("execlp", 0)
616                         .Case("execv", 0)
617                         .Case("execvp", 0)
618                         .Case("execvP", 0)
619                         .Case("execve", 0)
620                         .Case("dlopen", 0)
621                         .Default(UINT_MAX);
622 
623   if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
624     return false;
625 
626   return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
627 }
628 
629 // TODO: Should this check be a part of the CString checker?
630 // If yes, should taint be a global setting?
631 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
632                                                  const FunctionDecl *FDecl,
633                                                  CheckerContext &C) const {
634   // If the function has a buffer size argument, set ArgNum.
635   unsigned ArgNum = InvalidArgIndex;
636   unsigned BId = 0;
637   if ((BId = FDecl->getMemoryFunctionKind()))
638     switch (BId) {
639     case Builtin::BImemcpy:
640     case Builtin::BImemmove:
641     case Builtin::BIstrncpy:
642       ArgNum = 2;
643       break;
644     case Builtin::BIstrndup:
645       ArgNum = 1;
646       break;
647     default:
648       break;
649     };
650 
651   if (ArgNum == InvalidArgIndex) {
652     if (C.isCLibraryFunction(FDecl, "malloc") ||
653         C.isCLibraryFunction(FDecl, "calloc") ||
654         C.isCLibraryFunction(FDecl, "alloca"))
655       ArgNum = 0;
656     else if (C.isCLibraryFunction(FDecl, "memccpy"))
657       ArgNum = 3;
658     else if (C.isCLibraryFunction(FDecl, "realloc"))
659       ArgNum = 1;
660     else if (C.isCLibraryFunction(FDecl, "bcopy"))
661       ArgNum = 2;
662   }
663 
664   return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
665          generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
666 }
667 
668 void ento::registerGenericTaintChecker(CheckerManager &mgr) {
669   mgr.registerChecker<GenericTaintChecker>();
670 }
671 
672 bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) {
673   return true;
674 }
675