1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This checker defines the attack surface for generic taint propagation.
11 //
12 // The taint information produced by it might be useful to other checkers. For
13 // example, checkers should report errors which involve tainted data more
14 // aggressively, even if the involved symbols are under constrained.
15 //
16 //===----------------------------------------------------------------------===//
17 #include "ClangSACheckers.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/Basic/Builtins.h"
20 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
21 #include "clang/StaticAnalyzer/Core/Checker.h"
22 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
23 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
24 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
25 #include <climits>
26 
27 using namespace clang;
28 using namespace ento;
29 
30 namespace {
31 class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
32                                             check::PreStmt<CallExpr> > {
33 public:
34   static void *getTag() { static int Tag; return &Tag; }
35 
36   void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
37 
38   void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
39 
40 private:
41   static const unsigned InvalidArgIndex = UINT_MAX;
42   /// Denotes the return vale.
43   static const unsigned ReturnValueIndex = UINT_MAX - 1;
44 
45   mutable std::unique_ptr<BugType> BT;
46   inline void initBugType() const {
47     if (!BT)
48       BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
49   }
50 
51   /// \brief Catch taint related bugs. Check if tainted data is passed to a
52   /// system call etc.
53   bool checkPre(const CallExpr *CE, CheckerContext &C) const;
54 
55   /// \brief Add taint sources on a pre-visit.
56   void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
57 
58   /// \brief Propagate taint generated at pre-visit.
59   bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
60 
61   /// \brief Add taint sources on a post visit.
62   void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
63 
64   /// Check if the region the expression evaluates to is the standard input,
65   /// and thus, is tainted.
66   static bool isStdin(const Expr *E, CheckerContext &C);
67 
68   /// This is called from getPointedToSymbol() to resolve symbol references for
69   /// the region underlying a LazyCompoundVal. This is the default binding
70   /// for the LCV, which could be a conjured symbol from a function call that
71   /// initialized the region. It only returns the conjured symbol if the LCV
72   /// covers the entire region, e.g. we avoid false positives by not returning
73   /// a default bindingc for an entire struct if the symbol for only a single
74   /// field or element within it is requested.
75   // TODO: Return an appropriate symbol for sub-fields/elements of an LCV so
76   // that they are also appropriately tainted.
77   static SymbolRef getLCVSymbol(CheckerContext &C,
78                                 nonloc::LazyCompoundVal &LCV);
79 
80   /// \brief Given a pointer argument, get the symbol of the value it contains
81   /// (points to).
82   static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg);
83 
84   /// Functions defining the attack surface.
85   typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *,
86                                                        CheckerContext &C) const;
87   ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const;
88   ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const;
89   ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const;
90 
91   /// Taint the scanned input if the file is tainted.
92   ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const;
93 
94   /// Check for CWE-134: Uncontrolled Format String.
95   static const char MsgUncontrolledFormatString[];
96   bool checkUncontrolledFormatString(const CallExpr *CE,
97                                      CheckerContext &C) const;
98 
99   /// Check for:
100   /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
101   /// CWE-78, "Failure to Sanitize Data into an OS Command"
102   static const char MsgSanitizeSystemArgs[];
103   bool checkSystemCall(const CallExpr *CE, StringRef Name,
104                        CheckerContext &C) const;
105 
106   /// Check if tainted data is used as a buffer size ins strn.. functions,
107   /// and allocators.
108   static const char MsgTaintedBufferSize[];
109   bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
110                               CheckerContext &C) const;
111 
112   /// Generate a report if the expression is tainted or points to tainted data.
113   bool generateReportIfTainted(const Expr *E, const char Msg[],
114                                CheckerContext &C) const;
115 
116   /// The bug visitor prints a diagnostic message at the location where a given
117   /// variable was tainted.
118   class TaintBugVisitor
119       : public BugReporterVisitorImpl<TaintBugVisitor> {
120   private:
121     const SVal V;
122 
123   public:
124     TaintBugVisitor(const SVal V) : V(V) {}
125     void Profile(llvm::FoldingSetNodeID &ID) const override { ID.Add(V); }
126 
127     std::shared_ptr<PathDiagnosticPiece> VisitNode(const ExplodedNode *N,
128                                                    const ExplodedNode *PrevN,
129                                                    BugReporterContext &BRC,
130                                                    BugReport &BR) override;
131   };
132 
133   typedef SmallVector<unsigned, 2> ArgVector;
134 
135   /// \brief A struct used to specify taint propagation rules for a function.
136   ///
137   /// If any of the possible taint source arguments is tainted, all of the
138   /// destination arguments should also be tainted. Use InvalidArgIndex in the
139   /// src list to specify that all of the arguments can introduce taint. Use
140   /// InvalidArgIndex in the dst arguments to signify that all the non-const
141   /// pointer and reference arguments might be tainted on return. If
142   /// ReturnValueIndex is added to the dst list, the return value will be
143   /// tainted.
144   struct TaintPropagationRule {
145     /// List of arguments which can be taint sources and should be checked.
146     ArgVector SrcArgs;
147     /// List of arguments which should be tainted on function return.
148     ArgVector DstArgs;
149     // TODO: Check if using other data structures would be more optimal.
150 
151     TaintPropagationRule() {}
152 
153     TaintPropagationRule(unsigned SArg,
154                          unsigned DArg, bool TaintRet = false) {
155       SrcArgs.push_back(SArg);
156       DstArgs.push_back(DArg);
157       if (TaintRet)
158         DstArgs.push_back(ReturnValueIndex);
159     }
160 
161     TaintPropagationRule(unsigned SArg1, unsigned SArg2,
162                          unsigned DArg, bool TaintRet = false) {
163       SrcArgs.push_back(SArg1);
164       SrcArgs.push_back(SArg2);
165       DstArgs.push_back(DArg);
166       if (TaintRet)
167         DstArgs.push_back(ReturnValueIndex);
168     }
169 
170     /// Get the propagation rule for a given function.
171     static TaintPropagationRule
172       getTaintPropagationRule(const FunctionDecl *FDecl,
173                               StringRef Name,
174                               CheckerContext &C);
175 
176     inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
177     inline void addDstArg(unsigned A)  { DstArgs.push_back(A); }
178 
179     inline bool isNull() const { return SrcArgs.empty(); }
180 
181     inline bool isDestinationArgument(unsigned ArgNum) const {
182       return (std::find(DstArgs.begin(),
183                         DstArgs.end(), ArgNum) != DstArgs.end());
184     }
185 
186     static inline bool isTaintedOrPointsToTainted(const Expr *E,
187                                                   ProgramStateRef State,
188                                                   CheckerContext &C) {
189       return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) ||
190               (E->getType().getTypePtr()->isPointerType() &&
191                State->isTainted(getPointedToSymbol(C, E))));
192     }
193 
194     /// \brief Pre-process a function which propagates taint according to the
195     /// taint rule.
196     ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
197 
198   };
199 };
200 
201 const unsigned GenericTaintChecker::ReturnValueIndex;
202 const unsigned GenericTaintChecker::InvalidArgIndex;
203 
204 const char GenericTaintChecker::MsgUncontrolledFormatString[] =
205   "Untrusted data is used as a format string "
206   "(CWE-134: Uncontrolled Format String)";
207 
208 const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
209   "Untrusted data is passed to a system call "
210   "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
211 
212 const char GenericTaintChecker::MsgTaintedBufferSize[] =
213   "Untrusted data is used to specify the buffer size "
214   "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for "
215   "character data and the null terminator)";
216 
217 } // end of anonymous namespace
218 
/// A program-state set used to pass information from the pre-visit of a call
/// to its post-visit. The values are unsigned integers: either
/// ReturnValueIndex, or the index of a pointer/reference argument whose
/// pointed-to data should be tainted on return. The set is filled at pre-visit
/// (TaintPropagationRule::process, preFscanf) and consumed by
/// propagateFromPre.
REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
224 
225 std::shared_ptr<PathDiagnosticPiece>
226 GenericTaintChecker::TaintBugVisitor::VisitNode(const ExplodedNode *N,
227     const ExplodedNode *PrevN, BugReporterContext &BRC, BugReport &BR) {
228 
229   // Find the ExplodedNode where the taint was first introduced
230   if (!N->getState()->isTainted(V) || PrevN->getState()->isTainted(V))
231     return nullptr;
232 
233   const Stmt *S = PathDiagnosticLocation::getStmt(N);
234   if (!S)
235     return nullptr;
236 
237   const LocationContext *NCtx = N->getLocationContext();
238   PathDiagnosticLocation L =
239       PathDiagnosticLocation::createBegin(S, BRC.getSourceManager(), NCtx);
240   if (!L.isValid() || !L.asLocation().isValid())
241     return nullptr;
242 
243   return std::make_shared<PathDiagnosticEventPiece>(
244       L, "Taint originated here");
245 }
246 
247 GenericTaintChecker::TaintPropagationRule
248 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
249                                                      const FunctionDecl *FDecl,
250                                                      StringRef Name,
251                                                      CheckerContext &C) {
252   // TODO: Currently, we might lose precision here: we always mark a return
253   // value as tainted even if it's just a pointer, pointing to tainted data.
254 
255   // Check for exact name match for functions without builtin substitutes.
256   TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
257     .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
258     .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
259     .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
260     .Case("getc", TaintPropagationRule(0, ReturnValueIndex))
261     .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex))
262     .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex))
263     .Case("getw", TaintPropagationRule(0, ReturnValueIndex))
264     .Case("toupper", TaintPropagationRule(0, ReturnValueIndex))
265     .Case("tolower", TaintPropagationRule(0, ReturnValueIndex))
266     .Case("strchr", TaintPropagationRule(0, ReturnValueIndex))
267     .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex))
268     .Case("read", TaintPropagationRule(0, 2, 1, true))
269     .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true))
270     .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true))
271     .Case("fgets", TaintPropagationRule(2, 0, true))
272     .Case("getline", TaintPropagationRule(2, 0))
273     .Case("getdelim", TaintPropagationRule(3, 0))
274     .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex))
275     .Default(TaintPropagationRule());
276 
277   if (!Rule.isNull())
278     return Rule;
279 
280   // Check if it's one of the memory setting/copying functions.
281   // This check is specialized but faster then calling isCLibraryFunction.
282   unsigned BId = 0;
283   if ( (BId = FDecl->getMemoryFunctionKind()) )
284     switch(BId) {
285     case Builtin::BImemcpy:
286     case Builtin::BImemmove:
287     case Builtin::BIstrncpy:
288     case Builtin::BIstrncat:
289       return TaintPropagationRule(1, 2, 0, true);
290     case Builtin::BIstrlcpy:
291     case Builtin::BIstrlcat:
292       return TaintPropagationRule(1, 2, 0, false);
293     case Builtin::BIstrndup:
294       return TaintPropagationRule(0, 1, ReturnValueIndex);
295 
296     default:
297       break;
298     };
299 
300   // Process all other functions which could be defined as builtins.
301   if (Rule.isNull()) {
302     if (C.isCLibraryFunction(FDecl, "snprintf") ||
303         C.isCLibraryFunction(FDecl, "sprintf"))
304       return TaintPropagationRule(InvalidArgIndex, 0, true);
305     else if (C.isCLibraryFunction(FDecl, "strcpy") ||
306              C.isCLibraryFunction(FDecl, "stpcpy") ||
307              C.isCLibraryFunction(FDecl, "strcat"))
308       return TaintPropagationRule(1, 0, true);
309     else if (C.isCLibraryFunction(FDecl, "bcopy"))
310       return TaintPropagationRule(0, 2, 1, false);
311     else if (C.isCLibraryFunction(FDecl, "strdup") ||
312              C.isCLibraryFunction(FDecl, "strdupa"))
313       return TaintPropagationRule(0, ReturnValueIndex);
314     else if (C.isCLibraryFunction(FDecl, "wcsdup"))
315       return TaintPropagationRule(0, ReturnValueIndex);
316   }
317 
318   // Skipping the following functions, since they might be used for cleansing
319   // or smart memory copy:
320   // - memccpy - copying until hitting a special character.
321 
322   return TaintPropagationRule();
323 }
324 
325 void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
326                                        CheckerContext &C) const {
327   // Check for errors first.
328   if (checkPre(CE, C))
329     return;
330 
331   // Add taint second.
332   addSourcesPre(CE, C);
333 }
334 
335 void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
336                                         CheckerContext &C) const {
337   if (propagateFromPre(CE, C))
338     return;
339   addSourcesPost(CE, C);
340 }
341 
342 void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
343                                         CheckerContext &C) const {
344   ProgramStateRef State = nullptr;
345   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
346   if (!FDecl || FDecl->getKind() != Decl::Function)
347     return;
348 
349   StringRef Name = C.getCalleeName(FDecl);
350   if (Name.empty())
351     return;
352 
353   // First, try generating a propagation rule for this function.
354   TaintPropagationRule Rule =
355     TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
356   if (!Rule.isNull()) {
357     State = Rule.process(CE, C);
358     if (!State)
359       return;
360     C.addTransition(State);
361     return;
362   }
363 
364   // Otherwise, check if we have custom pre-processing implemented.
365   FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
366     .Case("fscanf", &GenericTaintChecker::preFscanf)
367     .Default(nullptr);
368   // Check and evaluate the call.
369   if (evalFunction)
370     State = (this->*evalFunction)(CE, C);
371   if (!State)
372     return;
373   C.addTransition(State);
374 
375 }
376 
377 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
378                                            CheckerContext &C) const {
379   ProgramStateRef State = C.getState();
380 
381   // Depending on what was tainted at pre-visit, we determined a set of
382   // arguments which should be tainted after the function returns. These are
383   // stored in the state as TaintArgsOnPostVisit set.
384   TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
385   if (TaintArgs.isEmpty())
386     return false;
387 
388   for (llvm::ImmutableSet<unsigned>::iterator
389          I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
390     unsigned ArgNum  = *I;
391 
392     // Special handling for the tainted return value.
393     if (ArgNum == ReturnValueIndex) {
394       State = State->addTaint(CE, C.getLocationContext());
395       continue;
396     }
397 
398     // The arguments are pointer arguments. The data they are pointing at is
399     // tainted after the call.
400     if (CE->getNumArgs() < (ArgNum + 1))
401       return false;
402     const Expr* Arg = CE->getArg(ArgNum);
403     SymbolRef Sym = getPointedToSymbol(C, Arg);
404     if (Sym)
405       State = State->addTaint(Sym);
406   }
407 
408   // Clear up the taint info from the state.
409   State = State->remove<TaintArgsOnPostVisit>();
410 
411   if (State != C.getState()) {
412     C.addTransition(State);
413     return true;
414   }
415   return false;
416 }
417 
418 void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
419                                          CheckerContext &C) const {
420   // Define the attack surface.
421   // Set the evaluation function by switching on the callee name.
422   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
423   if (!FDecl || FDecl->getKind() != Decl::Function)
424     return;
425 
426   StringRef Name = C.getCalleeName(FDecl);
427   if (Name.empty())
428     return;
429   FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
430     .Case("scanf", &GenericTaintChecker::postScanf)
431     // TODO: Add support for vfscanf & family.
432     .Case("getchar", &GenericTaintChecker::postRetTaint)
433     .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint)
434     .Case("getenv", &GenericTaintChecker::postRetTaint)
435     .Case("fopen", &GenericTaintChecker::postRetTaint)
436     .Case("fdopen", &GenericTaintChecker::postRetTaint)
437     .Case("freopen", &GenericTaintChecker::postRetTaint)
438     .Case("getch", &GenericTaintChecker::postRetTaint)
439     .Case("wgetch", &GenericTaintChecker::postRetTaint)
440     .Case("socket", &GenericTaintChecker::postSocket)
441     .Default(nullptr);
442 
443   // If the callee isn't defined, it is not of security concern.
444   // Check and evaluate the call.
445   ProgramStateRef State = nullptr;
446   if (evalFunction)
447     State = (this->*evalFunction)(CE, C);
448   if (!State)
449     return;
450 
451   C.addTransition(State);
452 }
453 
454 bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
455 
456   if (checkUncontrolledFormatString(CE, C))
457     return true;
458 
459   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
460   if (!FDecl || FDecl->getKind() != Decl::Function)
461     return false;
462 
463   StringRef Name = C.getCalleeName(FDecl);
464   if (Name.empty())
465     return false;
466 
467   if (checkSystemCall(CE, Name, C))
468     return true;
469 
470   if (checkTaintedBufferSize(CE, FDecl, C))
471     return true;
472 
473   return false;
474 }
475 
476 SymbolRef GenericTaintChecker::getLCVSymbol(CheckerContext &C,
477                                             nonloc::LazyCompoundVal &LCV) {
478   StoreManager &StoreMgr = C.getStoreManager();
479 
480   // getLCVSymbol() is reached in a PostStmt so we can always expect a default
481   // binding to exist if one is present.
482   if (Optional<SVal> binding = StoreMgr.getDefaultBinding(LCV)) {
483     SymbolRef Sym = binding->getAsSymbol();
484     if (!Sym)
485       return nullptr;
486 
487     // If the LCV covers an entire base region return the default conjured symbol.
488     if (LCV.getRegion() == LCV.getRegion()->getBaseRegion())
489       return Sym;
490   }
491 
492   // Otherwise, return a nullptr as there's not yet a functional way to taint
493   // sub-regions of LCVs.
494   return nullptr;
495 }
496 
497 SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
498                                                   const Expr* Arg) {
499   ProgramStateRef State = C.getState();
500   SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
501   if (AddrVal.isUnknownOrUndef())
502     return nullptr;
503 
504   Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
505   if (!AddrLoc)
506     return nullptr;
507 
508   const PointerType *ArgTy =
509     dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
510   SVal Val = State->getSVal(*AddrLoc,
511                             ArgTy ? ArgTy->getPointeeType(): QualType());
512 
513   if (auto LCV = Val.getAs<nonloc::LazyCompoundVal>())
514     return getLCVSymbol(C, *LCV);
515 
516   return Val.getAsSymbol();
517 }
518 
519 ProgramStateRef
520 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
521                                                    CheckerContext &C) const {
522   ProgramStateRef State = C.getState();
523 
524   // Check for taint in arguments.
525   bool IsTainted = false;
526   for (ArgVector::const_iterator I = SrcArgs.begin(),
527                                  E = SrcArgs.end(); I != E; ++I) {
528     unsigned ArgNum = *I;
529 
530     if (ArgNum == InvalidArgIndex) {
531       // Check if any of the arguments is tainted, but skip the
532       // destination arguments.
533       for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
534         if (isDestinationArgument(i))
535           continue;
536         if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
537           break;
538       }
539       break;
540     }
541 
542     if (CE->getNumArgs() < (ArgNum + 1))
543       return State;
544     if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
545       break;
546   }
547   if (!IsTainted)
548     return State;
549 
550   // Mark the arguments which should be tainted after the function returns.
551   for (ArgVector::const_iterator I = DstArgs.begin(),
552                                  E = DstArgs.end(); I != E; ++I) {
553     unsigned ArgNum = *I;
554 
555     // Should we mark all arguments as tainted?
556     if (ArgNum == InvalidArgIndex) {
557       // For all pointer and references that were passed in:
558       //   If they are not pointing to const data, mark data as tainted.
559       //   TODO: So far we are just going one level down; ideally we'd need to
560       //         recurse here.
561       for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
562         const Expr *Arg = CE->getArg(i);
563         // Process pointer argument.
564         const Type *ArgTy = Arg->getType().getTypePtr();
565         QualType PType = ArgTy->getPointeeType();
566         if ((!PType.isNull() && !PType.isConstQualified())
567             || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
568           State = State->add<TaintArgsOnPostVisit>(i);
569       }
570       continue;
571     }
572 
573     // Should mark the return value?
574     if (ArgNum == ReturnValueIndex) {
575       State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
576       continue;
577     }
578 
579     // Mark the given argument.
580     assert(ArgNum < CE->getNumArgs());
581     State = State->add<TaintArgsOnPostVisit>(ArgNum);
582   }
583 
584   return State;
585 }
586 
587 
588 // If argument 0 (file descriptor) is tainted, all arguments except for arg 0
589 // and arg 1 should get taint.
590 ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE,
591                                                    CheckerContext &C) const {
592   assert(CE->getNumArgs() >= 2);
593   ProgramStateRef State = C.getState();
594 
595   // Check is the file descriptor is tainted.
596   if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
597       isStdin(CE->getArg(0), C)) {
598     // All arguments except for the first two should get taint.
599     for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
600         State = State->add<TaintArgsOnPostVisit>(i);
601     return State;
602   }
603 
604   return nullptr;
605 }
606 
607 
608 // If argument 0(protocol domain) is network, the return value should get taint.
609 ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE,
610                                                 CheckerContext &C) const {
611   ProgramStateRef State = C.getState();
612   if (CE->getNumArgs() < 3)
613     return State;
614 
615   SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
616   StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
617   // White list the internal communication protocols.
618   if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
619       DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
620     return State;
621   State = State->addTaint(CE, C.getLocationContext());
622   return State;
623 }
624 
625 ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE,
626                                                    CheckerContext &C) const {
627   ProgramStateRef State = C.getState();
628   if (CE->getNumArgs() < 2)
629     return State;
630 
631   // All arguments except for the very first one should get taint.
632   for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
633     // The arguments are pointer arguments. The data they are pointing at is
634     // tainted after the call.
635     const Expr* Arg = CE->getArg(i);
636         SymbolRef Sym = getPointedToSymbol(C, Arg);
637     if (Sym)
638       State = State->addTaint(Sym);
639   }
640   return State;
641 }
642 
643 ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE,
644                                                   CheckerContext &C) const {
645   return C.getState()->addTaint(CE, C.getLocationContext());
646 }
647 
648 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
649   ProgramStateRef State = C.getState();
650   SVal Val = State->getSVal(E, C.getLocationContext());
651 
652   // stdin is a pointer, so it would be a region.
653   const MemRegion *MemReg = Val.getAsRegion();
654 
655   // The region should be symbolic, we do not know it's value.
656   const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
657   if (!SymReg)
658     return false;
659 
660   // Get it's symbol and find the declaration region it's pointing to.
661   const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
662   if (!Sm)
663     return false;
664   const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
665   if (!DeclReg)
666     return false;
667 
668   // This region corresponds to a declaration, find out if it's a global/extern
669   // variable named stdin with the proper type.
670   if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
671     D = D->getCanonicalDecl();
672     if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
673         if (const PointerType * PtrTy =
674               dyn_cast<PointerType>(D->getType().getTypePtr()))
675           if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
676             return true;
677   }
678   return false;
679 }
680 
681 static bool getPrintfFormatArgumentNum(const CallExpr *CE,
682                                        const CheckerContext &C,
683                                        unsigned int &ArgNum) {
684   // Find if the function contains a format string argument.
685   // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
686   // vsnprintf, syslog, custom annotated functions.
687   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
688   if (!FDecl)
689     return false;
690   for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
691     ArgNum = Format->getFormatIdx() - 1;
692     if ((Format->getType()->getName() == "printf") &&
693          CE->getNumArgs() > ArgNum)
694       return true;
695   }
696 
697   // Or if a function is named setproctitle (this is a heuristic).
698   if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
699     ArgNum = 0;
700     return true;
701   }
702 
703   return false;
704 }
705 
706 bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
707                                                   const char Msg[],
708                                                   CheckerContext &C) const {
709   assert(E);
710 
711   // Check for taint.
712   ProgramStateRef State = C.getState();
713   const SymbolRef PointedToSym = getPointedToSymbol(C, E);
714   SVal TaintedSVal;
715   if (State->isTainted(PointedToSym))
716     TaintedSVal = nonloc::SymbolVal(PointedToSym);
717   else if (State->isTainted(E, C.getLocationContext()))
718     TaintedSVal = C.getSVal(E);
719   else
720     return false;
721 
722   // Generate diagnostic.
723   if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
724     initBugType();
725     auto report = llvm::make_unique<BugReport>(*BT, Msg, N);
726     report->addRange(E->getSourceRange());
727     report->addVisitor(llvm::make_unique<TaintBugVisitor>(TaintedSVal));
728     C.emitReport(std::move(report));
729     return true;
730   }
731   return false;
732 }
733 
734 bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
735                                                         CheckerContext &C) const{
736   // Check if the function contains a format string argument.
737   unsigned int ArgNum = 0;
738   if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
739     return false;
740 
741   // If either the format string content or the pointer itself are tainted, warn.
742   return generateReportIfTainted(CE->getArg(ArgNum),
743                                  MsgUncontrolledFormatString, C);
744 }
745 
746 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
747                                           StringRef Name,
748                                           CheckerContext &C) const {
749   // TODO: It might make sense to run this check on demand. In some cases,
750   // we should check if the environment has been cleansed here. We also might
751   // need to know if the user was reset before these calls(seteuid).
752   unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
753     .Case("system", 0)
754     .Case("popen", 0)
755     .Case("execl", 0)
756     .Case("execle", 0)
757     .Case("execlp", 0)
758     .Case("execv", 0)
759     .Case("execvp", 0)
760     .Case("execvP", 0)
761     .Case("execve", 0)
762     .Case("dlopen", 0)
763     .Default(UINT_MAX);
764 
765   if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
766     return false;
767 
768   return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
769 }
770 
771 // TODO: Should this check be a part of the CString checker?
772 // If yes, should taint be a global setting?
773 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
774                                                  const FunctionDecl *FDecl,
775                                                  CheckerContext &C) const {
776   // If the function has a buffer size argument, set ArgNum.
777   unsigned ArgNum = InvalidArgIndex;
778   unsigned BId = 0;
779   if ( (BId = FDecl->getMemoryFunctionKind()) )
780     switch(BId) {
781     case Builtin::BImemcpy:
782     case Builtin::BImemmove:
783     case Builtin::BIstrncpy:
784       ArgNum = 2;
785       break;
786     case Builtin::BIstrndup:
787       ArgNum = 1;
788       break;
789     default:
790       break;
791     };
792 
793   if (ArgNum == InvalidArgIndex) {
794     if (C.isCLibraryFunction(FDecl, "malloc") ||
795         C.isCLibraryFunction(FDecl, "calloc") ||
796         C.isCLibraryFunction(FDecl, "alloca"))
797       ArgNum = 0;
798     else if (C.isCLibraryFunction(FDecl, "memccpy"))
799       ArgNum = 3;
800     else if (C.isCLibraryFunction(FDecl, "realloc"))
801       ArgNum = 1;
802     else if (C.isCLibraryFunction(FDecl, "bcopy"))
803       ArgNum = 2;
804   }
805 
806   return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
807          generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
808 }
809 
810 void ento::registerGenericTaintChecker(CheckerManager &mgr) {
811   mgr.registerChecker<GenericTaintChecker>();
812 }
813