1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This checker defines the attack surface for generic taint propagation.
11 //
// The taint information produced by it might be useful to other checkers. For
// example, checkers should report errors which involve tainted data more
// aggressively, even if the involved symbols are underconstrained.
15 //
16 //===----------------------------------------------------------------------===//
17 #include "ClangSACheckers.h"
18 #include "clang/StaticAnalyzer/Core/Checker.h"
19 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
20 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
21 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
22 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
23 #include "clang/Basic/Builtins.h"
24 #include <climits>
25 
26 using namespace clang;
27 using namespace ento;
28 
29 namespace {
30 class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
31                                             check::PreStmt<CallExpr> > {
32 public:
33   static void *getTag() { static int Tag; return &Tag; }
34 
35   void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
36   void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const;
37 
38   void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
39 
40 private:
41   static const unsigned InvalidArgIndex = UINT_MAX;
42   /// Denotes the return vale.
43   static const unsigned ReturnValueIndex = UINT_MAX - 1;
44 
45   mutable OwningPtr<BugType> BT;
46   inline void initBugType() const {
47     if (!BT)
48       BT.reset(new BugType("Taint Analysis", "General"));
49   }
50 
51   /// \brief Catch taint related bugs. Check if tainted data is passed to a
52   /// system call etc.
53   bool checkPre(const CallExpr *CE, CheckerContext &C) const;
54 
55   /// \brief Add taint sources on a pre-visit.
56   void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
57 
58   /// \brief Propagate taint generated at pre-visit.
59   bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
60 
61   /// \brief Add taint sources on a post visit.
62   void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
63 
64   /// Check if the region the expression evaluates to is the standard input,
65   /// and thus, is tainted.
66   static bool isStdin(const Expr *E, CheckerContext &C);
67 
68   /// \brief Given a pointer argument, get the symbol of the value it contains
69   /// (points to).
70   static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg);
71 
72   /// Functions defining the attack surface.
73   typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *,
74                                                        CheckerContext &C) const;
75   ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const;
76   ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const;
77   ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const;
78 
79   /// Taint the scanned input if the file is tainted.
80   ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const;
81 
82   /// Check for CWE-134: Uncontrolled Format String.
83   static const char MsgUncontrolledFormatString[];
84   bool checkUncontrolledFormatString(const CallExpr *CE,
85                                      CheckerContext &C) const;
86 
87   /// Check for:
88   /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
89   /// CWE-78, "Failure to Sanitize Data into an OS Command"
90   static const char MsgSanitizeSystemArgs[];
91   bool checkSystemCall(const CallExpr *CE, StringRef Name,
92                        CheckerContext &C) const;
93 
94   /// Check if tainted data is used as a buffer size ins strn.. functions,
95   /// and allocators.
96   static const char MsgTaintedBufferSize[];
97   bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
98                               CheckerContext &C) const;
99 
100   /// Generate a report if the expression is tainted or points to tainted data.
101   bool generateReportIfTainted(const Expr *E, const char Msg[],
102                                CheckerContext &C) const;
103 
104 
105   typedef llvm::SmallVector<unsigned, 2> ArgVector;
106 
107   /// \brief A struct used to specify taint propagation rules for a function.
108   ///
109   /// If any of the possible taint source arguments is tainted, all of the
110   /// destination arguments should also be tainted. Use InvalidArgIndex in the
111   /// src list to specify that all of the arguments can introduce taint. Use
112   /// InvalidArgIndex in the dst arguments to signify that all the non-const
113   /// pointer and reference arguments might be tainted on return. If
114   /// ReturnValueIndex is added to the dst list, the return value will be
115   /// tainted.
116   struct TaintPropagationRule {
117     /// List of arguments which can be taint sources and should be checked.
118     ArgVector SrcArgs;
119     /// List of arguments which should be tainted on function return.
120     ArgVector DstArgs;
121     // TODO: Check if using other data structures would be more optimal.
122 
123     TaintPropagationRule() {}
124 
125     TaintPropagationRule(unsigned SArg,
126                          unsigned DArg, bool TaintRet = false) {
127       SrcArgs.push_back(SArg);
128       DstArgs.push_back(DArg);
129       if (TaintRet)
130         DstArgs.push_back(ReturnValueIndex);
131     }
132 
133     TaintPropagationRule(unsigned SArg1, unsigned SArg2,
134                          unsigned DArg, bool TaintRet = false) {
135       SrcArgs.push_back(SArg1);
136       SrcArgs.push_back(SArg2);
137       DstArgs.push_back(DArg);
138       if (TaintRet)
139         DstArgs.push_back(ReturnValueIndex);
140     }
141 
142     /// Get the propagation rule for a given function.
143     static TaintPropagationRule
144       getTaintPropagationRule(const FunctionDecl *FDecl,
145                               StringRef Name,
146                               CheckerContext &C);
147 
148     inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
149     inline void addDstArg(unsigned A)  { DstArgs.push_back(A); }
150 
151     inline bool isNull() const { return SrcArgs.empty(); }
152 
153     inline bool isDestinationArgument(unsigned ArgNum) const {
154       return (std::find(DstArgs.begin(),
155                         DstArgs.end(), ArgNum) != DstArgs.end());
156     }
157 
158     static inline bool isTaintedOrPointsToTainted(const Expr *E,
159                                                   ProgramStateRef State,
160                                                   CheckerContext &C) {
161       return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) ||
162               (E->getType().getTypePtr()->isPointerType() &&
163                State->isTainted(getPointedToSymbol(C, E))));
164     }
165 
166     /// \brief Pre-process a function which propagates taint according to the
167     /// taint rule.
168     ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
169 
170   };
171 };
172 
173 const unsigned GenericTaintChecker::ReturnValueIndex;
174 const unsigned GenericTaintChecker::InvalidArgIndex;
175 
176 const char GenericTaintChecker::MsgUncontrolledFormatString[] =
177   "Tainted format string (CWE-134: Uncontrolled Format String)";
178 
179 const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
180   "Tainted data passed to a system call "
181   "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
182 
183 const char GenericTaintChecker::MsgTaintedBufferSize[] =
184   "Tainted data is used to specify the buffer size "
185   "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for "
186   "character data and the null terminator)";
187 
188 } // end of anonymous namespace
189 
190 /// A set which is used to pass information from call pre-visit instruction
191 /// to the call post-visit. The values are unsigned integers, which are either
192 /// ReturnValueIndex, or indexes of the pointer/reference argument, which
193 /// points to data, which should be tainted on return.
194 namespace { struct TaintArgsOnPostVisit{}; }
195 namespace clang { namespace ento {
196 template<> struct ProgramStateTrait<TaintArgsOnPostVisit>
197     :  public ProgramStatePartialTrait<llvm::ImmutableSet<unsigned> > {
198   static void *GDMIndex() { return GenericTaintChecker::getTag(); }
199 };
200 }}
201 
202 GenericTaintChecker::TaintPropagationRule
203 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
204                                                      const FunctionDecl *FDecl,
205                                                      StringRef Name,
206                                                      CheckerContext &C) {
207   // TODO: Currently, we might loose precision here: we always mark a return
208   // value as tainted even if it's just a pointer, pointing to tainted data.
209 
210   // Check for exact name match for functions without builtin substitutes.
211   TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
212     .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
213     .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
214     .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
215     .Case("getc", TaintPropagationRule(0, ReturnValueIndex))
216     .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex))
217     .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex))
218     .Case("getw", TaintPropagationRule(0, ReturnValueIndex))
219     .Case("toupper", TaintPropagationRule(0, ReturnValueIndex))
220     .Case("tolower", TaintPropagationRule(0, ReturnValueIndex))
221     .Case("strchr", TaintPropagationRule(0, ReturnValueIndex))
222     .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex))
223     .Case("read", TaintPropagationRule(0, 2, 1, true))
224     .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true))
225     .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true))
226     .Case("fgets", TaintPropagationRule(2, 0, true))
227     .Case("getline", TaintPropagationRule(2, 0))
228     .Case("getdelim", TaintPropagationRule(3, 0))
229     .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex))
230     .Default(TaintPropagationRule());
231 
232   if (!Rule.isNull())
233     return Rule;
234 
235   // Check if it's one of the memory setting/copying functions.
236   // This check is specialized but faster then calling isCLibraryFunction.
237   unsigned BId = 0;
238   if ( (BId = FDecl->getMemoryFunctionKind()) )
239     switch(BId) {
240     case Builtin::BImemcpy:
241     case Builtin::BImemmove:
242     case Builtin::BIstrncpy:
243     case Builtin::BIstrncat:
244       return TaintPropagationRule(1, 2, 0, true);
245     case Builtin::BIstrlcpy:
246     case Builtin::BIstrlcat:
247       return TaintPropagationRule(1, 2, 0, false);
248     case Builtin::BIstrndup:
249       return TaintPropagationRule(0, 1, ReturnValueIndex);
250 
251     default:
252       break;
253     };
254 
255   // Process all other functions which could be defined as builtins.
256   if (Rule.isNull()) {
257     if (C.isCLibraryFunction(FDecl, "snprintf") ||
258         C.isCLibraryFunction(FDecl, "sprintf"))
259       return TaintPropagationRule(InvalidArgIndex, 0, true);
260     else if (C.isCLibraryFunction(FDecl, "strcpy") ||
261              C.isCLibraryFunction(FDecl, "stpcpy") ||
262              C.isCLibraryFunction(FDecl, "strcat"))
263       return TaintPropagationRule(1, 0, true);
264     else if (C.isCLibraryFunction(FDecl, "bcopy"))
265       return TaintPropagationRule(0, 2, 1, false);
266     else if (C.isCLibraryFunction(FDecl, "strdup") ||
267              C.isCLibraryFunction(FDecl, "strdupa"))
268       return TaintPropagationRule(0, ReturnValueIndex);
269     else if (C.isCLibraryFunction(FDecl, "wcsdup"))
270       return TaintPropagationRule(0, ReturnValueIndex);
271   }
272 
273   // Skipping the following functions, since they might be used for cleansing
274   // or smart memory copy:
275   // - memccpy - copying untill hitting a special character.
276 
277   return TaintPropagationRule();
278 }
279 
280 void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
281                                        CheckerContext &C) const {
282   // Check for errors first.
283   if (checkPre(CE, C))
284     return;
285 
286   // Add taint second.
287   addSourcesPre(CE, C);
288 }
289 
290 void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
291                                         CheckerContext &C) const {
292   if (propagateFromPre(CE, C))
293     return;
294   addSourcesPost(CE, C);
295 }
296 
297 void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
298                                         CheckerContext &C) const {
299   ProgramStateRef State = 0;
300   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
301   StringRef Name = C.getCalleeName(FDecl);
302   if (Name.empty())
303     return;
304 
305   // First, try generating a propagation rule for this function.
306   TaintPropagationRule Rule =
307     TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
308   if (!Rule.isNull()) {
309     State = Rule.process(CE, C);
310     if (!State)
311       return;
312     C.addTransition(State);
313     return;
314   }
315 
316   // Otherwise, check if we have custom pre-processing implemented.
317   FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
318     .Case("fscanf", &GenericTaintChecker::preFscanf)
319     .Default(0);
320   // Check and evaluate the call.
321   if (evalFunction)
322     State = (this->*evalFunction)(CE, C);
323   if (!State)
324     return;
325   C.addTransition(State);
326 
327 }
328 
329 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
330                                            CheckerContext &C) const {
331   ProgramStateRef State = C.getState();
332 
333   // Depending on what was tainted at pre-visit, we determined a set of
334   // arguments which should be tainted after the function returns. These are
335   // stored in the state as TaintArgsOnPostVisit set.
336   llvm::ImmutableSet<unsigned> TaintArgs = State->get<TaintArgsOnPostVisit>();
337   if (TaintArgs.isEmpty())
338     return false;
339 
340   for (llvm::ImmutableSet<unsigned>::iterator
341          I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
342     unsigned ArgNum  = *I;
343 
344     // Special handling for the tainted return value.
345     if (ArgNum == ReturnValueIndex) {
346       State = State->addTaint(CE, C.getLocationContext());
347       continue;
348     }
349 
350     // The arguments are pointer arguments. The data they are pointing at is
351     // tainted after the call.
352     const Expr* Arg = CE->getArg(ArgNum);
353     SymbolRef Sym = getPointedToSymbol(C, Arg);
354     if (Sym)
355       State = State->addTaint(Sym);
356   }
357 
358   // Clear up the taint info from the state.
359   State = State->remove<TaintArgsOnPostVisit>();
360 
361   if (State != C.getState()) {
362     C.addTransition(State);
363     return true;
364   }
365   return false;
366 }
367 
368 void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
369                                          CheckerContext &C) const {
370   // Define the attack surface.
371   // Set the evaluation function by switching on the callee name.
372   StringRef Name = C.getCalleeName(CE);
373   if (Name.empty())
374     return;
375   FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
376     .Case("scanf", &GenericTaintChecker::postScanf)
377     // TODO: Add support for vfscanf & family.
378     .Case("getchar", &GenericTaintChecker::postRetTaint)
379     .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint)
380     .Case("getenv", &GenericTaintChecker::postRetTaint)
381     .Case("fopen", &GenericTaintChecker::postRetTaint)
382     .Case("fdopen", &GenericTaintChecker::postRetTaint)
383     .Case("freopen", &GenericTaintChecker::postRetTaint)
384     .Case("getch", &GenericTaintChecker::postRetTaint)
385     .Case("wgetch", &GenericTaintChecker::postRetTaint)
386     .Case("socket", &GenericTaintChecker::postSocket)
387     .Default(0);
388 
389   // If the callee isn't defined, it is not of security concern.
390   // Check and evaluate the call.
391   ProgramStateRef State = 0;
392   if (evalFunction)
393     State = (this->*evalFunction)(CE, C);
394   if (!State)
395     return;
396 
397   C.addTransition(State);
398 }
399 
400 bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
401 
402   if (checkUncontrolledFormatString(CE, C))
403     return true;
404 
405   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
406   StringRef Name = C.getCalleeName(FDecl);
407   if (Name.empty())
408     return false;
409 
410   if (checkSystemCall(CE, Name, C))
411     return true;
412 
413   if (checkTaintedBufferSize(CE, FDecl, C))
414     return true;
415 
416   return false;
417 }
418 
419 SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
420                                                   const Expr* Arg) {
421   ProgramStateRef State = C.getState();
422   SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
423   if (AddrVal.isUnknownOrUndef())
424     return 0;
425 
426   Loc *AddrLoc = dyn_cast<Loc>(&AddrVal);
427   if (!AddrLoc)
428     return 0;
429 
430   const PointerType *ArgTy =
431     dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
432   SVal Val = State->getSVal(*AddrLoc,
433                             ArgTy ? ArgTy->getPointeeType(): QualType());
434   return Val.getAsSymbol();
435 }
436 
437 ProgramStateRef
438 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
439                                                    CheckerContext &C) const {
440   ProgramStateRef State = C.getState();
441 
442   // Check for taint in arguments.
443   bool IsTainted = false;
444   for (ArgVector::const_iterator I = SrcArgs.begin(),
445                                  E = SrcArgs.end(); I != E; ++I) {
446     unsigned ArgNum = *I;
447 
448     if (ArgNum == InvalidArgIndex) {
449       // Check if any of the arguments is tainted, but skip the
450       // destination arguments.
451       for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
452         if (isDestinationArgument(i))
453           continue;
454         if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
455           break;
456       }
457       break;
458     }
459 
460     assert(ArgNum < CE->getNumArgs());
461     if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
462       break;
463   }
464   if (!IsTainted)
465     return State;
466 
467   // Mark the arguments which should be tainted after the function returns.
468   for (ArgVector::const_iterator I = DstArgs.begin(),
469                                  E = DstArgs.end(); I != E; ++I) {
470     unsigned ArgNum = *I;
471 
472     // Should we mark all arguments as tainted?
473     if (ArgNum == InvalidArgIndex) {
474       // For all pointer and references that were passed in:
475       //   If they are not pointing to const data, mark data as tainted.
476       //   TODO: So far we are just going one level down; ideally we'd need to
477       //         recurse here.
478       for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
479         const Expr *Arg = CE->getArg(i);
480         // Process pointer argument.
481         const Type *ArgTy = Arg->getType().getTypePtr();
482         QualType PType = ArgTy->getPointeeType();
483         if ((!PType.isNull() && !PType.isConstQualified())
484             || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
485           State = State->add<TaintArgsOnPostVisit>(i);
486       }
487       continue;
488     }
489 
490     // Should mark the return value?
491     if (ArgNum == ReturnValueIndex) {
492       State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
493       continue;
494     }
495 
496     // Mark the given argument.
497     assert(ArgNum < CE->getNumArgs());
498     State = State->add<TaintArgsOnPostVisit>(ArgNum);
499   }
500 
501   return State;
502 }
503 
504 
505 // If argument 0 (file descriptor) is tainted, all arguments except for arg 0
506 // and arg 1 should get taint.
507 ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE,
508                                                    CheckerContext &C) const {
509   assert(CE->getNumArgs() >= 2);
510   ProgramStateRef State = C.getState();
511 
512   // Check is the file descriptor is tainted.
513   if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
514       isStdin(CE->getArg(0), C)) {
515     // All arguments except for the first two should get taint.
516     for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
517         State = State->add<TaintArgsOnPostVisit>(i);
518     return State;
519   }
520 
521   return 0;
522 }
523 
524 
525 // If argument 0(protocol domain) is network, the return value should get taint.
526 ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE,
527                                                     CheckerContext &C) const {
528   assert(CE->getNumArgs() >= 3);
529   ProgramStateRef State = C.getState();
530 
531   SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
532   StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
533   // White list the internal communication protocols.
534   if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
535       DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
536     return State;
537   State = State->addTaint(CE, C.getLocationContext());
538   return State;
539 }
540 
541 ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE,
542                                                    CheckerContext &C) const {
543   ProgramStateRef State = C.getState();
544   assert(CE->getNumArgs() >= 2);
545   SVal x = State->getSVal(CE->getArg(1), C.getLocationContext());
546   // All arguments except for the very first one should get taint.
547   for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
548     // The arguments are pointer arguments. The data they are pointing at is
549     // tainted after the call.
550     const Expr* Arg = CE->getArg(i);
551         SymbolRef Sym = getPointedToSymbol(C, Arg);
552     if (Sym)
553       State = State->addTaint(Sym);
554   }
555   return State;
556 }
557 
558 ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE,
559                                                       CheckerContext &C) const {
560   return C.getState()->addTaint(CE, C.getLocationContext());
561 }
562 
563 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
564   ProgramStateRef State = C.getState();
565   SVal Val = State->getSVal(E, C.getLocationContext());
566 
567   // stdin is a pointer, so it would be a region.
568   const MemRegion *MemReg = Val.getAsRegion();
569 
570   // The region should be symbolic, we do not know it's value.
571   const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
572   if (!SymReg)
573     return false;
574 
575   // Get it's symbol and find the declaration region it's pointing to.
576   const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
577   if (!Sm)
578     return false;
579   const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
580   if (!DeclReg)
581     return false;
582 
583   // This region corresponds to a declaration, find out if it's a global/extern
584   // variable named stdin with the proper type.
585   if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
586     D = D->getCanonicalDecl();
587     if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
588         if (const PointerType * PtrTy =
589               dyn_cast<PointerType>(D->getType().getTypePtr()))
590           if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
591             return true;
592   }
593   return false;
594 }
595 
596 static bool getPrintfFormatArgumentNum(const CallExpr *CE,
597                                        const CheckerContext &C,
598                                        unsigned int &ArgNum) {
599   // Find if the function contains a format string argument.
600   // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
601   // vsnprintf, syslog, custom annotated functions.
602   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
603   if (!FDecl)
604     return false;
605   for (specific_attr_iterator<FormatAttr>
606          i = FDecl->specific_attr_begin<FormatAttr>(),
607          e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) {
608 
609     const FormatAttr *Format = *i;
610     ArgNum = Format->getFormatIdx() - 1;
611     if ((Format->getType() == "printf") && CE->getNumArgs() > ArgNum)
612       return true;
613   }
614 
615   // Or if a function is named setproctitle (this is a heuristic).
616   if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
617     ArgNum = 0;
618     return true;
619   }
620 
621   return false;
622 }
623 
624 bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
625                                                   const char Msg[],
626                                                   CheckerContext &C) const {
627   assert(E);
628 
629   // Check for taint.
630   ProgramStateRef State = C.getState();
631   if (!State->isTainted(getPointedToSymbol(C, E)) &&
632       !State->isTainted(E, C.getLocationContext()))
633     return false;
634 
635   // Generate diagnostic.
636   if (ExplodedNode *N = C.addTransition()) {
637     initBugType();
638     BugReport *report = new BugReport(*BT, Msg, N);
639     report->addRange(E->getSourceRange());
640     C.EmitReport(report);
641     return true;
642   }
643   return false;
644 }
645 
646 bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
647                                                         CheckerContext &C) const{
648   // Check if the function contains a format string argument.
649   unsigned int ArgNum = 0;
650   if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
651     return false;
652 
653   // If either the format string content or the pointer itself are tainted, warn.
654   if (generateReportIfTainted(CE->getArg(ArgNum),
655                               MsgUncontrolledFormatString, C))
656     return true;
657   return false;
658 }
659 
660 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
661                                           StringRef Name,
662                                           CheckerContext &C) const {
663   // TODO: It might make sense to run this check on demand. In some cases,
664   // we should check if the environment has been cleansed here. We also might
665   // need to know if the user was reset before these calls(seteuid).
666   unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
667     .Case("system", 0)
668     .Case("popen", 0)
669     .Case("execl", 0)
670     .Case("execle", 0)
671     .Case("execlp", 0)
672     .Case("execv", 0)
673     .Case("execvp", 0)
674     .Case("execvP", 0)
675     .Case("execve", 0)
676     .Case("dlopen", 0)
677     .Default(UINT_MAX);
678 
679   if (ArgNum == UINT_MAX)
680     return false;
681 
682   if (generateReportIfTainted(CE->getArg(ArgNum),
683                               MsgSanitizeSystemArgs, C))
684     return true;
685 
686   return false;
687 }
688 
689 // TODO: Should this check be a part of the CString checker?
690 // If yes, should taint be a global setting?
691 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
692                                                  const FunctionDecl *FDecl,
693                                                  CheckerContext &C) const {
694   // If the function has a buffer size argument, set ArgNum.
695   unsigned ArgNum = InvalidArgIndex;
696   unsigned BId = 0;
697   if ( (BId = FDecl->getMemoryFunctionKind()) )
698     switch(BId) {
699     case Builtin::BImemcpy:
700     case Builtin::BImemmove:
701     case Builtin::BIstrncpy:
702       ArgNum = 2;
703       break;
704     case Builtin::BIstrndup:
705       ArgNum = 1;
706       break;
707     default:
708       break;
709     };
710 
711   if (ArgNum == InvalidArgIndex) {
712     if (C.isCLibraryFunction(FDecl, "malloc") ||
713         C.isCLibraryFunction(FDecl, "calloc") ||
714         C.isCLibraryFunction(FDecl, "alloca"))
715       ArgNum = 0;
716     else if (C.isCLibraryFunction(FDecl, "memccpy"))
717       ArgNum = 3;
718     else if (C.isCLibraryFunction(FDecl, "realloc"))
719       ArgNum = 1;
720     else if (C.isCLibraryFunction(FDecl, "bcopy"))
721       ArgNum = 2;
722   }
723 
724   if (ArgNum != InvalidArgIndex &&
725       generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C))
726     return true;
727 
728   return false;
729 }
730 
731 void ento::registerGenericTaintChecker(CheckerManager &mgr) {
732   mgr.registerChecker<GenericTaintChecker>();
733 }
734