1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This checker defines the attack surface for generic taint propagation.
10 //
11 // The taint information produced by it might be useful to other checkers. For
12 // example, checkers should report errors which involve tainted data more
13 // aggressively, even if the involved symbols are under constrained.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "Taint.h"
18 #include "Yaml.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/Basic/Builtins.h"
21 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
22 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
23 #include "clang/StaticAnalyzer/Core/Checker.h"
24 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
25 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
26 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
27 #include "llvm/ADT/StringMap.h"
28 #include "llvm/Support/YAMLTraits.h"
29 #include <limits>
30 #include <utility>
31 
32 using namespace clang;
33 using namespace ento;
34 using namespace taint;
35 
36 namespace {
37 class GenericTaintChecker
38     : public Checker<check::PostStmt<CallExpr>, check::PreStmt<CallExpr>> {
39 public:
40   static void *getTag() {
41     static int Tag;
42     return &Tag;
43   }
44 
45   void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
46 
47   void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
48 
49   void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
50                   const char *Sep) const override;
51 
52   using ArgVector = SmallVector<unsigned, 2>;
53   using SignedArgVector = SmallVector<int, 2>;
54 
55   enum class VariadicType { None, Src, Dst };
56 
57   /// Used to parse the configuration file.
58   struct TaintConfiguration {
59     using NameArgsPair = std::pair<std::string, ArgVector>;
60 
61     struct Propagation {
62       std::string Name;
63       ArgVector SrcArgs;
64       SignedArgVector DstArgs;
65       VariadicType VarType;
66       unsigned VarIndex;
67     };
68 
69     std::vector<Propagation> Propagations;
70     std::vector<NameArgsPair> Filters;
71     std::vector<NameArgsPair> Sinks;
72 
73     TaintConfiguration() = default;
74     TaintConfiguration(const TaintConfiguration &) = delete;
75     TaintConfiguration(TaintConfiguration &&) = default;
76     TaintConfiguration &operator=(const TaintConfiguration &) = delete;
77     TaintConfiguration &operator=(TaintConfiguration &&) = default;
78   };
79 
80   /// Convert SignedArgVector to ArgVector.
81   ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option,
82                                SignedArgVector Args);
83 
84   /// Parse the config.
85   void parseConfiguration(CheckerManager &Mgr, const std::string &Option,
86                           TaintConfiguration &&Config);
87 
88   static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()};
89   /// Denotes the return vale.
90   static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() -
91                                          1};
92 
93 private:
94   mutable std::unique_ptr<BugType> BT;
95   void initBugType() const {
96     if (!BT)
97       BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
98   }
99 
100   /// Catch taint related bugs. Check if tainted data is passed to a
101   /// system call etc.
102   bool checkPre(const CallExpr *CE, CheckerContext &C) const;
103 
104   /// Add taint sources on a pre-visit.
105   void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
106 
107   /// Propagate taint generated at pre-visit.
108   bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
109 
110   /// Check if the region the expression evaluates to is the standard input,
111   /// and thus, is tainted.
112   static bool isStdin(const Expr *E, CheckerContext &C);
113 
114   /// Given a pointer argument, return the value it points to.
115   static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg);
116 
117   /// Check for CWE-134: Uncontrolled Format String.
118   static const char MsgUncontrolledFormatString[];
119   bool checkUncontrolledFormatString(const CallExpr *CE,
120                                      CheckerContext &C) const;
121 
122   /// Check for:
123   /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
124   /// CWE-78, "Failure to Sanitize Data into an OS Command"
125   static const char MsgSanitizeSystemArgs[];
126   bool checkSystemCall(const CallExpr *CE, StringRef Name,
127                        CheckerContext &C) const;
128 
129   /// Check if tainted data is used as a buffer size ins strn.. functions,
130   /// and allocators.
131   static const char MsgTaintedBufferSize[];
132   bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
133                               CheckerContext &C) const;
134 
135   /// Generate a report if the expression is tainted or points to tainted data.
136   bool generateReportIfTainted(const Expr *E, const char Msg[],
137                                CheckerContext &C) const;
138 
139   /// A struct used to specify taint propagation rules for a function.
140   ///
141   /// If any of the possible taint source arguments is tainted, all of the
142   /// destination arguments should also be tainted. Use InvalidArgIndex in the
143   /// src list to specify that all of the arguments can introduce taint. Use
144   /// InvalidArgIndex in the dst arguments to signify that all the non-const
145   /// pointer and reference arguments might be tainted on return. If
146   /// ReturnValueIndex is added to the dst list, the return value will be
147   /// tainted.
148   struct TaintPropagationRule {
149     using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *,
150                                          CheckerContext &C);
151 
152     /// List of arguments which can be taint sources and should be checked.
153     ArgVector SrcArgs;
154     /// List of arguments which should be tainted on function return.
155     ArgVector DstArgs;
156     /// Index for the first variadic parameter if exist.
157     unsigned VariadicIndex;
158     /// Show when a function has variadic parameters. If it has, it marks all
159     /// of them as source or destination.
160     VariadicType VarType;
161     /// Special function for tainted source determination. If defined, it can
162     /// override the default behavior.
163     PropagationFuncType PropagationFunc;
164 
165     TaintPropagationRule()
166         : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
167           PropagationFunc(nullptr) {}
168 
169     TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst,
170                          VariadicType Var = VariadicType::None,
171                          unsigned VarIndex = InvalidArgIndex,
172                          PropagationFuncType Func = nullptr)
173         : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)),
174           VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {}
175 
176     /// Get the propagation rule for a given function.
177     static TaintPropagationRule
178     getTaintPropagationRule(const FunctionDecl *FDecl, StringRef Name,
179                             CheckerContext &C);
180 
181     void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
182     void addDstArg(unsigned A) { DstArgs.push_back(A); }
183 
184     bool isNull() const {
185       return SrcArgs.empty() && DstArgs.empty() &&
186              VariadicType::None == VarType;
187     }
188 
189     bool isDestinationArgument(unsigned ArgNum) const {
190       return (llvm::find(DstArgs, ArgNum) != DstArgs.end());
191     }
192 
193     static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State,
194                                            CheckerContext &C) {
195       if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C))
196         return true;
197 
198       if (!E->getType().getTypePtr()->isPointerType())
199         return false;
200 
201       Optional<SVal> V = getPointedToSVal(C, E);
202       return (V && isTainted(State, *V));
203     }
204 
205     /// Pre-process a function which propagates taint according to the
206     /// taint rule.
207     ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
208 
209     // Functions for custom taintedness propagation.
210     static bool postSocket(bool IsTainted, const CallExpr *CE,
211                            CheckerContext &C);
212   };
213 
214   using NameRuleMap = llvm::StringMap<TaintPropagationRule>;
215   using NameArgMap = llvm::StringMap<ArgVector>;
216 
217   /// Defines a map between the propagation function's name and
218   /// TaintPropagationRule.
219   NameRuleMap CustomPropagations;
220 
221   /// Defines a map between the filter function's name and filtering args.
222   NameArgMap CustomFilters;
223 
224   /// Defines a map between the sink function's name and sinking args.
225   NameArgMap CustomSinks;
226 };
227 
228 const unsigned GenericTaintChecker::ReturnValueIndex;
229 const unsigned GenericTaintChecker::InvalidArgIndex;
230 
231 const char GenericTaintChecker::MsgUncontrolledFormatString[] =
232     "Untrusted data is used as a format string "
233     "(CWE-134: Uncontrolled Format String)";
234 
235 const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
236     "Untrusted data is passed to a system call "
237     "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
238 
239 const char GenericTaintChecker::MsgTaintedBufferSize[] =
240     "Untrusted data is used to specify the buffer size "
241     "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
242     "for character data and the null terminator)";
243 } // end of anonymous namespace
244 
245 using TaintConfig = GenericTaintChecker::TaintConfiguration;
246 
247 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation)
248 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameArgsPair)
249 
250 namespace llvm {
251 namespace yaml {
252 template <> struct MappingTraits<TaintConfig> {
253   static void mapping(IO &IO, TaintConfig &Config) {
254     IO.mapOptional("Propagations", Config.Propagations);
255     IO.mapOptional("Filters", Config.Filters);
256     IO.mapOptional("Sinks", Config.Sinks);
257   }
258 };
259 
260 template <> struct MappingTraits<TaintConfig::Propagation> {
261   static void mapping(IO &IO, TaintConfig::Propagation &Propagation) {
262     IO.mapRequired("Name", Propagation.Name);
263     IO.mapOptional("SrcArgs", Propagation.SrcArgs);
264     IO.mapOptional("DstArgs", Propagation.DstArgs);
265     IO.mapOptional("VariadicType", Propagation.VarType,
266                    GenericTaintChecker::VariadicType::None);
267     IO.mapOptional("VariadicIndex", Propagation.VarIndex,
268                    GenericTaintChecker::InvalidArgIndex);
269   }
270 };
271 
272 template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> {
273   static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) {
274     IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None);
275     IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src);
276     IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst);
277   }
278 };
279 
280 template <> struct MappingTraits<TaintConfig::NameArgsPair> {
281   static void mapping(IO &IO, TaintConfig::NameArgsPair &NameArg) {
282     IO.mapRequired("Name", NameArg.first);
283     IO.mapRequired("Args", NameArg.second);
284   }
285 };
286 } // namespace yaml
287 } // namespace llvm
288 
289 /// A set which is used to pass information from call pre-visit instruction
290 /// to the call post-visit. The values are unsigned integers, which are either
291 /// ReturnValueIndex, or indexes of the pointer/reference argument, which
292 /// points to data, which should be tainted on return.
293 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
294 
295 GenericTaintChecker::ArgVector GenericTaintChecker::convertToArgVector(
296     CheckerManager &Mgr, const std::string &Option, SignedArgVector Args) {
297   ArgVector Result;
298   for (int Arg : Args) {
299     if (Arg == -1)
300       Result.push_back(ReturnValueIndex);
301     else if (Arg < -1) {
302       Result.push_back(InvalidArgIndex);
303       Mgr.reportInvalidCheckerOptionValue(
304           this, Option,
305           "an argument number for propagation rules greater or equal to -1");
306     } else
307       Result.push_back(static_cast<unsigned>(Arg));
308   }
309   return Result;
310 }
311 
312 void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr,
313                                              const std::string &Option,
314                                              TaintConfiguration &&Config) {
315   for (auto &P : Config.Propagations) {
316     GenericTaintChecker::CustomPropagations.try_emplace(
317         P.Name, std::move(P.SrcArgs),
318         convertToArgVector(Mgr, Option, P.DstArgs), P.VarType, P.VarIndex);
319   }
320 
321   for (auto &F : Config.Filters) {
322     GenericTaintChecker::CustomFilters.try_emplace(F.first,
323                                                    std::move(F.second));
324   }
325 
326   for (auto &S : Config.Sinks) {
327     GenericTaintChecker::CustomSinks.try_emplace(S.first, std::move(S.second));
328   }
329 }
330 
331 GenericTaintChecker::TaintPropagationRule
332 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
333     const FunctionDecl *FDecl, StringRef Name, CheckerContext &C) {
334   // TODO: Currently, we might lose precision here: we always mark a return
335   // value as tainted even if it's just a pointer, pointing to tainted data.
336 
337   // Check for exact name match for functions without builtin substitutes.
338   TaintPropagationRule Rule =
339       llvm::StringSwitch<TaintPropagationRule>(Name)
340           // Source functions
341           // TODO: Add support for vfscanf & family.
342           .Case("fdopen", TaintPropagationRule({}, {ReturnValueIndex}))
343           .Case("fopen", TaintPropagationRule({}, {ReturnValueIndex}))
344           .Case("freopen", TaintPropagationRule({}, {ReturnValueIndex}))
345           .Case("getch", TaintPropagationRule({}, {ReturnValueIndex}))
346           .Case("getchar", TaintPropagationRule({}, {ReturnValueIndex}))
347           .Case("getchar_unlocked",
348                 TaintPropagationRule({}, {ReturnValueIndex}))
349           .Case("getenv", TaintPropagationRule({}, {ReturnValueIndex}))
350           .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex}))
351           .Case("scanf", TaintPropagationRule({}, {}, VariadicType::Dst, 1))
352           .Case("socket",
353                 TaintPropagationRule({}, {ReturnValueIndex}, VariadicType::None,
354                                      InvalidArgIndex,
355                                      &TaintPropagationRule::postSocket))
356           .Case("wgetch", TaintPropagationRule({}, {ReturnValueIndex}))
357           // Propagating functions
358           .Case("atoi", TaintPropagationRule({0}, {ReturnValueIndex}))
359           .Case("atol", TaintPropagationRule({0}, {ReturnValueIndex}))
360           .Case("atoll", TaintPropagationRule({0}, {ReturnValueIndex}))
361           .Case("fgetc", TaintPropagationRule({0}, {ReturnValueIndex}))
362           .Case("fgetln", TaintPropagationRule({0}, {ReturnValueIndex}))
363           .Case("fgets", TaintPropagationRule({2}, {0, ReturnValueIndex}))
364           .Case("fscanf", TaintPropagationRule({0}, {}, VariadicType::Dst, 2))
365           .Case("getc", TaintPropagationRule({0}, {ReturnValueIndex}))
366           .Case("getc_unlocked", TaintPropagationRule({0}, {ReturnValueIndex}))
367           .Case("getdelim", TaintPropagationRule({3}, {0}))
368           .Case("getline", TaintPropagationRule({2}, {0}))
369           .Case("getw", TaintPropagationRule({0}, {ReturnValueIndex}))
370           .Case("pread",
371                 TaintPropagationRule({0, 1, 2, 3}, {1, ReturnValueIndex}))
372           .Case("read", TaintPropagationRule({0, 2}, {1, ReturnValueIndex}))
373           .Case("strchr", TaintPropagationRule({0}, {ReturnValueIndex}))
374           .Case("strrchr", TaintPropagationRule({0}, {ReturnValueIndex}))
375           .Case("tolower", TaintPropagationRule({0}, {ReturnValueIndex}))
376           .Case("toupper", TaintPropagationRule({0}, {ReturnValueIndex}))
377           .Default(TaintPropagationRule());
378 
379   if (!Rule.isNull())
380     return Rule;
381 
382   // Check if it's one of the memory setting/copying functions.
383   // This check is specialized but faster then calling isCLibraryFunction.
384   unsigned BId = 0;
385   if ((BId = FDecl->getMemoryFunctionKind()))
386     switch (BId) {
387     case Builtin::BImemcpy:
388     case Builtin::BImemmove:
389     case Builtin::BIstrncpy:
390     case Builtin::BIstrncat:
391       return TaintPropagationRule({1, 2}, {0, ReturnValueIndex});
392     case Builtin::BIstrlcpy:
393     case Builtin::BIstrlcat:
394       return TaintPropagationRule({1, 2}, {0});
395     case Builtin::BIstrndup:
396       return TaintPropagationRule({0, 1}, {ReturnValueIndex});
397 
398     default:
399       break;
400     };
401 
402   // Process all other functions which could be defined as builtins.
403   if (Rule.isNull()) {
404     if (C.isCLibraryFunction(FDecl, "snprintf"))
405       return TaintPropagationRule({1}, {0, ReturnValueIndex}, VariadicType::Src,
406                                   3);
407     else if (C.isCLibraryFunction(FDecl, "sprintf"))
408       return TaintPropagationRule({}, {0, ReturnValueIndex}, VariadicType::Src,
409                                   2);
410     else if (C.isCLibraryFunction(FDecl, "strcpy") ||
411              C.isCLibraryFunction(FDecl, "stpcpy") ||
412              C.isCLibraryFunction(FDecl, "strcat"))
413       return TaintPropagationRule({1}, {0, ReturnValueIndex});
414     else if (C.isCLibraryFunction(FDecl, "bcopy"))
415       return TaintPropagationRule({0, 2}, {1});
416     else if (C.isCLibraryFunction(FDecl, "strdup") ||
417              C.isCLibraryFunction(FDecl, "strdupa"))
418       return TaintPropagationRule({0}, {ReturnValueIndex});
419     else if (C.isCLibraryFunction(FDecl, "wcsdup"))
420       return TaintPropagationRule({0}, {ReturnValueIndex});
421   }
422 
423   // Skipping the following functions, since they might be used for cleansing
424   // or smart memory copy:
425   // - memccpy - copying until hitting a special character.
426 
427   return TaintPropagationRule();
428 }
429 
430 void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
431                                        CheckerContext &C) const {
432   // Check for taintedness related errors first: system call, uncontrolled
433   // format string, tainted buffer size.
434   if (checkPre(CE, C))
435     return;
436 
437   // Marks the function's arguments and/or return value tainted if it present in
438   // the list.
439   addSourcesPre(CE, C);
440 }
441 
442 void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
443                                         CheckerContext &C) const {
444   // Set the marked values as tainted. The return value only accessible from
445   // checkPostStmt.
446   propagateFromPre(CE, C);
447 }
448 
449 void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
450                                      const char *NL, const char *Sep) const {
451   printTaint(State, Out, NL, Sep);
452 }
453 
454 void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
455                                         CheckerContext &C) const {
456   ProgramStateRef State = nullptr;
457   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
458   if (!FDecl || FDecl->getKind() != Decl::Function)
459     return;
460 
461   StringRef Name = C.getCalleeName(FDecl);
462   if (Name.empty())
463     return;
464 
465   // First, try generating a propagation rule for this function.
466   TaintPropagationRule Rule =
467       TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
468   if (!Rule.isNull()) {
469     State = Rule.process(CE, C);
470     if (!State)
471       return;
472     C.addTransition(State);
473     return;
474   }
475 
476   if (!State)
477     return;
478   C.addTransition(State);
479 }
480 
481 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
482                                            CheckerContext &C) const {
483   ProgramStateRef State = C.getState();
484 
485   // Depending on what was tainted at pre-visit, we determined a set of
486   // arguments which should be tainted after the function returns. These are
487   // stored in the state as TaintArgsOnPostVisit set.
488   TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
489   if (TaintArgs.isEmpty())
490     return false;
491 
492   for (unsigned ArgNum : TaintArgs) {
493     // Special handling for the tainted return value.
494     if (ArgNum == ReturnValueIndex) {
495       State = addTaint(State, CE, C.getLocationContext());
496       continue;
497     }
498 
499     // The arguments are pointer arguments. The data they are pointing at is
500     // tainted after the call.
501     if (CE->getNumArgs() < (ArgNum + 1))
502       return false;
503     const Expr *Arg = CE->getArg(ArgNum);
504     Optional<SVal> V = getPointedToSVal(C, Arg);
505     if (V)
506       State = addTaint(State, *V);
507   }
508 
509   // Clear up the taint info from the state.
510   State = State->remove<TaintArgsOnPostVisit>();
511 
512   if (State != C.getState()) {
513     C.addTransition(State);
514     return true;
515   }
516   return false;
517 }
518 
519 bool GenericTaintChecker::checkPre(const CallExpr *CE,
520                                    CheckerContext &C) const {
521 
522   if (checkUncontrolledFormatString(CE, C))
523     return true;
524 
525   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
526   if (!FDecl || FDecl->getKind() != Decl::Function)
527     return false;
528 
529   StringRef Name = C.getCalleeName(FDecl);
530   if (Name.empty())
531     return false;
532 
533   if (checkSystemCall(CE, Name, C))
534     return true;
535 
536   if (checkTaintedBufferSize(CE, FDecl, C))
537     return true;
538 
539   return false;
540 }
541 
542 Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C,
543                                                      const Expr *Arg) {
544   ProgramStateRef State = C.getState();
545   SVal AddrVal = C.getSVal(Arg->IgnoreParens());
546   if (AddrVal.isUnknownOrUndef())
547     return None;
548 
549   Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
550   if (!AddrLoc)
551     return None;
552 
553   QualType ArgTy = Arg->getType().getCanonicalType();
554   if (!ArgTy->isPointerType())
555     return None;
556 
557   QualType ValTy = ArgTy->getPointeeType();
558 
559   // Do not dereference void pointers. Treat them as byte pointers instead.
560   // FIXME: we might want to consider more than just the first byte.
561   if (ValTy->isVoidType())
562     ValTy = C.getASTContext().CharTy;
563 
564   return State->getSVal(*AddrLoc, ValTy);
565 }
566 
567 ProgramStateRef
568 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
569                                                    CheckerContext &C) const {
570   ProgramStateRef State = C.getState();
571 
572   // Check for taint in arguments.
573   bool IsTainted = true;
574   for (unsigned ArgNum : SrcArgs) {
575     if (ArgNum >= CE->getNumArgs())
576       return State;
577     if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
578       break;
579   }
580 
581   // Check for taint in variadic arguments.
582   if (!IsTainted && VariadicType::Src == VarType) {
583     // Check if any of the arguments is tainted
584     for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) {
585       if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
586         break;
587     }
588   }
589 
590   if (PropagationFunc)
591     IsTainted = PropagationFunc(IsTainted, CE, C);
592 
593   if (!IsTainted)
594     return State;
595 
596   // Mark the arguments which should be tainted after the function returns.
597   for (unsigned ArgNum : DstArgs) {
598     // Should mark the return value?
599     if (ArgNum == ReturnValueIndex) {
600       State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
601       continue;
602     }
603 
604     // Mark the given argument.
605     assert(ArgNum < CE->getNumArgs());
606     State = State->add<TaintArgsOnPostVisit>(ArgNum);
607   }
608 
609   // Mark all variadic arguments tainted if present.
610   if (VariadicType::Dst == VarType) {
611     // For all pointer and references that were passed in:
612     //   If they are not pointing to const data, mark data as tainted.
613     //   TODO: So far we are just going one level down; ideally we'd need to
614     //         recurse here.
615     for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) {
616       const Expr *Arg = CE->getArg(i);
617       // Process pointer argument.
618       const Type *ArgTy = Arg->getType().getTypePtr();
619       QualType PType = ArgTy->getPointeeType();
620       if ((!PType.isNull() && !PType.isConstQualified()) ||
621           (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
622         State = State->add<TaintArgsOnPostVisit>(i);
623     }
624   }
625 
626   return State;
627 }
628 
629 // If argument 0(protocol domain) is network, the return value should get taint.
630 bool GenericTaintChecker::TaintPropagationRule::postSocket(bool /*IsTainted*/,
631                                                            const CallExpr *CE,
632                                                            CheckerContext &C) {
633   SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
634   StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
635   // White list the internal communication protocols.
636   if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
637       DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
638     return false;
639 
640   return true;
641 }
642 
643 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
644   ProgramStateRef State = C.getState();
645   SVal Val = C.getSVal(E);
646 
647   // stdin is a pointer, so it would be a region.
648   const MemRegion *MemReg = Val.getAsRegion();
649 
650   // The region should be symbolic, we do not know it's value.
651   const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
652   if (!SymReg)
653     return false;
654 
655   // Get it's symbol and find the declaration region it's pointing to.
656   const SymbolRegionValue *Sm =
657       dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
658   if (!Sm)
659     return false;
660   const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
661   if (!DeclReg)
662     return false;
663 
664   // This region corresponds to a declaration, find out if it's a global/extern
665   // variable named stdin with the proper type.
666   if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
667     D = D->getCanonicalDecl();
668     if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) {
669       const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr());
670       if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
671                        C.getASTContext().getFILEType().getCanonicalType())
672         return true;
673     }
674   }
675   return false;
676 }
677 
678 static bool getPrintfFormatArgumentNum(const CallExpr *CE,
679                                        const CheckerContext &C,
680                                        unsigned &ArgNum) {
681   // Find if the function contains a format string argument.
682   // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
683   // vsnprintf, syslog, custom annotated functions.
684   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
685   if (!FDecl)
686     return false;
687   for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
688     ArgNum = Format->getFormatIdx() - 1;
689     if ((Format->getType()->getName() == "printf") && CE->getNumArgs() > ArgNum)
690       return true;
691   }
692 
693   // Or if a function is named setproctitle (this is a heuristic).
694   if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
695     ArgNum = 0;
696     return true;
697   }
698 
699   return false;
700 }
701 
702 bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
703                                                   const char Msg[],
704                                                   CheckerContext &C) const {
705   assert(E);
706 
707   // Check for taint.
708   ProgramStateRef State = C.getState();
709   Optional<SVal> PointedToSVal = getPointedToSVal(C, E);
710   SVal TaintedSVal;
711   if (PointedToSVal && isTainted(State, *PointedToSVal))
712     TaintedSVal = *PointedToSVal;
713   else if (isTainted(State, E, C.getLocationContext()))
714     TaintedSVal = C.getSVal(E);
715   else
716     return false;
717 
718   // Generate diagnostic.
719   if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
720     initBugType();
721     auto report = llvm::make_unique<BugReport>(*BT, Msg, N);
722     report->addRange(E->getSourceRange());
723     report->addVisitor(llvm::make_unique<TaintBugVisitor>(TaintedSVal));
724     C.emitReport(std::move(report));
725     return true;
726   }
727   return false;
728 }
729 
730 bool GenericTaintChecker::checkUncontrolledFormatString(
731     const CallExpr *CE, CheckerContext &C) const {
732   // Check if the function contains a format string argument.
733   unsigned ArgNum = 0;
734   if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
735     return false;
736 
737   // If either the format string content or the pointer itself are tainted,
738   // warn.
739   return generateReportIfTainted(CE->getArg(ArgNum),
740                                  MsgUncontrolledFormatString, C);
741 }
742 
743 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name,
744                                           CheckerContext &C) const {
745   // TODO: It might make sense to run this check on demand. In some cases,
746   // we should check if the environment has been cleansed here. We also might
747   // need to know if the user was reset before these calls(seteuid).
748   unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
749                         .Case("system", 0)
750                         .Case("popen", 0)
751                         .Case("execl", 0)
752                         .Case("execle", 0)
753                         .Case("execlp", 0)
754                         .Case("execv", 0)
755                         .Case("execvp", 0)
756                         .Case("execvP", 0)
757                         .Case("execve", 0)
758                         .Case("dlopen", 0)
759                         .Default(UINT_MAX);
760 
761   if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
762     return false;
763 
764   return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
765 }
766 
767 // TODO: Should this check be a part of the CString checker?
768 // If yes, should taint be a global setting?
769 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
770                                                  const FunctionDecl *FDecl,
771                                                  CheckerContext &C) const {
772   // If the function has a buffer size argument, set ArgNum.
773   unsigned ArgNum = InvalidArgIndex;
774   unsigned BId = 0;
775   if ((BId = FDecl->getMemoryFunctionKind()))
776     switch (BId) {
777     case Builtin::BImemcpy:
778     case Builtin::BImemmove:
779     case Builtin::BIstrncpy:
780       ArgNum = 2;
781       break;
782     case Builtin::BIstrndup:
783       ArgNum = 1;
784       break;
785     default:
786       break;
787     };
788 
789   if (ArgNum == InvalidArgIndex) {
790     if (C.isCLibraryFunction(FDecl, "malloc") ||
791         C.isCLibraryFunction(FDecl, "calloc") ||
792         C.isCLibraryFunction(FDecl, "alloca"))
793       ArgNum = 0;
794     else if (C.isCLibraryFunction(FDecl, "memccpy"))
795       ArgNum = 3;
796     else if (C.isCLibraryFunction(FDecl, "realloc"))
797       ArgNum = 1;
798     else if (C.isCLibraryFunction(FDecl, "bcopy"))
799       ArgNum = 2;
800   }
801 
802   return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
803          generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
804 }
805 
806 void ento::registerGenericTaintChecker(CheckerManager &Mgr) {
807   auto *Checker = Mgr.registerChecker<GenericTaintChecker>();
808   std::string Option{"Config"};
809   StringRef ConfigFile =
810       Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option);
811   llvm::Optional<TaintConfig> Config =
812       getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile);
813   if (Config)
814     Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue()));
815 }
816 
817 bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) {
818   return true;
819 }
820