1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This checker defines the attack surface for generic taint propagation.
10 //
11 // The taint information produced by it might be useful to other checkers. For
12 // example, checkers should report errors which involve tainted data more
13 // aggressively, even if the involved symbols are under constrained.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "Taint.h"
18 #include "Yaml.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/Basic/Builtins.h"
21 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
22 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
23 #include "clang/StaticAnalyzer/Core/Checker.h"
24 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
25 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
26 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
27 #include "llvm/ADT/StringMap.h"
28 #include "llvm/Support/YAMLTraits.h"
29 #include <limits>
30 #include <utility>
31 
32 using namespace clang;
33 using namespace ento;
34 using namespace taint;
35 
36 namespace {
37 class GenericTaintChecker
38     : public Checker<check::PostStmt<CallExpr>, check::PreStmt<CallExpr>> {
39 public:
40   static void *getTag() {
41     static int Tag;
42     return &Tag;
43   }
44 
45   void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
46 
47   void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
48 
49   void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
50                   const char *Sep) const override;
51 
52   using ArgVector = SmallVector<unsigned, 2>;
53   using SignedArgVector = SmallVector<int, 2>;
54 
55   enum class VariadicType { None, Src, Dst };
56 
57   /// Used to parse the configuration file.
58   struct TaintConfiguration {
59     using NameArgsPair = std::pair<std::string, ArgVector>;
60 
61     struct Propagation {
62       std::string Name;
63       ArgVector SrcArgs;
64       SignedArgVector DstArgs;
65       VariadicType VarType;
66       unsigned VarIndex;
67     };
68 
69     std::vector<Propagation> Propagations;
70     std::vector<NameArgsPair> Filters;
71     std::vector<NameArgsPair> Sinks;
72 
73     TaintConfiguration() = default;
74     TaintConfiguration(const TaintConfiguration &) = default;
75     TaintConfiguration(TaintConfiguration &&) = default;
76     TaintConfiguration &operator=(const TaintConfiguration &) = default;
77     TaintConfiguration &operator=(TaintConfiguration &&) = default;
78   };
79 
80   /// Convert SignedArgVector to ArgVector.
81   ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option,
82                                SignedArgVector Args);
83 
84   /// Parse the config.
85   void parseConfiguration(CheckerManager &Mgr, const std::string &Option,
86                           TaintConfiguration &&Config);
87 
88   static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()};
89   /// Denotes the return vale.
90   static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() -
91                                          1};
92 
93 private:
94   mutable std::unique_ptr<BugType> BT;
95   void initBugType() const {
96     if (!BT)
97       BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
98   }
99 
100   /// Catch taint related bugs. Check if tainted data is passed to a
101   /// system call etc. Returns true on matching.
102   bool checkPre(const CallExpr *CE, const FunctionDecl *FDecl, StringRef Name,
103                 CheckerContext &C) const;
104 
105   /// Add taint sources on a pre-visit. Returns true on matching.
106   bool addSourcesPre(const CallExpr *CE, const FunctionDecl *FDecl,
107                      StringRef Name, CheckerContext &C) const;
108 
109   /// Mark filter's arguments not tainted on a pre-visit. Returns true on
110   /// matching.
111   bool addFiltersPre(const CallExpr *CE, StringRef Name,
112                      CheckerContext &C) const;
113 
114   /// Propagate taint generated at pre-visit. Returns true on matching.
115   bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
116 
117   /// Check if the region the expression evaluates to is the standard input,
118   /// and thus, is tainted.
119   static bool isStdin(const Expr *E, CheckerContext &C);
120 
121   /// Given a pointer argument, return the value it points to.
122   static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg);
123 
124   /// Check for CWE-134: Uncontrolled Format String.
125   static constexpr llvm::StringLiteral MsgUncontrolledFormatString =
126       "Untrusted data is used as a format string "
127       "(CWE-134: Uncontrolled Format String)";
128   bool checkUncontrolledFormatString(const CallExpr *CE,
129                                      CheckerContext &C) const;
130 
131   /// Check for:
132   /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
133   /// CWE-78, "Failure to Sanitize Data into an OS Command"
134   static constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
135       "Untrusted data is passed to a system call "
136       "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
137   bool checkSystemCall(const CallExpr *CE, StringRef Name,
138                        CheckerContext &C) const;
139 
140   /// Check if tainted data is used as a buffer size ins strn.. functions,
141   /// and allocators.
142   static constexpr llvm::StringLiteral MsgTaintedBufferSize =
143       "Untrusted data is used to specify the buffer size "
144       "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
145       "for character data and the null terminator)";
146   bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
147                               CheckerContext &C) const;
148 
149   /// Check if tainted data is used as a custom sink's parameter.
150   static constexpr llvm::StringLiteral MsgCustomSink =
151       "Untrusted data is passed to a user-defined sink";
152   bool checkCustomSinks(const CallExpr *CE, StringRef Name,
153                         CheckerContext &C) const;
154 
155   /// Generate a report if the expression is tainted or points to tainted data.
156   bool generateReportIfTainted(const Expr *E, StringRef Msg,
157                                CheckerContext &C) const;
158 
159   struct TaintPropagationRule;
160   using NameRuleMap = llvm::StringMap<TaintPropagationRule>;
161   using NameArgMap = llvm::StringMap<ArgVector>;
162 
163   /// A struct used to specify taint propagation rules for a function.
164   ///
165   /// If any of the possible taint source arguments is tainted, all of the
166   /// destination arguments should also be tainted. Use InvalidArgIndex in the
167   /// src list to specify that all of the arguments can introduce taint. Use
168   /// InvalidArgIndex in the dst arguments to signify that all the non-const
169   /// pointer and reference arguments might be tainted on return. If
170   /// ReturnValueIndex is added to the dst list, the return value will be
171   /// tainted.
172   struct TaintPropagationRule {
173     using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *,
174                                          CheckerContext &C);
175 
176     /// List of arguments which can be taint sources and should be checked.
177     ArgVector SrcArgs;
178     /// List of arguments which should be tainted on function return.
179     ArgVector DstArgs;
180     /// Index for the first variadic parameter if exist.
181     unsigned VariadicIndex;
182     /// Show when a function has variadic parameters. If it has, it marks all
183     /// of them as source or destination.
184     VariadicType VarType;
185     /// Special function for tainted source determination. If defined, it can
186     /// override the default behavior.
187     PropagationFuncType PropagationFunc;
188 
189     TaintPropagationRule()
190         : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
191           PropagationFunc(nullptr) {}
192 
193     TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst,
194                          VariadicType Var = VariadicType::None,
195                          unsigned VarIndex = InvalidArgIndex,
196                          PropagationFuncType Func = nullptr)
197         : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)),
198           VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {}
199 
200     /// Get the propagation rule for a given function.
201     static TaintPropagationRule
202     getTaintPropagationRule(const NameRuleMap &CustomPropagations,
203                             const FunctionDecl *FDecl, StringRef Name,
204                             CheckerContext &C);
205 
206     void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
207     void addDstArg(unsigned A) { DstArgs.push_back(A); }
208 
209     bool isNull() const {
210       return SrcArgs.empty() && DstArgs.empty() &&
211              VariadicType::None == VarType;
212     }
213 
214     bool isDestinationArgument(unsigned ArgNum) const {
215       return (llvm::find(DstArgs, ArgNum) != DstArgs.end());
216     }
217 
218     static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State,
219                                            CheckerContext &C) {
220       if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C))
221         return true;
222 
223       if (!E->getType().getTypePtr()->isPointerType())
224         return false;
225 
226       Optional<SVal> V = getPointedToSVal(C, E);
227       return (V && isTainted(State, *V));
228     }
229 
230     /// Pre-process a function which propagates taint according to the
231     /// taint rule.
232     ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
233 
234     // Functions for custom taintedness propagation.
235     static bool postSocket(bool IsTainted, const CallExpr *CE,
236                            CheckerContext &C);
237   };
238 
239   /// Defines a map between the propagation function's name and
240   /// TaintPropagationRule.
241   NameRuleMap CustomPropagations;
242 
243   /// Defines a map between the filter function's name and filtering args.
244   NameArgMap CustomFilters;
245 
246   /// Defines a map between the sink function's name and sinking args.
247   NameArgMap CustomSinks;
248 };
249 
250 const unsigned GenericTaintChecker::ReturnValueIndex;
251 const unsigned GenericTaintChecker::InvalidArgIndex;
252 
253 // FIXME: these lines can be removed in C++17
254 constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString;
255 constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs;
256 constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize;
257 constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink;
258 } // end of anonymous namespace
259 
260 using TaintConfig = GenericTaintChecker::TaintConfiguration;
261 
262 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation)
263 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameArgsPair)
264 
265 namespace llvm {
266 namespace yaml {
267 template <> struct MappingTraits<TaintConfig> {
268   static void mapping(IO &IO, TaintConfig &Config) {
269     IO.mapOptional("Propagations", Config.Propagations);
270     IO.mapOptional("Filters", Config.Filters);
271     IO.mapOptional("Sinks", Config.Sinks);
272   }
273 };
274 
275 template <> struct MappingTraits<TaintConfig::Propagation> {
276   static void mapping(IO &IO, TaintConfig::Propagation &Propagation) {
277     IO.mapRequired("Name", Propagation.Name);
278     IO.mapOptional("SrcArgs", Propagation.SrcArgs);
279     IO.mapOptional("DstArgs", Propagation.DstArgs);
280     IO.mapOptional("VariadicType", Propagation.VarType,
281                    GenericTaintChecker::VariadicType::None);
282     IO.mapOptional("VariadicIndex", Propagation.VarIndex,
283                    GenericTaintChecker::InvalidArgIndex);
284   }
285 };
286 
287 template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> {
288   static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) {
289     IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None);
290     IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src);
291     IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst);
292   }
293 };
294 
295 template <> struct MappingTraits<TaintConfig::NameArgsPair> {
296   static void mapping(IO &IO, TaintConfig::NameArgsPair &NameArg) {
297     IO.mapRequired("Name", NameArg.first);
298     IO.mapRequired("Args", NameArg.second);
299   }
300 };
301 } // namespace yaml
302 } // namespace llvm
303 
304 /// A set which is used to pass information from call pre-visit instruction
305 /// to the call post-visit. The values are unsigned integers, which are either
306 /// ReturnValueIndex, or indexes of the pointer/reference argument, which
307 /// points to data, which should be tainted on return.
308 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
309 
310 GenericTaintChecker::ArgVector GenericTaintChecker::convertToArgVector(
311     CheckerManager &Mgr, const std::string &Option, SignedArgVector Args) {
312   ArgVector Result;
313   for (int Arg : Args) {
314     if (Arg == -1)
315       Result.push_back(ReturnValueIndex);
316     else if (Arg < -1) {
317       Result.push_back(InvalidArgIndex);
318       Mgr.reportInvalidCheckerOptionValue(
319           this, Option,
320           "an argument number for propagation rules greater or equal to -1");
321     } else
322       Result.push_back(static_cast<unsigned>(Arg));
323   }
324   return Result;
325 }
326 
327 void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr,
328                                              const std::string &Option,
329                                              TaintConfiguration &&Config) {
330   for (auto &P : Config.Propagations) {
331     GenericTaintChecker::CustomPropagations.try_emplace(
332         P.Name, std::move(P.SrcArgs),
333         convertToArgVector(Mgr, Option, P.DstArgs), P.VarType, P.VarIndex);
334   }
335 
336   for (auto &F : Config.Filters) {
337     GenericTaintChecker::CustomFilters.try_emplace(F.first,
338                                                    std::move(F.second));
339   }
340 
341   for (auto &S : Config.Sinks) {
342     GenericTaintChecker::CustomSinks.try_emplace(S.first, std::move(S.second));
343   }
344 }
345 
346 GenericTaintChecker::TaintPropagationRule
347 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
348     const NameRuleMap &CustomPropagations, const FunctionDecl *FDecl,
349     StringRef Name, CheckerContext &C) {
350   // TODO: Currently, we might lose precision here: we always mark a return
351   // value as tainted even if it's just a pointer, pointing to tainted data.
352 
353   // Check for exact name match for functions without builtin substitutes.
354   TaintPropagationRule Rule =
355       llvm::StringSwitch<TaintPropagationRule>(Name)
356           // Source functions
357           // TODO: Add support for vfscanf & family.
358           .Case("fdopen", TaintPropagationRule({}, {ReturnValueIndex}))
359           .Case("fopen", TaintPropagationRule({}, {ReturnValueIndex}))
360           .Case("freopen", TaintPropagationRule({}, {ReturnValueIndex}))
361           .Case("getch", TaintPropagationRule({}, {ReturnValueIndex}))
362           .Case("getchar", TaintPropagationRule({}, {ReturnValueIndex}))
363           .Case("getchar_unlocked",
364                 TaintPropagationRule({}, {ReturnValueIndex}))
365           .Case("getenv", TaintPropagationRule({}, {ReturnValueIndex}))
366           .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex}))
367           .Case("scanf", TaintPropagationRule({}, {}, VariadicType::Dst, 1))
368           .Case("socket",
369                 TaintPropagationRule({}, {ReturnValueIndex}, VariadicType::None,
370                                      InvalidArgIndex,
371                                      &TaintPropagationRule::postSocket))
372           .Case("wgetch", TaintPropagationRule({}, {ReturnValueIndex}))
373           // Propagating functions
374           .Case("atoi", TaintPropagationRule({0}, {ReturnValueIndex}))
375           .Case("atol", TaintPropagationRule({0}, {ReturnValueIndex}))
376           .Case("atoll", TaintPropagationRule({0}, {ReturnValueIndex}))
377           .Case("fgetc", TaintPropagationRule({0}, {ReturnValueIndex}))
378           .Case("fgetln", TaintPropagationRule({0}, {ReturnValueIndex}))
379           .Case("fgets", TaintPropagationRule({2}, {0, ReturnValueIndex}))
380           .Case("fscanf", TaintPropagationRule({0}, {}, VariadicType::Dst, 2))
381           .Case("getc", TaintPropagationRule({0}, {ReturnValueIndex}))
382           .Case("getc_unlocked", TaintPropagationRule({0}, {ReturnValueIndex}))
383           .Case("getdelim", TaintPropagationRule({3}, {0}))
384           .Case("getline", TaintPropagationRule({2}, {0}))
385           .Case("getw", TaintPropagationRule({0}, {ReturnValueIndex}))
386           .Case("pread",
387                 TaintPropagationRule({0, 1, 2, 3}, {1, ReturnValueIndex}))
388           .Case("read", TaintPropagationRule({0, 2}, {1, ReturnValueIndex}))
389           .Case("strchr", TaintPropagationRule({0}, {ReturnValueIndex}))
390           .Case("strrchr", TaintPropagationRule({0}, {ReturnValueIndex}))
391           .Case("tolower", TaintPropagationRule({0}, {ReturnValueIndex}))
392           .Case("toupper", TaintPropagationRule({0}, {ReturnValueIndex}))
393           .Default(TaintPropagationRule());
394 
395   if (!Rule.isNull())
396     return Rule;
397 
398   // Check if it's one of the memory setting/copying functions.
399   // This check is specialized but faster then calling isCLibraryFunction.
400   unsigned BId = 0;
401   if ((BId = FDecl->getMemoryFunctionKind()))
402     switch (BId) {
403     case Builtin::BImemcpy:
404     case Builtin::BImemmove:
405     case Builtin::BIstrncpy:
406     case Builtin::BIstrncat:
407       return TaintPropagationRule({1, 2}, {0, ReturnValueIndex});
408     case Builtin::BIstrlcpy:
409     case Builtin::BIstrlcat:
410       return TaintPropagationRule({1, 2}, {0});
411     case Builtin::BIstrndup:
412       return TaintPropagationRule({0, 1}, {ReturnValueIndex});
413 
414     default:
415       break;
416     };
417 
418   // Process all other functions which could be defined as builtins.
419   if (Rule.isNull()) {
420     if (C.isCLibraryFunction(FDecl, "snprintf"))
421       return TaintPropagationRule({1}, {0, ReturnValueIndex}, VariadicType::Src,
422                                   3);
423     else if (C.isCLibraryFunction(FDecl, "sprintf"))
424       return TaintPropagationRule({}, {0, ReturnValueIndex}, VariadicType::Src,
425                                   2);
426     else if (C.isCLibraryFunction(FDecl, "strcpy") ||
427              C.isCLibraryFunction(FDecl, "stpcpy") ||
428              C.isCLibraryFunction(FDecl, "strcat"))
429       return TaintPropagationRule({1}, {0, ReturnValueIndex});
430     else if (C.isCLibraryFunction(FDecl, "bcopy"))
431       return TaintPropagationRule({0, 2}, {1});
432     else if (C.isCLibraryFunction(FDecl, "strdup") ||
433              C.isCLibraryFunction(FDecl, "strdupa"))
434       return TaintPropagationRule({0}, {ReturnValueIndex});
435     else if (C.isCLibraryFunction(FDecl, "wcsdup"))
436       return TaintPropagationRule({0}, {ReturnValueIndex});
437   }
438 
439   // Skipping the following functions, since they might be used for cleansing
440   // or smart memory copy:
441   // - memccpy - copying until hitting a special character.
442 
443   auto It = CustomPropagations.find(Name);
444   if (It != CustomPropagations.end())
445     return It->getValue();
446 
447   return TaintPropagationRule();
448 }
449 
450 void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
451                                        CheckerContext &C) const {
452   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
453   // Check for non-global functions.
454   if (!FDecl || FDecl->getKind() != Decl::Function)
455     return;
456 
457   StringRef Name = C.getCalleeName(FDecl);
458   if (Name.empty())
459     return;
460 
461   // Check for taintedness related errors first: system call, uncontrolled
462   // format string, tainted buffer size.
463   if (checkPre(CE, FDecl, Name, C))
464     return;
465 
466   // Marks the function's arguments and/or return value tainted if it present in
467   // the list.
468   if (addSourcesPre(CE, FDecl, Name, C))
469     return;
470 
471   addFiltersPre(CE, Name, C);
472 }
473 
474 void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
475                                         CheckerContext &C) const {
476   // Set the marked values as tainted. The return value only accessible from
477   // checkPostStmt.
478   propagateFromPre(CE, C);
479 }
480 
481 void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
482                                      const char *NL, const char *Sep) const {
483   printTaint(State, Out, NL, Sep);
484 }
485 
486 bool GenericTaintChecker::addSourcesPre(const CallExpr *CE,
487                                         const FunctionDecl *FDecl,
488                                         StringRef Name,
489                                         CheckerContext &C) const {
490   // First, try generating a propagation rule for this function.
491   TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule(
492       this->CustomPropagations, FDecl, Name, C);
493   if (!Rule.isNull()) {
494     ProgramStateRef State = Rule.process(CE, C);
495     if (State) {
496       C.addTransition(State);
497       return true;
498     }
499   }
500   return false;
501 }
502 
503 bool GenericTaintChecker::addFiltersPre(const CallExpr *CE, StringRef Name,
504                                         CheckerContext &C) const {
505   auto It = CustomFilters.find(Name);
506   if (It == CustomFilters.end())
507     return false;
508 
509   ProgramStateRef State = C.getState();
510   const ArgVector &Args = It->getValue();
511   for (unsigned ArgNum : Args) {
512     if (ArgNum >= CE->getNumArgs())
513       continue;
514 
515     const Expr *Arg = CE->getArg(ArgNum);
516     Optional<SVal> V = getPointedToSVal(C, Arg);
517     if (V)
518       State = removeTaint(State, *V);
519   }
520 
521   if (State != C.getState()) {
522     C.addTransition(State);
523     return true;
524   }
525   return false;
526 }
527 
528 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
529                                            CheckerContext &C) const {
530   ProgramStateRef State = C.getState();
531 
532   // Depending on what was tainted at pre-visit, we determined a set of
533   // arguments which should be tainted after the function returns. These are
534   // stored in the state as TaintArgsOnPostVisit set.
535   TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
536   if (TaintArgs.isEmpty())
537     return false;
538 
539   for (unsigned ArgNum : TaintArgs) {
540     // Special handling for the tainted return value.
541     if (ArgNum == ReturnValueIndex) {
542       State = addTaint(State, CE, C.getLocationContext());
543       continue;
544     }
545 
546     // The arguments are pointer arguments. The data they are pointing at is
547     // tainted after the call.
548     if (CE->getNumArgs() < (ArgNum + 1))
549       return false;
550     const Expr *Arg = CE->getArg(ArgNum);
551     Optional<SVal> V = getPointedToSVal(C, Arg);
552     if (V)
553       State = addTaint(State, *V);
554   }
555 
556   // Clear up the taint info from the state.
557   State = State->remove<TaintArgsOnPostVisit>();
558 
559   if (State != C.getState()) {
560     C.addTransition(State);
561     return true;
562   }
563   return false;
564 }
565 
566 bool GenericTaintChecker::checkPre(const CallExpr *CE,
567                                    const FunctionDecl *FDecl, StringRef Name,
568                                    CheckerContext &C) const {
569 
570   if (checkUncontrolledFormatString(CE, C))
571     return true;
572 
573   if (checkSystemCall(CE, Name, C))
574     return true;
575 
576   if (checkTaintedBufferSize(CE, FDecl, C))
577     return true;
578 
579   if (checkCustomSinks(CE, Name, C))
580     return true;
581 
582   return false;
583 }
584 
585 Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C,
586                                                      const Expr *Arg) {
587   ProgramStateRef State = C.getState();
588   SVal AddrVal = C.getSVal(Arg->IgnoreParens());
589   if (AddrVal.isUnknownOrUndef())
590     return None;
591 
592   Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
593   if (!AddrLoc)
594     return None;
595 
596   QualType ArgTy = Arg->getType().getCanonicalType();
597   if (!ArgTy->isPointerType())
598     return None;
599 
600   QualType ValTy = ArgTy->getPointeeType();
601 
602   // Do not dereference void pointers. Treat them as byte pointers instead.
603   // FIXME: we might want to consider more than just the first byte.
604   if (ValTy->isVoidType())
605     ValTy = C.getASTContext().CharTy;
606 
607   return State->getSVal(*AddrLoc, ValTy);
608 }
609 
610 ProgramStateRef
611 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
612                                                    CheckerContext &C) const {
613   ProgramStateRef State = C.getState();
614 
615   // Check for taint in arguments.
616   bool IsTainted = true;
617   for (unsigned ArgNum : SrcArgs) {
618     if (ArgNum >= CE->getNumArgs())
619       continue;
620 
621     if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
622       break;
623   }
624 
625   // Check for taint in variadic arguments.
626   if (!IsTainted && VariadicType::Src == VarType) {
627     // Check if any of the arguments is tainted
628     for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) {
629       if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
630         break;
631     }
632   }
633 
634   if (PropagationFunc)
635     IsTainted = PropagationFunc(IsTainted, CE, C);
636 
637   if (!IsTainted)
638     return State;
639 
640   // Mark the arguments which should be tainted after the function returns.
641   for (unsigned ArgNum : DstArgs) {
642     // Should mark the return value?
643     if (ArgNum == ReturnValueIndex) {
644       State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
645       continue;
646     }
647 
648     if (ArgNum >= CE->getNumArgs())
649       continue;
650 
651     // Mark the given argument.
652     State = State->add<TaintArgsOnPostVisit>(ArgNum);
653   }
654 
655   // Mark all variadic arguments tainted if present.
656   if (VariadicType::Dst == VarType) {
657     // For all pointer and references that were passed in:
658     //   If they are not pointing to const data, mark data as tainted.
659     //   TODO: So far we are just going one level down; ideally we'd need to
660     //         recurse here.
661     for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) {
662       const Expr *Arg = CE->getArg(i);
663       // Process pointer argument.
664       const Type *ArgTy = Arg->getType().getTypePtr();
665       QualType PType = ArgTy->getPointeeType();
666       if ((!PType.isNull() && !PType.isConstQualified()) ||
667           (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
668         State = State->add<TaintArgsOnPostVisit>(i);
669     }
670   }
671 
672   return State;
673 }
674 
675 // If argument 0(protocol domain) is network, the return value should get taint.
676 bool GenericTaintChecker::TaintPropagationRule::postSocket(bool /*IsTainted*/,
677                                                            const CallExpr *CE,
678                                                            CheckerContext &C) {
679   SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
680   StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
681   // White list the internal communication protocols.
682   if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
683       DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
684     return false;
685 
686   return true;
687 }
688 
689 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
690   ProgramStateRef State = C.getState();
691   SVal Val = C.getSVal(E);
692 
693   // stdin is a pointer, so it would be a region.
694   const MemRegion *MemReg = Val.getAsRegion();
695 
696   // The region should be symbolic, we do not know it's value.
697   const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
698   if (!SymReg)
699     return false;
700 
701   // Get it's symbol and find the declaration region it's pointing to.
702   const SymbolRegionValue *Sm =
703       dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
704   if (!Sm)
705     return false;
706   const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
707   if (!DeclReg)
708     return false;
709 
710   // This region corresponds to a declaration, find out if it's a global/extern
711   // variable named stdin with the proper type.
712   if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
713     D = D->getCanonicalDecl();
714     if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) {
715       const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr());
716       if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
717                        C.getASTContext().getFILEType().getCanonicalType())
718         return true;
719     }
720   }
721   return false;
722 }
723 
724 static bool getPrintfFormatArgumentNum(const CallExpr *CE,
725                                        const CheckerContext &C,
726                                        unsigned &ArgNum) {
727   // Find if the function contains a format string argument.
728   // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
729   // vsnprintf, syslog, custom annotated functions.
730   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
731   if (!FDecl)
732     return false;
733   for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
734     ArgNum = Format->getFormatIdx() - 1;
735     if ((Format->getType()->getName() == "printf") && CE->getNumArgs() > ArgNum)
736       return true;
737   }
738 
739   // Or if a function is named setproctitle (this is a heuristic).
740   if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
741     ArgNum = 0;
742     return true;
743   }
744 
745   return false;
746 }
747 
748 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg,
749                                                   CheckerContext &C) const {
750   assert(E);
751 
752   // Check for taint.
753   ProgramStateRef State = C.getState();
754   Optional<SVal> PointedToSVal = getPointedToSVal(C, E);
755   SVal TaintedSVal;
756   if (PointedToSVal && isTainted(State, *PointedToSVal))
757     TaintedSVal = *PointedToSVal;
758   else if (isTainted(State, E, C.getLocationContext()))
759     TaintedSVal = C.getSVal(E);
760   else
761     return false;
762 
763   // Generate diagnostic.
764   if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
765     initBugType();
766     auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
767     report->addRange(E->getSourceRange());
768     report->addVisitor(std::make_unique<TaintBugVisitor>(TaintedSVal));
769     C.emitReport(std::move(report));
770     return true;
771   }
772   return false;
773 }
774 
775 bool GenericTaintChecker::checkUncontrolledFormatString(
776     const CallExpr *CE, CheckerContext &C) const {
777   // Check if the function contains a format string argument.
778   unsigned ArgNum = 0;
779   if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
780     return false;
781 
782   // If either the format string content or the pointer itself are tainted,
783   // warn.
784   return generateReportIfTainted(CE->getArg(ArgNum),
785                                  MsgUncontrolledFormatString, C);
786 }
787 
788 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name,
789                                           CheckerContext &C) const {
790   // TODO: It might make sense to run this check on demand. In some cases,
791   // we should check if the environment has been cleansed here. We also might
792   // need to know if the user was reset before these calls(seteuid).
793   unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
794                         .Case("system", 0)
795                         .Case("popen", 0)
796                         .Case("execl", 0)
797                         .Case("execle", 0)
798                         .Case("execlp", 0)
799                         .Case("execv", 0)
800                         .Case("execvp", 0)
801                         .Case("execvP", 0)
802                         .Case("execve", 0)
803                         .Case("dlopen", 0)
804                         .Default(InvalidArgIndex);
805 
806   if (ArgNum == InvalidArgIndex || CE->getNumArgs() < (ArgNum + 1))
807     return false;
808 
809   return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
810 }
811 
812 // TODO: Should this check be a part of the CString checker?
813 // If yes, should taint be a global setting?
814 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
815                                                  const FunctionDecl *FDecl,
816                                                  CheckerContext &C) const {
817   // If the function has a buffer size argument, set ArgNum.
818   unsigned ArgNum = InvalidArgIndex;
819   unsigned BId = 0;
820   if ((BId = FDecl->getMemoryFunctionKind()))
821     switch (BId) {
822     case Builtin::BImemcpy:
823     case Builtin::BImemmove:
824     case Builtin::BIstrncpy:
825       ArgNum = 2;
826       break;
827     case Builtin::BIstrndup:
828       ArgNum = 1;
829       break;
830     default:
831       break;
832     };
833 
834   if (ArgNum == InvalidArgIndex) {
835     if (C.isCLibraryFunction(FDecl, "malloc") ||
836         C.isCLibraryFunction(FDecl, "calloc") ||
837         C.isCLibraryFunction(FDecl, "alloca"))
838       ArgNum = 0;
839     else if (C.isCLibraryFunction(FDecl, "memccpy"))
840       ArgNum = 3;
841     else if (C.isCLibraryFunction(FDecl, "realloc"))
842       ArgNum = 1;
843     else if (C.isCLibraryFunction(FDecl, "bcopy"))
844       ArgNum = 2;
845   }
846 
847   return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
848          generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
849 }
850 
851 bool GenericTaintChecker::checkCustomSinks(const CallExpr *CE, StringRef Name,
852                                            CheckerContext &C) const {
853   auto It = CustomSinks.find(Name);
854   if (It == CustomSinks.end())
855     return false;
856 
857   const GenericTaintChecker::ArgVector &Args = It->getValue();
858   for (unsigned ArgNum : Args) {
859     if (ArgNum >= CE->getNumArgs())
860       continue;
861 
862     if (generateReportIfTainted(CE->getArg(ArgNum), MsgCustomSink, C))
863       return true;
864   }
865 
866   return false;
867 }
868 
869 void ento::registerGenericTaintChecker(CheckerManager &Mgr) {
870   auto *Checker = Mgr.registerChecker<GenericTaintChecker>();
871   std::string Option{"Config"};
872   StringRef ConfigFile =
873       Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option);
874   llvm::Optional<TaintConfig> Config =
875       getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile);
876   if (Config)
877     Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue()));
878 }
879 
880 bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) {
881   return true;
882 }
883