1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This checker defines the attack surface for generic taint propagation.
10 //
11 // The taint information produced by it might be useful to other checkers. For
12 // example, checkers should report errors which involve tainted data more
13 // aggressively, even if the involved symbols are under constrained.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "Taint.h"
18 #include "Yaml.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/Basic/Builtins.h"
21 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
22 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
23 #include "clang/StaticAnalyzer/Core/Checker.h"
24 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
25 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
26 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
27 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
28 #include "llvm/Support/YAMLTraits.h"
29 
30 #include <algorithm>
31 #include <limits>
32 #include <memory>
33 #include <unordered_map>
34 #include <utility>
35 
36 using namespace clang;
37 using namespace ento;
38 using namespace taint;
39 
40 namespace {
41 class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> {
42 public:
43   static void *getTag() {
44     static int Tag;
45     return &Tag;
46   }
47 
48   void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
49   void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
50 
51   void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
52                   const char *Sep) const override;
53 
54   using ArgVector = SmallVector<unsigned, 2>;
55   using SignedArgVector = SmallVector<int, 2>;
56 
57   enum class VariadicType { None, Src, Dst };
58 
59   /// Used to parse the configuration file.
60   struct TaintConfiguration {
61     using NameScopeArgs = std::tuple<std::string, std::string, ArgVector>;
62 
63     struct Propagation {
64       std::string Name;
65       std::string Scope;
66       ArgVector SrcArgs;
67       SignedArgVector DstArgs;
68       VariadicType VarType;
69       unsigned VarIndex;
70     };
71 
72     std::vector<Propagation> Propagations;
73     std::vector<NameScopeArgs> Filters;
74     std::vector<NameScopeArgs> Sinks;
75 
76     TaintConfiguration() = default;
77     TaintConfiguration(const TaintConfiguration &) = default;
78     TaintConfiguration(TaintConfiguration &&) = default;
79     TaintConfiguration &operator=(const TaintConfiguration &) = default;
80     TaintConfiguration &operator=(TaintConfiguration &&) = default;
81   };
82 
83   /// Convert SignedArgVector to ArgVector.
84   ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option,
85                                const SignedArgVector &Args);
86 
87   /// Parse the config.
88   void parseConfiguration(CheckerManager &Mgr, const std::string &Option,
89                           TaintConfiguration &&Config);
90 
91   static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()};
92   /// Denotes the return vale.
93   static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() -
94                                          1};
95 
96 private:
97   mutable std::unique_ptr<BugType> BT;
98   void initBugType() const {
99     if (!BT)
100       BT = std::make_unique<BugType>(this, "Use of Untrusted Data",
101                                      "Untrusted Data");
102   }
103 
104   struct FunctionData {
105     FunctionData() = delete;
106     FunctionData(const FunctionData &) = default;
107     FunctionData(FunctionData &&) = default;
108     FunctionData &operator=(const FunctionData &) = delete;
109     FunctionData &operator=(FunctionData &&) = delete;
110 
111     static Optional<FunctionData> create(const CallEvent &Call,
112                                          const CheckerContext &C) {
113       assert(Call.getDecl());
114       const FunctionDecl *FDecl = Call.getDecl()->getAsFunction();
115       if (!FDecl || (FDecl->getKind() != Decl::Function &&
116                      FDecl->getKind() != Decl::CXXMethod))
117         return None;
118 
119       StringRef Name = C.getCalleeName(FDecl);
120       std::string FullName = FDecl->getQualifiedNameAsString();
121       if (Name.empty() || FullName.empty())
122         return None;
123 
124       return FunctionData{FDecl, Name, FullName};
125     }
126 
127     bool isInScope(StringRef Scope) const {
128       return StringRef(FullName).startswith(Scope);
129     }
130 
131     const FunctionDecl *const FDecl;
132     const StringRef Name;
133     const std::string FullName;
134   };
135 
136   /// Catch taint related bugs. Check if tainted data is passed to a
137   /// system call etc. Returns true on matching.
138   bool checkPre(const CallEvent &Call, const FunctionData &FData,
139                 CheckerContext &C) const;
140 
141   /// Add taint sources on a pre-visit. Returns true on matching.
142   bool addSourcesPre(const CallEvent &Call, const FunctionData &FData,
143                      CheckerContext &C) const;
144 
145   /// Mark filter's arguments not tainted on a pre-visit. Returns true on
146   /// matching.
147   bool addFiltersPre(const CallEvent &Call, const FunctionData &FData,
148                      CheckerContext &C) const;
149 
150   /// Propagate taint generated at pre-visit. Returns true on matching.
151   static bool propagateFromPre(const CallEvent &Call, CheckerContext &C);
152 
153   /// Check if the region the expression evaluates to is the standard input,
154   /// and thus, is tainted.
155   static bool isStdin(const Expr *E, CheckerContext &C);
156 
157   /// Given a pointer argument, return the value it points to.
158   static Optional<SVal> getPointeeOf(CheckerContext &C, const Expr *Arg);
159 
160   /// Check for CWE-134: Uncontrolled Format String.
161   static constexpr llvm::StringLiteral MsgUncontrolledFormatString =
162       "Untrusted data is used as a format string "
163       "(CWE-134: Uncontrolled Format String)";
164   bool checkUncontrolledFormatString(const CallEvent &Call,
165                                      CheckerContext &C) const;
166 
167   /// Check for:
168   /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
169   /// CWE-78, "Failure to Sanitize Data into an OS Command"
170   static constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
171       "Untrusted data is passed to a system call "
172       "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
173   bool checkSystemCall(const CallEvent &Call, StringRef Name,
174                        CheckerContext &C) const;
175 
176   /// Check if tainted data is used as a buffer size ins strn.. functions,
177   /// and allocators.
178   static constexpr llvm::StringLiteral MsgTaintedBufferSize =
179       "Untrusted data is used to specify the buffer size "
180       "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
181       "for character data and the null terminator)";
182   bool checkTaintedBufferSize(const CallEvent &Call, CheckerContext &C) const;
183 
184   /// Check if tainted data is used as a custom sink's parameter.
185   static constexpr llvm::StringLiteral MsgCustomSink =
186       "Untrusted data is passed to a user-defined sink";
187   bool checkCustomSinks(const CallEvent &Call, const FunctionData &FData,
188                         CheckerContext &C) const;
189 
190   /// Generate a report if the expression is tainted or points to tainted data.
191   bool generateReportIfTainted(const Expr *E, StringRef Msg,
192                                CheckerContext &C) const;
193 
194   struct TaintPropagationRule;
195   template <typename T>
196   using ConfigDataMap =
197       std::unordered_multimap<std::string, std::pair<std::string, T>>;
198   using NameRuleMap = ConfigDataMap<TaintPropagationRule>;
199   using NameArgMap = ConfigDataMap<ArgVector>;
200 
201   /// Find a function with the given name and scope. Returns the first match
202   /// or the end of the map.
203   template <typename T>
204   static auto findFunctionInConfig(const ConfigDataMap<T> &Map,
205                                    const FunctionData &FData);
206 
207   /// A struct used to specify taint propagation rules for a function.
208   ///
209   /// If any of the possible taint source arguments is tainted, all of the
210   /// destination arguments should also be tainted. Use InvalidArgIndex in the
211   /// src list to specify that all of the arguments can introduce taint. Use
212   /// InvalidArgIndex in the dst arguments to signify that all the non-const
213   /// pointer and reference arguments might be tainted on return. If
214   /// ReturnValueIndex is added to the dst list, the return value will be
215   /// tainted.
216   struct TaintPropagationRule {
217     using PropagationFuncType = bool (*)(bool IsTainted, const CallEvent &Call,
218                                          CheckerContext &C);
219 
220     /// List of arguments which can be taint sources and should be checked.
221     ArgVector SrcArgs;
222     /// List of arguments which should be tainted on function return.
223     ArgVector DstArgs;
224     /// Index for the first variadic parameter if exist.
225     unsigned VariadicIndex;
226     /// Show when a function has variadic parameters. If it has, it marks all
227     /// of them as source or destination.
228     VariadicType VarType;
229     /// Special function for tainted source determination. If defined, it can
230     /// override the default behavior.
231     PropagationFuncType PropagationFunc;
232 
233     TaintPropagationRule()
234         : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
235           PropagationFunc(nullptr) {}
236 
237     TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst,
238                          VariadicType Var = VariadicType::None,
239                          unsigned VarIndex = InvalidArgIndex,
240                          PropagationFuncType Func = nullptr)
241         : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)),
242           VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {}
243 
244     /// Get the propagation rule for a given function.
245     static TaintPropagationRule
246     getTaintPropagationRule(const NameRuleMap &CustomPropagations,
247                             const FunctionData &FData, CheckerContext &C);
248 
249     void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
250     void addDstArg(unsigned A) { DstArgs.push_back(A); }
251 
252     bool isNull() const {
253       return SrcArgs.empty() && DstArgs.empty() &&
254              VariadicType::None == VarType;
255     }
256 
257     bool isDestinationArgument(unsigned ArgNum) const {
258       return (llvm::find(DstArgs, ArgNum) != DstArgs.end());
259     }
260 
261     static bool isTaintedOrPointsToTainted(const Expr *E,
262                                            const ProgramStateRef &State,
263                                            CheckerContext &C) {
264       if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C))
265         return true;
266 
267       if (!E->getType().getTypePtr()->isPointerType())
268         return false;
269 
270       Optional<SVal> V = getPointeeOf(C, E);
271       return (V && isTainted(State, *V));
272     }
273 
274     /// Pre-process a function which propagates taint according to the
275     /// taint rule.
276     ProgramStateRef process(const CallEvent &Call, CheckerContext &C) const;
277 
278     // Functions for custom taintedness propagation.
279     static bool postSocket(bool IsTainted, const CallEvent &Call,
280                            CheckerContext &C);
281   };
282 
283   /// Defines a map between the propagation function's name, scope
284   /// and TaintPropagationRule.
285   NameRuleMap CustomPropagations;
286 
287   /// Defines a map between the filter function's name, scope and filtering
288   /// args.
289   NameArgMap CustomFilters;
290 
291   /// Defines a map between the sink function's name, scope and sinking args.
292   NameArgMap CustomSinks;
293 };
294 
// Namespace-scope definitions of the static data members that are declared
// and initialized inside the class body.
const unsigned GenericTaintChecker::ReturnValueIndex;
const unsigned GenericTaintChecker::InvalidArgIndex;

// The same for the diagnostic message constants.
// FIXME: these lines can be removed in C++17
constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString;
constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs;
constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize;
constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink;
303 } // end of anonymous namespace
304 
// Short alias for the configuration structure, used by the YAML traits.
using TaintConfig = GenericTaintChecker::TaintConfiguration;

// Let the YAML I/O layer handle vectors of these element types as sequences.
LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation)
LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameScopeArgs)
309 
310 namespace llvm {
311 namespace yaml {
312 template <> struct MappingTraits<TaintConfig> {
313   static void mapping(IO &IO, TaintConfig &Config) {
314     IO.mapOptional("Propagations", Config.Propagations);
315     IO.mapOptional("Filters", Config.Filters);
316     IO.mapOptional("Sinks", Config.Sinks);
317   }
318 };
319 
320 template <> struct MappingTraits<TaintConfig::Propagation> {
321   static void mapping(IO &IO, TaintConfig::Propagation &Propagation) {
322     IO.mapRequired("Name", Propagation.Name);
323     IO.mapOptional("Scope", Propagation.Scope);
324     IO.mapOptional("SrcArgs", Propagation.SrcArgs);
325     IO.mapOptional("DstArgs", Propagation.DstArgs);
326     IO.mapOptional("VariadicType", Propagation.VarType,
327                    GenericTaintChecker::VariadicType::None);
328     IO.mapOptional("VariadicIndex", Propagation.VarIndex,
329                    GenericTaintChecker::InvalidArgIndex);
330   }
331 };
332 
333 template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> {
334   static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) {
335     IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None);
336     IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src);
337     IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst);
338   }
339 };
340 
341 template <> struct MappingTraits<TaintConfig::NameScopeArgs> {
342   static void mapping(IO &IO, TaintConfig::NameScopeArgs &NSA) {
343     IO.mapRequired("Name", std::get<0>(NSA));
344     IO.mapOptional("Scope", std::get<1>(NSA));
345     IO.mapRequired("Args", std::get<2>(NSA));
346   }
347 };
348 } // namespace yaml
349 } // namespace llvm
350 
/// A set which is used to pass information from the call pre-visit to the
/// call post-visit. The values are unsigned integers: either ReturnValueIndex,
/// or the index of a pointer/reference argument whose pointed-to data should
/// be tainted on return.
REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
356 
357 GenericTaintChecker::ArgVector
358 GenericTaintChecker::convertToArgVector(CheckerManager &Mgr,
359                                         const std::string &Option,
360                                         const SignedArgVector &Args) {
361   ArgVector Result;
362   for (int Arg : Args) {
363     if (Arg == -1)
364       Result.push_back(ReturnValueIndex);
365     else if (Arg < -1) {
366       Result.push_back(InvalidArgIndex);
367       Mgr.reportInvalidCheckerOptionValue(
368           this, Option,
369           "an argument number for propagation rules greater or equal to -1");
370     } else
371       Result.push_back(static_cast<unsigned>(Arg));
372   }
373   return Result;
374 }
375 
376 void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr,
377                                              const std::string &Option,
378                                              TaintConfiguration &&Config) {
379   for (auto &P : Config.Propagations) {
380     GenericTaintChecker::CustomPropagations.emplace(
381         P.Name,
382         std::make_pair(P.Scope, TaintPropagationRule{
383                                     std::move(P.SrcArgs),
384                                     convertToArgVector(Mgr, Option, P.DstArgs),
385                                     P.VarType, P.VarIndex}));
386   }
387 
388   for (auto &F : Config.Filters) {
389     GenericTaintChecker::CustomFilters.emplace(
390         std::get<0>(F),
391         std::make_pair(std::move(std::get<1>(F)), std::move(std::get<2>(F))));
392   }
393 
394   for (auto &S : Config.Sinks) {
395     GenericTaintChecker::CustomSinks.emplace(
396         std::get<0>(S),
397         std::make_pair(std::move(std::get<1>(S)), std::move(std::get<2>(S))));
398   }
399 }
400 
401 template <typename T>
402 auto GenericTaintChecker::findFunctionInConfig(const ConfigDataMap<T> &Map,
403                                                const FunctionData &FData) {
404   auto Range = Map.equal_range(std::string(FData.Name));
405   auto It =
406       std::find_if(Range.first, Range.second, [&FData](const auto &Entry) {
407         const auto &Value = Entry.second;
408         StringRef Scope = Value.first;
409         return Scope.empty() || FData.isInScope(Scope);
410       });
411   return It != Range.second ? It : Map.end();
412 }
413 
414 GenericTaintChecker::TaintPropagationRule
415 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
416     const NameRuleMap &CustomPropagations, const FunctionData &FData,
417     CheckerContext &C) {
418   // TODO: Currently, we might lose precision here: we always mark a return
419   // value as tainted even if it's just a pointer, pointing to tainted data.
420 
421   // Check for exact name match for functions without builtin substitutes.
422   // Use qualified name, because these are C functions without namespace.
423   TaintPropagationRule Rule =
424       llvm::StringSwitch<TaintPropagationRule>(FData.FullName)
425           // Source functions
426           // TODO: Add support for vfscanf & family.
427           .Case("fdopen", {{}, {ReturnValueIndex}})
428           .Case("fopen", {{}, {ReturnValueIndex}})
429           .Case("freopen", {{}, {ReturnValueIndex}})
430           .Case("getch", {{}, {ReturnValueIndex}})
431           .Case("getchar", {{}, {ReturnValueIndex}})
432           .Case("getchar_unlocked", {{}, {ReturnValueIndex}})
433           .Case("getenv", {{}, {ReturnValueIndex}})
434           .Case("gets", {{}, {0, ReturnValueIndex}})
435           .Case("scanf", {{}, {}, VariadicType::Dst, 1})
436           .Case("socket", {{},
437                            {ReturnValueIndex},
438                            VariadicType::None,
439                            InvalidArgIndex,
440                            &TaintPropagationRule::postSocket})
441           .Case("wgetch", {{}, {ReturnValueIndex}})
442           // Propagating functions
443           .Case("atoi", {{0}, {ReturnValueIndex}})
444           .Case("atol", {{0}, {ReturnValueIndex}})
445           .Case("atoll", {{0}, {ReturnValueIndex}})
446           .Case("fgetc", {{0}, {ReturnValueIndex}})
447           .Case("fgetln", {{0}, {ReturnValueIndex}})
448           .Case("fgets", {{2}, {0, ReturnValueIndex}})
449           .Case("fscanf", {{0}, {}, VariadicType::Dst, 2})
450           .Case("sscanf", {{0}, {}, VariadicType::Dst, 2})
451           .Case("getc", {{0}, {ReturnValueIndex}})
452           .Case("getc_unlocked", {{0}, {ReturnValueIndex}})
453           .Case("getdelim", {{3}, {0}})
454           .Case("getline", {{2}, {0}})
455           .Case("getw", {{0}, {ReturnValueIndex}})
456           .Case("pread", {{0, 1, 2, 3}, {1, ReturnValueIndex}})
457           .Case("read", {{0, 2}, {1, ReturnValueIndex}})
458           .Case("strchr", {{0}, {ReturnValueIndex}})
459           .Case("strrchr", {{0}, {ReturnValueIndex}})
460           .Case("tolower", {{0}, {ReturnValueIndex}})
461           .Case("toupper", {{0}, {ReturnValueIndex}})
462           .Default({});
463 
464   if (!Rule.isNull())
465     return Rule;
466   assert(FData.FDecl);
467 
468   // Check if it's one of the memory setting/copying functions.
469   // This check is specialized but faster then calling isCLibraryFunction.
470   const FunctionDecl *FDecl = FData.FDecl;
471   unsigned BId = 0;
472   if ((BId = FDecl->getMemoryFunctionKind())) {
473     switch (BId) {
474     case Builtin::BImemcpy:
475     case Builtin::BImemmove:
476     case Builtin::BIstrncpy:
477     case Builtin::BIstrncat:
478       return {{1, 2}, {0, ReturnValueIndex}};
479     case Builtin::BIstrlcpy:
480     case Builtin::BIstrlcat:
481       return {{1, 2}, {0}};
482     case Builtin::BIstrndup:
483       return {{0, 1}, {ReturnValueIndex}};
484 
485     default:
486       break;
487     }
488   }
489 
490   // Process all other functions which could be defined as builtins.
491   if (Rule.isNull()) {
492     const auto OneOf = [FDecl](const auto &... Name) {
493       // FIXME: use fold expression in C++17
494       using unused = int[];
495       bool ret = false;
496       static_cast<void>(unused{
497           0, (ret |= CheckerContext::isCLibraryFunction(FDecl, Name), 0)...});
498       return ret;
499     };
500     if (OneOf("snprintf"))
501       return {{1}, {0, ReturnValueIndex}, VariadicType::Src, 3};
502     if (OneOf("sprintf"))
503       return {{}, {0, ReturnValueIndex}, VariadicType::Src, 2};
504     if (OneOf("strcpy", "stpcpy", "strcat"))
505       return {{1}, {0, ReturnValueIndex}};
506     if (OneOf("bcopy"))
507       return {{0, 2}, {1}};
508     if (OneOf("strdup", "strdupa", "wcsdup"))
509       return {{0}, {ReturnValueIndex}};
510   }
511 
512   // Skipping the following functions, since they might be used for cleansing or
513   // smart memory copy:
514   // - memccpy - copying until hitting a special character.
515 
516   auto It = findFunctionInConfig(CustomPropagations, FData);
517   if (It != CustomPropagations.end())
518     return It->second.second;
519   return {};
520 }
521 
522 void GenericTaintChecker::checkPreCall(const CallEvent &Call,
523                                        CheckerContext &C) const {
524   Optional<FunctionData> FData = FunctionData::create(Call, C);
525   if (!FData)
526     return;
527 
528   // Check for taintedness related errors first: system call, uncontrolled
529   // format string, tainted buffer size.
530   if (checkPre(Call, *FData, C))
531     return;
532 
533   // Marks the function's arguments and/or return value tainted if it present in
534   // the list.
535   if (addSourcesPre(Call, *FData, C))
536     return;
537 
538   addFiltersPre(Call, *FData, C);
539 }
540 
541 void GenericTaintChecker::checkPostCall(const CallEvent &Call,
542                                         CheckerContext &C) const {
543   // Set the marked values as tainted. The return value only accessible from
544   // checkPostStmt.
545   propagateFromPre(Call, C);
546 }
547 
548 void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
549                                      const char *NL, const char *Sep) const {
550   printTaint(State, Out, NL, Sep);
551 }
552 
553 bool GenericTaintChecker::addSourcesPre(const CallEvent &Call,
554                                         const FunctionData &FData,
555                                         CheckerContext &C) const {
556   // First, try generating a propagation rule for this function.
557   TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule(
558       this->CustomPropagations, FData, C);
559   if (!Rule.isNull()) {
560     ProgramStateRef State = Rule.process(Call, C);
561     if (State) {
562       C.addTransition(State);
563       return true;
564     }
565   }
566   return false;
567 }
568 
569 bool GenericTaintChecker::addFiltersPre(const CallEvent &Call,
570                                         const FunctionData &FData,
571                                         CheckerContext &C) const {
572   auto It = findFunctionInConfig(CustomFilters, FData);
573   if (It == CustomFilters.end())
574     return false;
575 
576   ProgramStateRef State = C.getState();
577   const auto &Value = It->second;
578   const ArgVector &Args = Value.second;
579   for (unsigned ArgNum : Args) {
580     if (ArgNum >= Call.getNumArgs())
581       continue;
582 
583     const Expr *Arg = Call.getArgExpr(ArgNum);
584     Optional<SVal> V = getPointeeOf(C, Arg);
585     if (V)
586       State = removeTaint(State, *V);
587   }
588 
589   if (State != C.getState()) {
590     C.addTransition(State);
591     return true;
592   }
593   return false;
594 }
595 
596 bool GenericTaintChecker::propagateFromPre(const CallEvent &Call,
597                                            CheckerContext &C) {
598   ProgramStateRef State = C.getState();
599 
600   // Depending on what was tainted at pre-visit, we determined a set of
601   // arguments which should be tainted after the function returns. These are
602   // stored in the state as TaintArgsOnPostVisit set.
603   TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
604   if (TaintArgs.isEmpty())
605     return false;
606 
607   for (unsigned ArgNum : TaintArgs) {
608     // Special handling for the tainted return value.
609     if (ArgNum == ReturnValueIndex) {
610       State = addTaint(State, Call.getReturnValue());
611       continue;
612     }
613 
614     // The arguments are pointer arguments. The data they are pointing at is
615     // tainted after the call.
616     if (Call.getNumArgs() < (ArgNum + 1))
617       return false;
618     const Expr *Arg = Call.getArgExpr(ArgNum);
619     Optional<SVal> V = getPointeeOf(C, Arg);
620     if (V)
621       State = addTaint(State, *V);
622   }
623 
624   // Clear up the taint info from the state.
625   State = State->remove<TaintArgsOnPostVisit>();
626 
627   if (State != C.getState()) {
628     C.addTransition(State);
629     return true;
630   }
631   return false;
632 }
633 
634 bool GenericTaintChecker::checkPre(const CallEvent &Call,
635                                    const FunctionData &FData,
636                                    CheckerContext &C) const {
637   if (checkUncontrolledFormatString(Call, C))
638     return true;
639 
640   if (checkSystemCall(Call, FData.Name, C))
641     return true;
642 
643   if (checkTaintedBufferSize(Call, C))
644     return true;
645 
646   return checkCustomSinks(Call, FData, C);
647 }
648 
649 Optional<SVal> GenericTaintChecker::getPointeeOf(CheckerContext &C,
650                                                  const Expr *Arg) {
651   ProgramStateRef State = C.getState();
652   SVal AddrVal = C.getSVal(Arg->IgnoreParens());
653   if (AddrVal.isUnknownOrUndef())
654     return None;
655 
656   Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
657   if (!AddrLoc)
658     return None;
659 
660   QualType ArgTy = Arg->getType().getCanonicalType();
661   if (!ArgTy->isPointerType())
662     return State->getSVal(*AddrLoc);
663 
664   QualType ValTy = ArgTy->getPointeeType();
665 
666   // Do not dereference void pointers. Treat them as byte pointers instead.
667   // FIXME: we might want to consider more than just the first byte.
668   if (ValTy->isVoidType())
669     ValTy = C.getASTContext().CharTy;
670 
671   return State->getSVal(*AddrLoc, ValTy);
672 }
673 
674 ProgramStateRef
675 GenericTaintChecker::TaintPropagationRule::process(const CallEvent &Call,
676                                                    CheckerContext &C) const {
677   ProgramStateRef State = C.getState();
678 
679   // Check for taint in arguments.
680   bool IsTainted = true;
681   for (unsigned ArgNum : SrcArgs) {
682     if (ArgNum >= Call.getNumArgs())
683       continue;
684 
685     if ((IsTainted =
686              isTaintedOrPointsToTainted(Call.getArgExpr(ArgNum), State, C)))
687       break;
688   }
689 
690   // Check for taint in variadic arguments.
691   if (!IsTainted && VariadicType::Src == VarType) {
692     // Check if any of the arguments is tainted
693     for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) {
694       if ((IsTainted =
695                isTaintedOrPointsToTainted(Call.getArgExpr(i), State, C)))
696         break;
697     }
698   }
699 
700   if (PropagationFunc)
701     IsTainted = PropagationFunc(IsTainted, Call, C);
702 
703   if (!IsTainted)
704     return State;
705 
706   // Mark the arguments which should be tainted after the function returns.
707   for (unsigned ArgNum : DstArgs) {
708     // Should mark the return value?
709     if (ArgNum == ReturnValueIndex) {
710       State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
711       continue;
712     }
713 
714     if (ArgNum >= Call.getNumArgs())
715       continue;
716 
717     // Mark the given argument.
718     State = State->add<TaintArgsOnPostVisit>(ArgNum);
719   }
720 
721   // Mark all variadic arguments tainted if present.
722   if (VariadicType::Dst == VarType) {
723     // For all pointer and references that were passed in:
724     //   If they are not pointing to const data, mark data as tainted.
725     //   TODO: So far we are just going one level down; ideally we'd need to
726     //         recurse here.
727     for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) {
728       const Expr *Arg = Call.getArgExpr(i);
729       // Process pointer argument.
730       const Type *ArgTy = Arg->getType().getTypePtr();
731       QualType PType = ArgTy->getPointeeType();
732       if ((!PType.isNull() && !PType.isConstQualified()) ||
733           (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) {
734         State = State->add<TaintArgsOnPostVisit>(i);
735       }
736     }
737   }
738 
739   return State;
740 }
741 
742 // If argument 0(protocol domain) is network, the return value should get taint.
743 bool GenericTaintChecker::TaintPropagationRule::postSocket(
744     bool /*IsTainted*/, const CallEvent &Call, CheckerContext &C) {
745   SourceLocation DomLoc = Call.getArgExpr(0)->getExprLoc();
746   StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
747   // White list the internal communication protocols.
748   if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
749       DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
750     return false;
751   return true;
752 }
753 
754 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
755   ProgramStateRef State = C.getState();
756   SVal Val = C.getSVal(E);
757 
758   // stdin is a pointer, so it would be a region.
759   const MemRegion *MemReg = Val.getAsRegion();
760 
761   // The region should be symbolic, we do not know it's value.
762   const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
763   if (!SymReg)
764     return false;
765 
766   // Get it's symbol and find the declaration region it's pointing to.
767   const auto *Sm = dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
768   if (!Sm)
769     return false;
770   const auto *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
771   if (!DeclReg)
772     return false;
773 
774   // This region corresponds to a declaration, find out if it's a global/extern
775   // variable named stdin with the proper type.
776   if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
777     D = D->getCanonicalDecl();
778     if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) {
779       const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr());
780       if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
781                        C.getASTContext().getFILEType().getCanonicalType())
782         return true;
783     }
784   }
785   return false;
786 }
787 
788 static bool getPrintfFormatArgumentNum(const CallEvent &Call,
789                                        const CheckerContext &C,
790                                        unsigned &ArgNum) {
791   // Find if the function contains a format string argument.
792   // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
793   // vsnprintf, syslog, custom annotated functions.
794   const FunctionDecl *FDecl = Call.getDecl()->getAsFunction();
795   if (!FDecl)
796     return false;
797   for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
798     ArgNum = Format->getFormatIdx() - 1;
799     if ((Format->getType()->getName() == "printf") &&
800         Call.getNumArgs() > ArgNum)
801       return true;
802   }
803 
804   // Or if a function is named setproctitle (this is a heuristic).
805   if (C.getCalleeName(FDecl).find("setproctitle") != StringRef::npos) {
806     ArgNum = 0;
807     return true;
808   }
809 
810   return false;
811 }
812 
813 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg,
814                                                   CheckerContext &C) const {
815   assert(E);
816 
817   // Check for taint.
818   ProgramStateRef State = C.getState();
819   Optional<SVal> PointedToSVal = getPointeeOf(C, E);
820   SVal TaintedSVal;
821   if (PointedToSVal && isTainted(State, *PointedToSVal))
822     TaintedSVal = *PointedToSVal;
823   else if (isTainted(State, E, C.getLocationContext()))
824     TaintedSVal = C.getSVal(E);
825   else
826     return false;
827 
828   // Generate diagnostic.
829   if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
830     initBugType();
831     auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
832     report->addRange(E->getSourceRange());
833     report->addVisitor(std::make_unique<TaintBugVisitor>(TaintedSVal));
834     C.emitReport(std::move(report));
835     return true;
836   }
837   return false;
838 }
839 
840 bool GenericTaintChecker::checkUncontrolledFormatString(
841     const CallEvent &Call, CheckerContext &C) const {
842   // Check if the function contains a format string argument.
843   unsigned ArgNum = 0;
844   if (!getPrintfFormatArgumentNum(Call, C, ArgNum))
845     return false;
846 
847   // If either the format string content or the pointer itself are tainted,
848   // warn.
849   return generateReportIfTainted(Call.getArgExpr(ArgNum),
850                                  MsgUncontrolledFormatString, C);
851 }
852 
853 bool GenericTaintChecker::checkSystemCall(const CallEvent &Call, StringRef Name,
854                                           CheckerContext &C) const {
855   // TODO: It might make sense to run this check on demand. In some cases,
856   // we should check if the environment has been cleansed here. We also might
857   // need to know if the user was reset before these calls(seteuid).
858   unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
859                         .Case("system", 0)
860                         .Case("popen", 0)
861                         .Case("execl", 0)
862                         .Case("execle", 0)
863                         .Case("execlp", 0)
864                         .Case("execv", 0)
865                         .Case("execvp", 0)
866                         .Case("execvP", 0)
867                         .Case("execve", 0)
868                         .Case("dlopen", 0)
869                         .Default(InvalidArgIndex);
870 
871   if (ArgNum == InvalidArgIndex || Call.getNumArgs() < (ArgNum + 1))
872     return false;
873 
874   return generateReportIfTainted(Call.getArgExpr(ArgNum), MsgSanitizeSystemArgs,
875                                  C);
876 }
877 
878 // TODO: Should this check be a part of the CString checker?
879 // If yes, should taint be a global setting?
880 bool GenericTaintChecker::checkTaintedBufferSize(const CallEvent &Call,
881                                                  CheckerContext &C) const {
882   const auto *FDecl = Call.getDecl()->getAsFunction();
883   // If the function has a buffer size argument, set ArgNum.
884   unsigned ArgNum = InvalidArgIndex;
885   unsigned BId = 0;
886   if ((BId = FDecl->getMemoryFunctionKind())) {
887     switch (BId) {
888     case Builtin::BImemcpy:
889     case Builtin::BImemmove:
890     case Builtin::BIstrncpy:
891       ArgNum = 2;
892       break;
893     case Builtin::BIstrndup:
894       ArgNum = 1;
895       break;
896     default:
897       break;
898     }
899   }
900 
901   if (ArgNum == InvalidArgIndex) {
902     using CCtx = CheckerContext;
903     if (CCtx::isCLibraryFunction(FDecl, "malloc") ||
904         CCtx::isCLibraryFunction(FDecl, "calloc") ||
905         CCtx::isCLibraryFunction(FDecl, "alloca"))
906       ArgNum = 0;
907     else if (CCtx::isCLibraryFunction(FDecl, "memccpy"))
908       ArgNum = 3;
909     else if (CCtx::isCLibraryFunction(FDecl, "realloc"))
910       ArgNum = 1;
911     else if (CCtx::isCLibraryFunction(FDecl, "bcopy"))
912       ArgNum = 2;
913   }
914 
915   return ArgNum != InvalidArgIndex && Call.getNumArgs() > ArgNum &&
916          generateReportIfTainted(Call.getArgExpr(ArgNum), MsgTaintedBufferSize,
917                                  C);
918 }
919 
920 bool GenericTaintChecker::checkCustomSinks(const CallEvent &Call,
921                                            const FunctionData &FData,
922                                            CheckerContext &C) const {
923   auto It = findFunctionInConfig(CustomSinks, FData);
924   if (It == CustomSinks.end())
925     return false;
926 
927   const auto &Value = It->second;
928   const GenericTaintChecker::ArgVector &Args = Value.second;
929   for (unsigned ArgNum : Args) {
930     if (ArgNum >= Call.getNumArgs())
931       continue;
932 
933     if (generateReportIfTainted(Call.getArgExpr(ArgNum), MsgCustomSink, C))
934       return true;
935   }
936 
937   return false;
938 }
939 
940 void ento::registerGenericTaintChecker(CheckerManager &Mgr) {
941   auto *Checker = Mgr.registerChecker<GenericTaintChecker>();
942   std::string Option{"Config"};
943   StringRef ConfigFile =
944       Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option);
945   llvm::Optional<TaintConfig> Config =
946       getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile);
947   if (Config)
948     Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue()));
949 }
950 
/// Registration predicate for GenericTaintChecker. It returns true
/// unconditionally; \p mgr is not consulted.
bool ento::shouldRegisterGenericTaintChecker(const CheckerManager &mgr) {
  return true;
}
954