15c5bf9b6SAnna Zaks //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
25c5bf9b6SAnna Zaks //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
65c5bf9b6SAnna Zaks //
75c5bf9b6SAnna Zaks //===----------------------------------------------------------------------===//
85c5bf9b6SAnna Zaks //
95c5bf9b6SAnna Zaks // This checker defines the attack surface for generic taint propagation.
105c5bf9b6SAnna Zaks //
115c5bf9b6SAnna Zaks // The taint information produced by it might be useful to other checkers. For
125c5bf9b6SAnna Zaks // example, checkers should report errors which involve tainted data more
135c5bf9b6SAnna Zaks // aggressively, even if the involved symbols are under constrained.
145c5bf9b6SAnna Zaks //
155c5bf9b6SAnna Zaks //===----------------------------------------------------------------------===//
1644551cf6SArtem Dergachev 
174bde15feSGabor Borsik #include "Yaml.h"
183a02247dSChandler Carruth #include "clang/AST/Attr.h"
193a02247dSChandler Carruth #include "clang/Basic/Builtins.h"
204bde15feSGabor Borsik #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
2182f3ed99STom Ritter #include "clang/StaticAnalyzer/Checkers/Taint.h"
223a02247dSChandler Carruth #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
235c5bf9b6SAnna Zaks #include "clang/StaticAnalyzer/Core/Checker.h"
245c5bf9b6SAnna Zaks #include "clang/StaticAnalyzer/Core/CheckerManager.h"
2517f74240SEndre Fülöp #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
2695a94df5SBalazs Benics #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
275c5bf9b6SAnna Zaks #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
283b0ab206SAnna Zaks #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
294bde15feSGabor Borsik #include "llvm/Support/YAMLTraits.h"
3095a94df5SBalazs Benics 
314bde15feSGabor Borsik #include <limits>
3295a94df5SBalazs Benics #include <memory>
332a5fb125SArtem Dergachev #include <utility>
345c5bf9b6SAnna Zaks 
35fa0a80e0SBalazs Benics #define DEBUG_TYPE "taint-checker"
36fa0a80e0SBalazs Benics 
375c5bf9b6SAnna Zaks using namespace clang;
385c5bf9b6SAnna Zaks using namespace ento;
3944551cf6SArtem Dergachev using namespace taint;
405c5bf9b6SAnna Zaks 
41a848a5cfSBalazs Benics using llvm::ImmutableSet;
42a848a5cfSBalazs Benics 
435c5bf9b6SAnna Zaks namespace {
4417f74240SEndre Fülöp 
4517f74240SEndre Fülöp class GenericTaintChecker;
4617f74240SEndre Fülöp 
4717f74240SEndre Fülöp /// Check for CWE-134: Uncontrolled Format String.
4817f74240SEndre Fülöp constexpr llvm::StringLiteral MsgUncontrolledFormatString =
4917f74240SEndre Fülöp     "Untrusted data is used as a format string "
5017f74240SEndre Fülöp     "(CWE-134: Uncontrolled Format String)";
5117f74240SEndre Fülöp 
5217f74240SEndre Fülöp /// Check for:
5317f74240SEndre Fülöp /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
5417f74240SEndre Fülöp /// CWE-78, "Failure to Sanitize Data into an OS Command"
5517f74240SEndre Fülöp constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
5617f74240SEndre Fülöp     "Untrusted data is passed to a system call "
5717f74240SEndre Fülöp     "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
5817f74240SEndre Fülöp 
5917f74240SEndre Fülöp /// Check if tainted data is used as a buffer size in strn.. functions,
6017f74240SEndre Fülöp /// and allocators.
6117f74240SEndre Fülöp constexpr llvm::StringLiteral MsgTaintedBufferSize =
6217f74240SEndre Fülöp     "Untrusted data is used to specify the buffer size "
6317f74240SEndre Fülöp     "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
6417f74240SEndre Fülöp     "for character data and the null terminator)";
6517f74240SEndre Fülöp 
6617f74240SEndre Fülöp /// Check if tainted data is used as a custom sink's parameter.
6717f74240SEndre Fülöp constexpr llvm::StringLiteral MsgCustomSink =
6817f74240SEndre Fülöp     "Untrusted data is passed to a user-defined sink";
6917f74240SEndre Fülöp 
7017f74240SEndre Fülöp using ArgIdxTy = int;
7117f74240SEndre Fülöp using ArgVecTy = llvm::SmallVector<ArgIdxTy, 2>;
7217f74240SEndre Fülöp 
7317f74240SEndre Fülöp /// Denotes the return value.
7417f74240SEndre Fülöp constexpr ArgIdxTy ReturnValueIndex{-1};
7517f74240SEndre Fülöp 
fromArgumentCount(unsigned Count)7617f74240SEndre Fülöp static ArgIdxTy fromArgumentCount(unsigned Count) {
7717f74240SEndre Fülöp   assert(Count <=
7817f74240SEndre Fülöp              static_cast<std::size_t>(std::numeric_limits<ArgIdxTy>::max()) &&
7917f74240SEndre Fülöp          "ArgIdxTy is not large enough to represent the number of arguments.");
8017f74240SEndre Fülöp   return Count;
8117f74240SEndre Fülöp }
8217f74240SEndre Fülöp 
8317f74240SEndre Fülöp /// Check if the region the expression evaluates to is the standard input,
8417f74240SEndre Fülöp /// and thus, is tainted.
8517f74240SEndre Fülöp /// FIXME: Move this to Taint.cpp.
isStdin(SVal Val,const ASTContext & ACtx)8617f74240SEndre Fülöp bool isStdin(SVal Val, const ASTContext &ACtx) {
8717f74240SEndre Fülöp   // FIXME: What if Val is NonParamVarRegion?
8817f74240SEndre Fülöp 
8917f74240SEndre Fülöp   // The region should be symbolic, we do not know it's value.
9017f74240SEndre Fülöp   const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(Val.getAsRegion());
9117f74240SEndre Fülöp   if (!SymReg)
9217f74240SEndre Fülöp     return false;
9317f74240SEndre Fülöp 
9417f74240SEndre Fülöp   // Get it's symbol and find the declaration region it's pointing to.
95f4fc3f6bSBalazs Benics   const auto *DeclReg =
96f4fc3f6bSBalazs Benics       dyn_cast_or_null<DeclRegion>(SymReg->getSymbol()->getOriginRegion());
9717f74240SEndre Fülöp   if (!DeclReg)
9817f74240SEndre Fülöp     return false;
9917f74240SEndre Fülöp 
10017f74240SEndre Fülöp   // This region corresponds to a declaration, find out if it's a global/extern
10117f74240SEndre Fülöp   // variable named stdin with the proper type.
10217f74240SEndre Fülöp   if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
10317f74240SEndre Fülöp     D = D->getCanonicalDecl();
10417f74240SEndre Fülöp     // FIXME: This should look for an exact match.
10517f74240SEndre Fülöp     if (D->getName().contains("stdin") && D->isExternC()) {
10617f74240SEndre Fülöp       const QualType FILETy = ACtx.getFILEType().getCanonicalType();
10717f74240SEndre Fülöp       const QualType Ty = D->getType().getCanonicalType();
10817f74240SEndre Fülöp 
10917f74240SEndre Fülöp       if (Ty->isPointerType())
11017f74240SEndre Fülöp         return Ty->getPointeeType() == FILETy;
11117f74240SEndre Fülöp     }
11217f74240SEndre Fülöp   }
11317f74240SEndre Fülöp   return false;
11417f74240SEndre Fülöp }
11517f74240SEndre Fülöp 
getPointeeOf(const CheckerContext & C,Loc LValue)11617f74240SEndre Fülöp SVal getPointeeOf(const CheckerContext &C, Loc LValue) {
11717f74240SEndre Fülöp   const QualType ArgTy = LValue.getType(C.getASTContext());
11817f74240SEndre Fülöp   if (!ArgTy->isPointerType() || !ArgTy->getPointeeType()->isVoidType())
11917f74240SEndre Fülöp     return C.getState()->getSVal(LValue);
12017f74240SEndre Fülöp 
12117f74240SEndre Fülöp   // Do not dereference void pointers. Treat them as byte pointers instead.
12217f74240SEndre Fülöp   // FIXME: we might want to consider more than just the first byte.
12317f74240SEndre Fülöp   return C.getState()->getSVal(LValue, C.getASTContext().CharTy);
12417f74240SEndre Fülöp }
12517f74240SEndre Fülöp 
12617f74240SEndre Fülöp /// Given a pointer/reference argument, return the value it refers to.
getPointeeOf(const CheckerContext & C,SVal Arg)12717f74240SEndre Fülöp Optional<SVal> getPointeeOf(const CheckerContext &C, SVal Arg) {
12817f74240SEndre Fülöp   if (auto LValue = Arg.getAs<Loc>())
12917f74240SEndre Fülöp     return getPointeeOf(C, *LValue);
13017f74240SEndre Fülöp   return None;
13117f74240SEndre Fülöp }
13217f74240SEndre Fülöp 
13317f74240SEndre Fülöp /// Given a pointer, return the SVal of its pointee or if it is tainted,
13417f74240SEndre Fülöp /// otherwise return the pointer's SVal if tainted.
13517f74240SEndre Fülöp /// Also considers stdin as a taint source.
getTaintedPointeeOrPointer(const CheckerContext & C,SVal Arg)13617f74240SEndre Fülöp Optional<SVal> getTaintedPointeeOrPointer(const CheckerContext &C, SVal Arg) {
13717f74240SEndre Fülöp   const ProgramStateRef State = C.getState();
13817f74240SEndre Fülöp 
13917f74240SEndre Fülöp   if (auto Pointee = getPointeeOf(C, Arg))
14017f74240SEndre Fülöp     if (isTainted(State, *Pointee)) // FIXME: isTainted(...) ? Pointee : None;
14117f74240SEndre Fülöp       return Pointee;
14217f74240SEndre Fülöp 
14317f74240SEndre Fülöp   if (isTainted(State, Arg))
14417f74240SEndre Fülöp     return Arg;
14517f74240SEndre Fülöp 
14617f74240SEndre Fülöp   // FIXME: This should be done by the isTainted() API.
14717f74240SEndre Fülöp   if (isStdin(Arg, C.getASTContext()))
14817f74240SEndre Fülöp     return Arg;
14917f74240SEndre Fülöp 
15017f74240SEndre Fülöp   return None;
15117f74240SEndre Fülöp }
15217f74240SEndre Fülöp 
isTaintedOrPointsToTainted(const Expr * E,const ProgramStateRef & State,CheckerContext & C)15317f74240SEndre Fülöp bool isTaintedOrPointsToTainted(const Expr *E, const ProgramStateRef &State,
15417f74240SEndre Fülöp                                 CheckerContext &C) {
155064a08cdSKazu Hirata   return getTaintedPointeeOrPointer(C, C.getSVal(E)).has_value();
15617f74240SEndre Fülöp }
15717f74240SEndre Fülöp 
15817f74240SEndre Fülöp /// ArgSet is used to describe arguments relevant for taint detection or
15917f74240SEndre Fülöp /// taint application. A discrete set of argument indexes and a variadic
16017f74240SEndre Fülöp /// argument list signified by a starting index are supported.
16117f74240SEndre Fülöp class ArgSet {
16217f74240SEndre Fülöp public:
16317f74240SEndre Fülöp   ArgSet() = default;
ArgSet(ArgVecTy && DiscreteArgs,Optional<ArgIdxTy> VariadicIndex=None)16417f74240SEndre Fülöp   ArgSet(ArgVecTy &&DiscreteArgs, Optional<ArgIdxTy> VariadicIndex = None)
16517f74240SEndre Fülöp       : DiscreteArgs(std::move(DiscreteArgs)),
16617f74240SEndre Fülöp         VariadicIndex(std::move(VariadicIndex)) {}
16717f74240SEndre Fülöp 
contains(ArgIdxTy ArgIdx) const16817f74240SEndre Fülöp   bool contains(ArgIdxTy ArgIdx) const {
16917f74240SEndre Fülöp     if (llvm::is_contained(DiscreteArgs, ArgIdx))
17017f74240SEndre Fülöp       return true;
17117f74240SEndre Fülöp 
17217f74240SEndre Fülöp     return VariadicIndex && ArgIdx >= *VariadicIndex;
17317f74240SEndre Fülöp   }
17417f74240SEndre Fülöp 
isEmpty() const17517f74240SEndre Fülöp   bool isEmpty() const { return DiscreteArgs.empty() && !VariadicIndex; }
17617f74240SEndre Fülöp 
17717f74240SEndre Fülöp private:
17817f74240SEndre Fülöp   ArgVecTy DiscreteArgs;
17917f74240SEndre Fülöp   Optional<ArgIdxTy> VariadicIndex;
18017f74240SEndre Fülöp };
18117f74240SEndre Fülöp 
18217f74240SEndre Fülöp /// A struct used to specify taint propagation rules for a function.
18317f74240SEndre Fülöp ///
18417f74240SEndre Fülöp /// If any of the possible taint source arguments is tainted, all of the
18517f74240SEndre Fülöp /// destination arguments should also be tainted. If ReturnValueIndex is added
18617f74240SEndre Fülöp /// to the dst list, the return value will be tainted.
18717f74240SEndre Fülöp class GenericTaintRule {
18817f74240SEndre Fülöp   /// Arguments which are taints sinks and should be checked, and a report
18917f74240SEndre Fülöp   /// should be emitted if taint reaches these.
19017f74240SEndre Fülöp   ArgSet SinkArgs;
19117f74240SEndre Fülöp   /// Arguments which should be sanitized on function return.
19217f74240SEndre Fülöp   ArgSet FilterArgs;
19317f74240SEndre Fülöp   /// Arguments which can participate in taint propagationa. If any of the
19417f74240SEndre Fülöp   /// arguments in PropSrcArgs is tainted, all arguments in  PropDstArgs should
19517f74240SEndre Fülöp   /// be tainted.
19617f74240SEndre Fülöp   ArgSet PropSrcArgs;
19717f74240SEndre Fülöp   ArgSet PropDstArgs;
19817f74240SEndre Fülöp 
19917f74240SEndre Fülöp   /// A message that explains why the call is sensitive to taint.
20017f74240SEndre Fülöp   Optional<StringRef> SinkMsg;
20117f74240SEndre Fülöp 
20217f74240SEndre Fülöp   GenericTaintRule() = default;
20317f74240SEndre Fülöp 
GenericTaintRule(ArgSet && Sink,ArgSet && Filter,ArgSet && Src,ArgSet && Dst,Optional<StringRef> SinkMsg=None)20417f74240SEndre Fülöp   GenericTaintRule(ArgSet &&Sink, ArgSet &&Filter, ArgSet &&Src, ArgSet &&Dst,
20517f74240SEndre Fülöp                    Optional<StringRef> SinkMsg = None)
20617f74240SEndre Fülöp       : SinkArgs(std::move(Sink)), FilterArgs(std::move(Filter)),
20717f74240SEndre Fülöp         PropSrcArgs(std::move(Src)), PropDstArgs(std::move(Dst)),
20817f74240SEndre Fülöp         SinkMsg(SinkMsg) {}
20917f74240SEndre Fülöp 
21017f74240SEndre Fülöp public:
21117f74240SEndre Fülöp   /// Make a rule that reports a warning if taint reaches any of \p FilterArgs
21217f74240SEndre Fülöp   /// arguments.
Sink(ArgSet && SinkArgs,Optional<StringRef> Msg=None)21317f74240SEndre Fülöp   static GenericTaintRule Sink(ArgSet &&SinkArgs,
21417f74240SEndre Fülöp                                Optional<StringRef> Msg = None) {
21517f74240SEndre Fülöp     return {std::move(SinkArgs), {}, {}, {}, Msg};
21617f74240SEndre Fülöp   }
21717f74240SEndre Fülöp 
21817f74240SEndre Fülöp   /// Make a rule that sanitizes all FilterArgs arguments.
Filter(ArgSet && FilterArgs)21917f74240SEndre Fülöp   static GenericTaintRule Filter(ArgSet &&FilterArgs) {
22017f74240SEndre Fülöp     return {{}, std::move(FilterArgs), {}, {}};
22117f74240SEndre Fülöp   }
22217f74240SEndre Fülöp 
22317f74240SEndre Fülöp   /// Make a rule that unconditionally taints all Args.
22417f74240SEndre Fülöp   /// If Func is provided, it must also return true for taint to propagate.
Source(ArgSet && SourceArgs)22517f74240SEndre Fülöp   static GenericTaintRule Source(ArgSet &&SourceArgs) {
22617f74240SEndre Fülöp     return {{}, {}, {}, std::move(SourceArgs)};
22717f74240SEndre Fülöp   }
22817f74240SEndre Fülöp 
22917f74240SEndre Fülöp   /// Make a rule that taints all PropDstArgs if any of PropSrcArgs is tainted.
Prop(ArgSet && SrcArgs,ArgSet && DstArgs)23017f74240SEndre Fülöp   static GenericTaintRule Prop(ArgSet &&SrcArgs, ArgSet &&DstArgs) {
23117f74240SEndre Fülöp     return {{}, {}, std::move(SrcArgs), std::move(DstArgs)};
23217f74240SEndre Fülöp   }
23317f74240SEndre Fülöp 
23417f74240SEndre Fülöp   /// Make a rule that taints all PropDstArgs if any of PropSrcArgs is tainted.
SinkProp(ArgSet && SinkArgs,ArgSet && SrcArgs,ArgSet && DstArgs,Optional<StringRef> Msg=None)23517f74240SEndre Fülöp   static GenericTaintRule SinkProp(ArgSet &&SinkArgs, ArgSet &&SrcArgs,
23617f74240SEndre Fülöp                                    ArgSet &&DstArgs,
23717f74240SEndre Fülöp                                    Optional<StringRef> Msg = None) {
23817f74240SEndre Fülöp     return {
23917f74240SEndre Fülöp         std::move(SinkArgs), {}, std::move(SrcArgs), std::move(DstArgs), Msg};
24017f74240SEndre Fülöp   }
24117f74240SEndre Fülöp 
24217f74240SEndre Fülöp   /// Process a function which could either be a taint source, a taint sink, a
24317f74240SEndre Fülöp   /// taint filter or a taint propagator.
24417f74240SEndre Fülöp   void process(const GenericTaintChecker &Checker, const CallEvent &Call,
24517f74240SEndre Fülöp                CheckerContext &C) const;
24617f74240SEndre Fülöp 
24717f74240SEndre Fülöp   /// Handles the resolution of indexes of type ArgIdxTy to Expr*-s.
GetArgExpr(ArgIdxTy ArgIdx,const CallEvent & Call)24817f74240SEndre Fülöp   static const Expr *GetArgExpr(ArgIdxTy ArgIdx, const CallEvent &Call) {
24917f74240SEndre Fülöp     return ArgIdx == ReturnValueIndex ? Call.getOriginExpr()
25017f74240SEndre Fülöp                                       : Call.getArgExpr(ArgIdx);
25117f74240SEndre Fülöp   };
25217f74240SEndre Fülöp 
25317f74240SEndre Fülöp   /// Functions for custom taintedness propagation.
25417f74240SEndre Fülöp   static bool UntrustedEnv(CheckerContext &C);
25517f74240SEndre Fülöp };
25617f74240SEndre Fülöp 
25717f74240SEndre Fülöp using RuleLookupTy = CallDescriptionMap<GenericTaintRule>;
25817f74240SEndre Fülöp 
25917f74240SEndre Fülöp /// Used to parse the configuration file.
26017f74240SEndre Fülöp struct TaintConfiguration {
26117f74240SEndre Fülöp   using NameScopeArgs = std::tuple<std::string, std::string, ArgVecTy>;
26217f74240SEndre Fülöp   enum class VariadicType { None, Src, Dst };
26317f74240SEndre Fülöp 
26417f74240SEndre Fülöp   struct Common {
26517f74240SEndre Fülöp     std::string Name;
26617f74240SEndre Fülöp     std::string Scope;
26717f74240SEndre Fülöp   };
26817f74240SEndre Fülöp 
26917f74240SEndre Fülöp   struct Sink : Common {
27017f74240SEndre Fülöp     ArgVecTy SinkArgs;
27117f74240SEndre Fülöp   };
27217f74240SEndre Fülöp 
27317f74240SEndre Fülöp   struct Filter : Common {
27417f74240SEndre Fülöp     ArgVecTy FilterArgs;
27517f74240SEndre Fülöp   };
27617f74240SEndre Fülöp 
27717f74240SEndre Fülöp   struct Propagation : Common {
27817f74240SEndre Fülöp     ArgVecTy SrcArgs;
27917f74240SEndre Fülöp     ArgVecTy DstArgs;
28017f74240SEndre Fülöp     VariadicType VarType;
28117f74240SEndre Fülöp     ArgIdxTy VarIndex;
28217f74240SEndre Fülöp   };
28317f74240SEndre Fülöp 
28417f74240SEndre Fülöp   std::vector<Propagation> Propagations;
28517f74240SEndre Fülöp   std::vector<Filter> Filters;
28617f74240SEndre Fülöp   std::vector<Sink> Sinks;
28717f74240SEndre Fülöp 
28817f74240SEndre Fülöp   TaintConfiguration() = default;
28917f74240SEndre Fülöp   TaintConfiguration(const TaintConfiguration &) = default;
29017f74240SEndre Fülöp   TaintConfiguration(TaintConfiguration &&) = default;
29117f74240SEndre Fülöp   TaintConfiguration &operator=(const TaintConfiguration &) = default;
29217f74240SEndre Fülöp   TaintConfiguration &operator=(TaintConfiguration &&) = default;
29317f74240SEndre Fülöp };
29417f74240SEndre Fülöp 
29517f74240SEndre Fülöp struct GenericTaintRuleParser {
GenericTaintRuleParser__anon39b2fff00111::GenericTaintRuleParser29617f74240SEndre Fülöp   GenericTaintRuleParser(CheckerManager &Mgr) : Mgr(Mgr) {}
29717f74240SEndre Fülöp   /// Container type used to gather call identification objects grouped into
29817f74240SEndre Fülöp   /// pairs with their corresponding taint rules. It is temporary as it is used
29917f74240SEndre Fülöp   /// to finally initialize RuleLookupTy, which is considered to be immutable.
30017f74240SEndre Fülöp   using RulesContTy = std::vector<std::pair<CallDescription, GenericTaintRule>>;
30117f74240SEndre Fülöp   RulesContTy parseConfiguration(const std::string &Option,
30217f74240SEndre Fülöp                                  TaintConfiguration &&Config) const;
30317f74240SEndre Fülöp 
30417f74240SEndre Fülöp private:
30517f74240SEndre Fülöp   using NamePartsTy = llvm::SmallVector<SmallString<32>, 2>;
30617f74240SEndre Fülöp 
30717f74240SEndre Fülöp   /// Validate part of the configuration, which contains a list of argument
30817f74240SEndre Fülöp   /// indexes.
30917f74240SEndre Fülöp   void validateArgVector(const std::string &Option, const ArgVecTy &Args) const;
31017f74240SEndre Fülöp 
31117f74240SEndre Fülöp   template <typename Config> static NamePartsTy parseNameParts(const Config &C);
31217f74240SEndre Fülöp 
31317f74240SEndre Fülöp   // Takes the config and creates a CallDescription for it and associates a Rule
31417f74240SEndre Fülöp   // with that.
31517f74240SEndre Fülöp   template <typename Config>
31617f74240SEndre Fülöp   static void consumeRulesFromConfig(const Config &C, GenericTaintRule &&Rule,
31717f74240SEndre Fülöp                                      RulesContTy &Rules);
31817f74240SEndre Fülöp 
31917f74240SEndre Fülöp   void parseConfig(const std::string &Option, TaintConfiguration::Sink &&P,
32017f74240SEndre Fülöp                    RulesContTy &Rules) const;
32117f74240SEndre Fülöp   void parseConfig(const std::string &Option, TaintConfiguration::Filter &&P,
32217f74240SEndre Fülöp                    RulesContTy &Rules) const;
32317f74240SEndre Fülöp   void parseConfig(const std::string &Option,
32417f74240SEndre Fülöp                    TaintConfiguration::Propagation &&P,
32517f74240SEndre Fülöp                    RulesContTy &Rules) const;
32617f74240SEndre Fülöp 
32717f74240SEndre Fülöp   CheckerManager &Mgr;
32817f74240SEndre Fülöp };
32917f74240SEndre Fülöp 
33095a94df5SBalazs Benics class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> {
3313b0ab206SAnna Zaks public:
33295a94df5SBalazs Benics   void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
33395a94df5SBalazs Benics   void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
3345c5bf9b6SAnna Zaks 
3354bde15feSGabor Borsik   void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
3364bde15feSGabor Borsik                   const char *Sep) const override;
3374bde15feSGabor Borsik 
338080ecafdSGabor Borsik   /// Generate a report if the expression is tainted or points to tainted data.
339080ecafdSGabor Borsik   bool generateReportIfTainted(const Expr *E, StringRef Msg,
340080ecafdSGabor Borsik                                CheckerContext &C) const;
341080ecafdSGabor Borsik 
34217f74240SEndre Fülöp private:
34317f74240SEndre Fülöp   const BugType BT{this, "Use of Untrusted Data", "Untrusted Data"};
344273e6742SBorsik Gabor 
34517f74240SEndre Fülöp   bool checkUncontrolledFormatString(const CallEvent &Call,
34617f74240SEndre Fülöp                                      CheckerContext &C) const;
347080ecafdSGabor Borsik 
34817f74240SEndre Fülöp   void taintUnsafeSocketProtocol(const CallEvent &Call,
34917f74240SEndre Fülöp                                  CheckerContext &C) const;
3502827349cSKristof Umann 
35117f74240SEndre Fülöp   /// Default taint rules are initilized with the help of a CheckerContext to
35217f74240SEndre Fülöp   /// access the names of built-in functions like memcpy.
35317f74240SEndre Fülöp   void initTaintRules(CheckerContext &C) const;
3543666d2c1SAnna Zaks 
35517f74240SEndre Fülöp   /// CallDescription currently cannot restrict matches to the global namespace
35617f74240SEndre Fülöp   /// only, which is why multiple CallDescriptionMaps are used, as we want to
35717f74240SEndre Fülöp   /// disambiguate global C functions from functions inside user-defined
35817f74240SEndre Fülöp   /// namespaces.
35917f74240SEndre Fülöp   // TODO: Remove separation to simplify matching logic once CallDescriptions
36017f74240SEndre Fülöp   // are more expressive.
3613666d2c1SAnna Zaks 
36217f74240SEndre Fülöp   mutable Optional<RuleLookupTy> StaticTaintRules;
36317f74240SEndre Fülöp   mutable Optional<RuleLookupTy> DynamicTaintRules;
3647f6a6b75SAnna Zaks };
365560dbe9aSAnna Zaks } // end of anonymous namespace
3665c5bf9b6SAnna Zaks 
36717f74240SEndre Fülöp /// YAML serialization mapping.
36817f74240SEndre Fülöp LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Sink)
36917f74240SEndre Fülöp LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Filter)
37017f74240SEndre Fülöp LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Propagation)
3714bde15feSGabor Borsik 
3724bde15feSGabor Borsik namespace llvm {
3734bde15feSGabor Borsik namespace yaml {
37417f74240SEndre Fülöp template <> struct MappingTraits<TaintConfiguration> {
mappingllvm::yaml::MappingTraits37517f74240SEndre Fülöp   static void mapping(IO &IO, TaintConfiguration &Config) {
3764bde15feSGabor Borsik     IO.mapOptional("Propagations", Config.Propagations);
3774bde15feSGabor Borsik     IO.mapOptional("Filters", Config.Filters);
3784bde15feSGabor Borsik     IO.mapOptional("Sinks", Config.Sinks);
3794bde15feSGabor Borsik   }
3804bde15feSGabor Borsik };
3814bde15feSGabor Borsik 
38217f74240SEndre Fülöp template <> struct MappingTraits<TaintConfiguration::Sink> {
mappingllvm::yaml::MappingTraits38317f74240SEndre Fülöp   static void mapping(IO &IO, TaintConfiguration::Sink &Sink) {
38417f74240SEndre Fülöp     IO.mapRequired("Name", Sink.Name);
38517f74240SEndre Fülöp     IO.mapOptional("Scope", Sink.Scope);
38617f74240SEndre Fülöp     IO.mapRequired("Args", Sink.SinkArgs);
38717f74240SEndre Fülöp   }
38817f74240SEndre Fülöp };
38917f74240SEndre Fülöp 
39017f74240SEndre Fülöp template <> struct MappingTraits<TaintConfiguration::Filter> {
mappingllvm::yaml::MappingTraits39117f74240SEndre Fülöp   static void mapping(IO &IO, TaintConfiguration::Filter &Filter) {
39217f74240SEndre Fülöp     IO.mapRequired("Name", Filter.Name);
39317f74240SEndre Fülöp     IO.mapOptional("Scope", Filter.Scope);
39417f74240SEndre Fülöp     IO.mapRequired("Args", Filter.FilterArgs);
39517f74240SEndre Fülöp   }
39617f74240SEndre Fülöp };
39717f74240SEndre Fülöp 
39817f74240SEndre Fülöp template <> struct MappingTraits<TaintConfiguration::Propagation> {
mappingllvm::yaml::MappingTraits39917f74240SEndre Fülöp   static void mapping(IO &IO, TaintConfiguration::Propagation &Propagation) {
4004bde15feSGabor Borsik     IO.mapRequired("Name", Propagation.Name);
401273e6742SBorsik Gabor     IO.mapOptional("Scope", Propagation.Scope);
4024bde15feSGabor Borsik     IO.mapOptional("SrcArgs", Propagation.SrcArgs);
4034bde15feSGabor Borsik     IO.mapOptional("DstArgs", Propagation.DstArgs);
40417f74240SEndre Fülöp     IO.mapOptional("VariadicType", Propagation.VarType);
40517f74240SEndre Fülöp     IO.mapOptional("VariadicIndex", Propagation.VarIndex);
4064bde15feSGabor Borsik   }
4074bde15feSGabor Borsik };
4084bde15feSGabor Borsik 
40917f74240SEndre Fülöp template <> struct ScalarEnumerationTraits<TaintConfiguration::VariadicType> {
enumerationllvm::yaml::ScalarEnumerationTraits41017f74240SEndre Fülöp   static void enumeration(IO &IO, TaintConfiguration::VariadicType &Value) {
41117f74240SEndre Fülöp     IO.enumCase(Value, "None", TaintConfiguration::VariadicType::None);
41217f74240SEndre Fülöp     IO.enumCase(Value, "Src", TaintConfiguration::VariadicType::Src);
41317f74240SEndre Fülöp     IO.enumCase(Value, "Dst", TaintConfiguration::VariadicType::Dst);
4144bde15feSGabor Borsik   }
4154bde15feSGabor Borsik };
4164bde15feSGabor Borsik } // namespace yaml
4174bde15feSGabor Borsik } // namespace llvm
4184bde15feSGabor Borsik 
419b3fa8d7dSAnna Zaks /// A set which is used to pass information from call pre-visit instruction
42017f74240SEndre Fülöp /// to the call post-visit. The values are signed integers, which are either
421b3fa8d7dSAnna Zaks /// ReturnValueIndex, or indexes of the pointer/reference argument, which
422b3fa8d7dSAnna Zaks /// points to data, which should be tainted on return.
REGISTER_MAP_WITH_PROGRAMSTATE(TaintArgsOnPostVisit,const LocationContext *,ImmutableSet<ArgIdxTy>)423a848a5cfSBalazs Benics REGISTER_MAP_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, const LocationContext *,
424a848a5cfSBalazs Benics                                ImmutableSet<ArgIdxTy>)
425a848a5cfSBalazs Benics REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(ArgIdxFactory, ArgIdxTy)
4263b0ab206SAnna Zaks 
42717f74240SEndre Fülöp void GenericTaintRuleParser::validateArgVector(const std::string &Option,
42817f74240SEndre Fülöp                                                const ArgVecTy &Args) const {
42917f74240SEndre Fülöp   for (ArgIdxTy Arg : Args) {
43017f74240SEndre Fülöp     if (Arg < ReturnValueIndex) {
4314bde15feSGabor Borsik       Mgr.reportInvalidCheckerOptionValue(
43217f74240SEndre Fülöp           Mgr.getChecker<GenericTaintChecker>(), Option,
4334bde15feSGabor Borsik           "an argument number for propagation rules greater or equal to -1");
4344bde15feSGabor Borsik     }
4354bde15feSGabor Borsik   }
4364bde15feSGabor Borsik }
4374bde15feSGabor Borsik 
43817f74240SEndre Fülöp template <typename Config>
43917f74240SEndre Fülöp GenericTaintRuleParser::NamePartsTy
parseNameParts(const Config & C)44017f74240SEndre Fülöp GenericTaintRuleParser::parseNameParts(const Config &C) {
44117f74240SEndre Fülöp   NamePartsTy NameParts;
44217f74240SEndre Fülöp   if (!C.Scope.empty()) {
44317f74240SEndre Fülöp     // If the Scope argument contains multiple "::" parts, those are considered
44417f74240SEndre Fülöp     // namespace identifiers.
44517f74240SEndre Fülöp     llvm::SmallVector<StringRef, 2> NSParts;
44617f74240SEndre Fülöp     StringRef{C.Scope}.split(NSParts, "::", /*MaxSplit*/ -1,
44717f74240SEndre Fülöp                              /*KeepEmpty*/ false);
44817f74240SEndre Fülöp     NameParts.append(NSParts.begin(), NSParts.end());
44917f74240SEndre Fülöp   }
45017f74240SEndre Fülöp   NameParts.emplace_back(C.Name);
45117f74240SEndre Fülöp   return NameParts;
452273e6742SBorsik Gabor }
453273e6742SBorsik Gabor 
45417f74240SEndre Fülöp template <typename Config>
consumeRulesFromConfig(const Config & C,GenericTaintRule && Rule,RulesContTy & Rules)45517f74240SEndre Fülöp void GenericTaintRuleParser::consumeRulesFromConfig(const Config &C,
45617f74240SEndre Fülöp                                                     GenericTaintRule &&Rule,
45717f74240SEndre Fülöp                                                     RulesContTy &Rules) {
45817f74240SEndre Fülöp   NamePartsTy NameParts = parseNameParts(C);
45917f74240SEndre Fülöp   llvm::SmallVector<const char *, 2> CallDescParts{NameParts.size()};
46017f74240SEndre Fülöp   llvm::transform(NameParts, CallDescParts.begin(),
46117f74240SEndre Fülöp                   [](SmallString<32> &S) { return S.c_str(); });
462262cc74eSTres Popp   Rules.emplace_back(CallDescription(CallDescParts), std::move(Rule));
46317f74240SEndre Fülöp }
464bf740512SAnna Zaks 
parseConfig(const std::string & Option,TaintConfiguration::Sink && S,RulesContTy & Rules) const46517f74240SEndre Fülöp void GenericTaintRuleParser::parseConfig(const std::string &Option,
46617f74240SEndre Fülöp                                          TaintConfiguration::Sink &&S,
46717f74240SEndre Fülöp                                          RulesContTy &Rules) const {
46817f74240SEndre Fülöp   validateArgVector(Option, S.SinkArgs);
46917f74240SEndre Fülöp   consumeRulesFromConfig(S, GenericTaintRule::Sink(std::move(S.SinkArgs)),
47017f74240SEndre Fülöp                          Rules);
47117f74240SEndre Fülöp }
47217f74240SEndre Fülöp 
parseConfig(const std::string & Option,TaintConfiguration::Filter && S,RulesContTy & Rules) const47317f74240SEndre Fülöp void GenericTaintRuleParser::parseConfig(const std::string &Option,
47417f74240SEndre Fülöp                                          TaintConfiguration::Filter &&S,
47517f74240SEndre Fülöp                                          RulesContTy &Rules) const {
47617f74240SEndre Fülöp   validateArgVector(Option, S.FilterArgs);
47717f74240SEndre Fülöp   consumeRulesFromConfig(S, GenericTaintRule::Filter(std::move(S.FilterArgs)),
47817f74240SEndre Fülöp                          Rules);
47917f74240SEndre Fülöp }
48017f74240SEndre Fülöp 
parseConfig(const std::string & Option,TaintConfiguration::Propagation && P,RulesContTy & Rules) const48117f74240SEndre Fülöp void GenericTaintRuleParser::parseConfig(const std::string &Option,
48217f74240SEndre Fülöp                                          TaintConfiguration::Propagation &&P,
48317f74240SEndre Fülöp                                          RulesContTy &Rules) const {
48417f74240SEndre Fülöp   validateArgVector(Option, P.SrcArgs);
48517f74240SEndre Fülöp   validateArgVector(Option, P.DstArgs);
48617f74240SEndre Fülöp   bool IsSrcVariadic = P.VarType == TaintConfiguration::VariadicType::Src;
48717f74240SEndre Fülöp   bool IsDstVariadic = P.VarType == TaintConfiguration::VariadicType::Dst;
48817f74240SEndre Fülöp   Optional<ArgIdxTy> JustVarIndex = P.VarIndex;
48917f74240SEndre Fülöp 
49017f74240SEndre Fülöp   ArgSet SrcDesc(std::move(P.SrcArgs), IsSrcVariadic ? JustVarIndex : None);
49117f74240SEndre Fülöp   ArgSet DstDesc(std::move(P.DstArgs), IsDstVariadic ? JustVarIndex : None);
49217f74240SEndre Fülöp 
49317f74240SEndre Fülöp   consumeRulesFromConfig(
49417f74240SEndre Fülöp       P, GenericTaintRule::Prop(std::move(SrcDesc), std::move(DstDesc)), Rules);
49517f74240SEndre Fülöp }
49617f74240SEndre Fülöp 
49717f74240SEndre Fülöp GenericTaintRuleParser::RulesContTy
parseConfiguration(const std::string & Option,TaintConfiguration && Config) const49817f74240SEndre Fülöp GenericTaintRuleParser::parseConfiguration(const std::string &Option,
49917f74240SEndre Fülöp                                            TaintConfiguration &&Config) const {
50017f74240SEndre Fülöp 
50117f74240SEndre Fülöp   RulesContTy Rules;
50217f74240SEndre Fülöp 
50317f74240SEndre Fülöp   for (auto &F : Config.Filters)
50417f74240SEndre Fülöp     parseConfig(Option, std::move(F), Rules);
50517f74240SEndre Fülöp 
50617f74240SEndre Fülöp   for (auto &S : Config.Sinks)
50717f74240SEndre Fülöp     parseConfig(Option, std::move(S), Rules);
50817f74240SEndre Fülöp 
50917f74240SEndre Fülöp   for (auto &P : Config.Propagations)
51017f74240SEndre Fülöp     parseConfig(Option, std::move(P), Rules);
51117f74240SEndre Fülöp 
51217f74240SEndre Fülöp   return Rules;
51317f74240SEndre Fülöp }
51417f74240SEndre Fülöp 
initTaintRules(CheckerContext & C) const51517f74240SEndre Fülöp void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
5165d324e50SAnna Zaks   // Check for exact name match for functions without builtin substitutes.
517273e6742SBorsik Gabor   // Use qualified name, because these are C functions without namespace.
5185d324e50SAnna Zaks 
51917f74240SEndre Fülöp   if (StaticTaintRules || DynamicTaintRules)
52017f74240SEndre Fülöp     return;
52117f74240SEndre Fülöp 
52217f74240SEndre Fülöp   using RulesConstructionTy =
52317f74240SEndre Fülöp       std::vector<std::pair<CallDescription, GenericTaintRule>>;
52417f74240SEndre Fülöp   using TR = GenericTaintRule;
52517f74240SEndre Fülöp 
52617f74240SEndre Fülöp   const Builtin::Context &BI = C.getASTContext().BuiltinInfo;
52717f74240SEndre Fülöp 
52817f74240SEndre Fülöp   RulesConstructionTy GlobalCRules{
52917f74240SEndre Fülöp       // Sources
53017f74240SEndre Fülöp       {{"fdopen"}, TR::Source({{ReturnValueIndex}})},
53117f74240SEndre Fülöp       {{"fopen"}, TR::Source({{ReturnValueIndex}})},
53217f74240SEndre Fülöp       {{"freopen"}, TR::Source({{ReturnValueIndex}})},
53317f74240SEndre Fülöp       {{"getch"}, TR::Source({{ReturnValueIndex}})},
53417f74240SEndre Fülöp       {{"getchar"}, TR::Source({{ReturnValueIndex}})},
53517f74240SEndre Fülöp       {{"getchar_unlocked"}, TR::Source({{ReturnValueIndex}})},
53617f74240SEndre Fülöp       {{"gets"}, TR::Source({{0}, ReturnValueIndex})},
53734a73879SEndre Fülöp       {{"gets_s"}, TR::Source({{0}, ReturnValueIndex})},
53817f74240SEndre Fülöp       {{"scanf"}, TR::Source({{}, 1})},
53934a73879SEndre Fülöp       {{"scanf_s"}, TR::Source({{}, {1}})},
54017f74240SEndre Fülöp       {{"wgetch"}, TR::Source({{}, ReturnValueIndex})},
54134a73879SEndre Fülöp       // Sometimes the line between taint sources and propagators is blurry.
54234a73879SEndre Fülöp       // _IO_getc is choosen to be a source, but could also be a propagator.
54334a73879SEndre Fülöp       // This way it is simpler, as modeling it as a propagator would require
54434a73879SEndre Fülöp       // to model the possible sources of _IO_FILE * values, which the _IO_getc
54534a73879SEndre Fülöp       // function takes as parameters.
54634a73879SEndre Fülöp       {{"_IO_getc"}, TR::Source({{ReturnValueIndex}})},
54734a73879SEndre Fülöp       {{"getcwd"}, TR::Source({{0, ReturnValueIndex}})},
54834a73879SEndre Fülöp       {{"getwd"}, TR::Source({{0, ReturnValueIndex}})},
54934a73879SEndre Fülöp       {{"readlink"}, TR::Source({{1, ReturnValueIndex}})},
55034a73879SEndre Fülöp       {{"readlinkat"}, TR::Source({{2, ReturnValueIndex}})},
55134a73879SEndre Fülöp       {{"get_current_dir_name"}, TR::Source({{ReturnValueIndex}})},
55234a73879SEndre Fülöp       {{"gethostname"}, TR::Source({{0}})},
55334a73879SEndre Fülöp       {{"getnameinfo"}, TR::Source({{2, 4}})},
55434a73879SEndre Fülöp       {{"getseuserbyname"}, TR::Source({{1, 2}})},
55534a73879SEndre Fülöp       {{"getgroups"}, TR::Source({{1, ReturnValueIndex}})},
55634a73879SEndre Fülöp       {{"getlogin"}, TR::Source({{ReturnValueIndex}})},
55734a73879SEndre Fülöp       {{"getlogin_r"}, TR::Source({{0}})},
55817f74240SEndre Fülöp 
55917f74240SEndre Fülöp       // Props
56017f74240SEndre Fülöp       {{"atoi"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
56117f74240SEndre Fülöp       {{"atol"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
56217f74240SEndre Fülöp       {{"atoll"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
56317f74240SEndre Fülöp       {{"fgetc"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
56417f74240SEndre Fülöp       {{"fgetln"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
5657036413dSBalazs Benics       {{"fgets"}, TR::Prop({{2}}, {{0, ReturnValueIndex}})},
56617f74240SEndre Fülöp       {{"fscanf"}, TR::Prop({{0}}, {{}, 2})},
5674fd6c6e6SEndre Fülöp       {{"fscanf_s"}, TR::Prop({{0}}, {{}, {2}})},
56817f74240SEndre Fülöp       {{"sscanf"}, TR::Prop({{0}}, {{}, 2})},
5694fd6c6e6SEndre Fülöp 
57017f74240SEndre Fülöp       {{"getc"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
57117f74240SEndre Fülöp       {{"getc_unlocked"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
57217f74240SEndre Fülöp       {{"getdelim"}, TR::Prop({{3}}, {{0}})},
57317f74240SEndre Fülöp       {{"getline"}, TR::Prop({{2}}, {{0}})},
57417f74240SEndre Fülöp       {{"getw"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
57517f74240SEndre Fülöp       {{"pread"}, TR::Prop({{0, 1, 2, 3}}, {{1, ReturnValueIndex}})},
57617f74240SEndre Fülöp       {{"read"}, TR::Prop({{0, 2}}, {{1, ReturnValueIndex}})},
57717f74240SEndre Fülöp       {{"strchr"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
57817f74240SEndre Fülöp       {{"strrchr"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
57917f74240SEndre Fülöp       {{"tolower"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
58017f74240SEndre Fülöp       {{"toupper"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
5814fd6c6e6SEndre Fülöp       {{"fread"}, TR::Prop({{3}}, {{0, ReturnValueIndex}})},
5824fd6c6e6SEndre Fülöp       {{"recv"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
5834fd6c6e6SEndre Fülöp       {{"recvfrom"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
5844fd6c6e6SEndre Fülöp 
5854fd6c6e6SEndre Fülöp       {{"ttyname"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
5864fd6c6e6SEndre Fülöp       {{"ttyname_r"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
5874fd6c6e6SEndre Fülöp 
5884fd6c6e6SEndre Fülöp       {{"basename"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
5894fd6c6e6SEndre Fülöp       {{"dirname"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
5904fd6c6e6SEndre Fülöp       {{"fnmatch"}, TR::Prop({{1}}, {{ReturnValueIndex}})},
5914fd6c6e6SEndre Fülöp       {{"memchr"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
5924fd6c6e6SEndre Fülöp       {{"memrchr"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
5934fd6c6e6SEndre Fülöp       {{"rawmemchr"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
5944fd6c6e6SEndre Fülöp 
5954fd6c6e6SEndre Fülöp       {{"mbtowc"}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
5964fd6c6e6SEndre Fülöp       {{"wctomb"}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
5974fd6c6e6SEndre Fülöp       {{"wcwidth"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
5984fd6c6e6SEndre Fülöp 
5994fd6c6e6SEndre Fülöp       {{"memcmp"}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
6004fd6c6e6SEndre Fülöp       {{"memcpy"}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
6014fd6c6e6SEndre Fülöp       {{"memmove"}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
6024fd6c6e6SEndre Fülöp       // If memmem was called with a tainted needle and the search was
6034fd6c6e6SEndre Fülöp       // successful, that would mean that the value pointed by the return value
6044fd6c6e6SEndre Fülöp       // has the same content as the needle. If we choose to go by the policy of
6054fd6c6e6SEndre Fülöp       // content equivalence implies taintedness equivalence, that would mean
6064fd6c6e6SEndre Fülöp       // haystack should be considered a propagation source argument.
6074fd6c6e6SEndre Fülöp       {{"memmem"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6084fd6c6e6SEndre Fülöp 
6094fd6c6e6SEndre Fülöp       // The comment for memmem above also applies to strstr.
6104fd6c6e6SEndre Fülöp       {{"strstr"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6114fd6c6e6SEndre Fülöp       {{"strcasestr"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6124fd6c6e6SEndre Fülöp 
6134fd6c6e6SEndre Fülöp       {{"strchrnul"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6144fd6c6e6SEndre Fülöp 
6154fd6c6e6SEndre Fülöp       {{"index"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6164fd6c6e6SEndre Fülöp       {{"rindex"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6174fd6c6e6SEndre Fülöp 
6184fd6c6e6SEndre Fülöp       // FIXME: In case of arrays, only the first element of the array gets
6194fd6c6e6SEndre Fülöp       // tainted.
6204fd6c6e6SEndre Fülöp       {{"qsort"}, TR::Prop({{0}}, {{0}})},
6214fd6c6e6SEndre Fülöp       {{"qsort_r"}, TR::Prop({{0}}, {{0}})},
6224fd6c6e6SEndre Fülöp 
6234fd6c6e6SEndre Fülöp       {{"strcmp"}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
6244fd6c6e6SEndre Fülöp       {{"strcasecmp"}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
6254fd6c6e6SEndre Fülöp       {{"strncmp"}, TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})},
6264fd6c6e6SEndre Fülöp       {{"strncasecmp"}, TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})},
6274fd6c6e6SEndre Fülöp       {{"strspn"}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
6284fd6c6e6SEndre Fülöp       {{"strcspn"}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
6294fd6c6e6SEndre Fülöp       {{"strpbrk"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6304fd6c6e6SEndre Fülöp       {{"strndup"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6314fd6c6e6SEndre Fülöp       {{"strndupa"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6324fd6c6e6SEndre Fülöp       {{"strlen"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6334fd6c6e6SEndre Fülöp       {{"strnlen"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6344fd6c6e6SEndre Fülöp       {{"strtol"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
6354fd6c6e6SEndre Fülöp       {{"strtoll"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
6364fd6c6e6SEndre Fülöp       {{"strtoul"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
6374fd6c6e6SEndre Fülöp       {{"strtoull"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
6384fd6c6e6SEndre Fülöp 
6394fd6c6e6SEndre Fülöp       {{"isalnum"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6404fd6c6e6SEndre Fülöp       {{"isalpha"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6414fd6c6e6SEndre Fülöp       {{"isascii"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6424fd6c6e6SEndre Fülöp       {{"isblank"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6434fd6c6e6SEndre Fülöp       {{"iscntrl"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6444fd6c6e6SEndre Fülöp       {{"isdigit"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6454fd6c6e6SEndre Fülöp       {{"isgraph"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6464fd6c6e6SEndre Fülöp       {{"islower"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6474fd6c6e6SEndre Fülöp       {{"isprint"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6484fd6c6e6SEndre Fülöp       {{"ispunct"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6494fd6c6e6SEndre Fülöp       {{"isspace"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6504fd6c6e6SEndre Fülöp       {{"isupper"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6514fd6c6e6SEndre Fülöp       {{"isxdigit"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6524fd6c6e6SEndre Fülöp 
65317f74240SEndre Fülöp       {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrncat)}},
65417f74240SEndre Fülöp        TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
65517f74240SEndre Fülöp       {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrlcpy)}},
65617f74240SEndre Fülöp        TR::Prop({{1, 2}}, {{0}})},
65717f74240SEndre Fülöp       {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrlcat)}},
65817f74240SEndre Fülöp        TR::Prop({{1, 2}}, {{0}})},
65917f74240SEndre Fülöp       {{CDF_MaybeBuiltin, {"snprintf"}},
66017f74240SEndre Fülöp        TR::Prop({{1}, 3}, {{0, ReturnValueIndex}})},
66117f74240SEndre Fülöp       {{CDF_MaybeBuiltin, {"sprintf"}},
66217f74240SEndre Fülöp        TR::Prop({{1}, 2}, {{0, ReturnValueIndex}})},
66317f74240SEndre Fülöp       {{CDF_MaybeBuiltin, {"strcpy"}},
66417f74240SEndre Fülöp        TR::Prop({{1}}, {{0, ReturnValueIndex}})},
66517f74240SEndre Fülöp       {{CDF_MaybeBuiltin, {"stpcpy"}},
66617f74240SEndre Fülöp        TR::Prop({{1}}, {{0, ReturnValueIndex}})},
66717f74240SEndre Fülöp       {{CDF_MaybeBuiltin, {"strcat"}},
66817f74240SEndre Fülöp        TR::Prop({{1}}, {{0, ReturnValueIndex}})},
66917f74240SEndre Fülöp       {{CDF_MaybeBuiltin, {"strdup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
67017f74240SEndre Fülöp       {{CDF_MaybeBuiltin, {"strdupa"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
67117f74240SEndre Fülöp       {{CDF_MaybeBuiltin, {"wcsdup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
67217f74240SEndre Fülöp 
67317f74240SEndre Fülöp       // Sinks
67417f74240SEndre Fülöp       {{"system"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
67517f74240SEndre Fülöp       {{"popen"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
67617f74240SEndre Fülöp       {{"execl"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
67717f74240SEndre Fülöp       {{"execle"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
67817f74240SEndre Fülöp       {{"execlp"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
67917f74240SEndre Fülöp       {{"execvp"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
68017f74240SEndre Fülöp       {{"execvP"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
68117f74240SEndre Fülöp       {{"execve"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
68217f74240SEndre Fülöp       {{"dlopen"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
68317f74240SEndre Fülöp       {{CDF_MaybeBuiltin, {"malloc"}}, TR::Sink({{0}}, MsgTaintedBufferSize)},
68417f74240SEndre Fülöp       {{CDF_MaybeBuiltin, {"calloc"}}, TR::Sink({{0}}, MsgTaintedBufferSize)},
68517f74240SEndre Fülöp       {{CDF_MaybeBuiltin, {"alloca"}}, TR::Sink({{0}}, MsgTaintedBufferSize)},
68617f74240SEndre Fülöp       {{CDF_MaybeBuiltin, {"memccpy"}}, TR::Sink({{3}}, MsgTaintedBufferSize)},
68717f74240SEndre Fülöp       {{CDF_MaybeBuiltin, {"realloc"}}, TR::Sink({{1}}, MsgTaintedBufferSize)},
68817f74240SEndre Fülöp       {{{"setproctitle"}}, TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
68917f74240SEndre Fülöp       {{{"setproctitle_fast"}},
69017f74240SEndre Fülöp        TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
69117f74240SEndre Fülöp 
69217f74240SEndre Fülöp       // SinkProps
69317f74240SEndre Fülöp       {{CDF_MaybeBuiltin, BI.getName(Builtin::BImemcpy)},
69417f74240SEndre Fülöp        TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
69517f74240SEndre Fülöp                     MsgTaintedBufferSize)},
69617f74240SEndre Fülöp       {{CDF_MaybeBuiltin, {BI.getName(Builtin::BImemmove)}},
69717f74240SEndre Fülöp        TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
69817f74240SEndre Fülöp                     MsgTaintedBufferSize)},
69917f74240SEndre Fülöp       {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrncpy)}},
70017f74240SEndre Fülöp        TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
70117f74240SEndre Fülöp                     MsgTaintedBufferSize)},
70217f74240SEndre Fülöp       {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrndup)}},
70317f74240SEndre Fülöp        TR::SinkProp({{1}}, {{0, 1}}, {{ReturnValueIndex}},
70417f74240SEndre Fülöp                     MsgTaintedBufferSize)},
70517f74240SEndre Fülöp       {{CDF_MaybeBuiltin, {"bcopy"}},
70617f74240SEndre Fülöp        TR::SinkProp({{2}}, {{0, 2}}, {{1}}, MsgTaintedBufferSize)}};
707edde4efcSBalazs Benics 
708edde4efcSBalazs Benics   // `getenv` returns taint only in untrusted environments.
70917f74240SEndre Fülöp   if (TR::UntrustedEnv(C)) {
71017f74240SEndre Fülöp     // void setproctitle_init(int argc, char *argv[], char *envp[])
71117f74240SEndre Fülöp     GlobalCRules.push_back(
7127036413dSBalazs Benics         {{{"setproctitle_init"}}, TR::Sink({{1, 2}}, MsgCustomSink)});
71317f74240SEndre Fülöp     GlobalCRules.push_back({{"getenv"}, TR::Source({{ReturnValueIndex}})});
714edde4efcSBalazs Benics   }
715edde4efcSBalazs Benics 
71617f74240SEndre Fülöp   StaticTaintRules.emplace(std::make_move_iterator(GlobalCRules.begin()),
71717f74240SEndre Fülöp                            std::make_move_iterator(GlobalCRules.end()));
7185d324e50SAnna Zaks 
71917f74240SEndre Fülöp   // User-provided taint configuration.
72017f74240SEndre Fülöp   CheckerManager *Mgr = C.getAnalysisManager().getCheckerManager();
72117f74240SEndre Fülöp   assert(Mgr);
72217f74240SEndre Fülöp   GenericTaintRuleParser ConfigParser{*Mgr};
72317f74240SEndre Fülöp   std::string Option{"Config"};
72417f74240SEndre Fülöp   StringRef ConfigFile =
72517f74240SEndre Fülöp       Mgr->getAnalyzerOptions().getCheckerStringOption(this, Option);
72617f74240SEndre Fülöp   llvm::Optional<TaintConfiguration> Config =
72717f74240SEndre Fülöp       getConfiguration<TaintConfiguration>(*Mgr, this, Option, ConfigFile);
72817f74240SEndre Fülöp   if (!Config) {
72917f74240SEndre Fülöp     // We don't have external taint config, no parsing required.
73017f74240SEndre Fülöp     DynamicTaintRules = RuleLookupTy{};
73117f74240SEndre Fülöp     return;
73295a94df5SBalazs Benics   }
7335d324e50SAnna Zaks 
73417f74240SEndre Fülöp   GenericTaintRuleParser::RulesContTy Rules{
735*ca4af13eSKazu Hirata       ConfigParser.parseConfiguration(Option, std::move(*Config))};
7365d324e50SAnna Zaks 
73717f74240SEndre Fülöp   DynamicTaintRules.emplace(std::make_move_iterator(Rules.begin()),
73817f74240SEndre Fülöp                             std::make_move_iterator(Rules.end()));
739273e6742SBorsik Gabor }
740080ecafdSGabor Borsik 
checkPreCall(const CallEvent & Call,CheckerContext & C) const74195a94df5SBalazs Benics void GenericTaintChecker::checkPreCall(const CallEvent &Call,
7425c5bf9b6SAnna Zaks                                        CheckerContext &C) const {
74317f74240SEndre Fülöp   initTaintRules(C);
74489bc4c66SBorsik Gabor 
74517f74240SEndre Fülöp   // FIXME: this should be much simpler.
74617f74240SEndre Fülöp   if (const auto *Rule =
74717f74240SEndre Fülöp           Call.isGlobalCFunction() ? StaticTaintRules->lookup(Call) : nullptr)
74817f74240SEndre Fülöp     Rule->process(*this, Call, C);
74917f74240SEndre Fülöp   else if (const auto *Rule = DynamicTaintRules->lookup(Call))
75017f74240SEndre Fülöp     Rule->process(*this, Call, C);
7513b0ab206SAnna Zaks 
75217f74240SEndre Fülöp   // FIXME: These edge cases are to be eliminated from here eventually.
75317f74240SEndre Fülöp   //
75417f74240SEndre Fülöp   // Additional check that is not supported by CallDescription.
75517f74240SEndre Fülöp   // TODO: Make CallDescription be able to match attributes such as printf-like
75617f74240SEndre Fülöp   // arguments.
75717f74240SEndre Fülöp   checkUncontrolledFormatString(Call, C);
75889bc4c66SBorsik Gabor 
75917f74240SEndre Fülöp   // TODO: Modeling sockets should be done in a specific checker.
76017f74240SEndre Fülöp   // Socket is a source, which taints the return value.
76117f74240SEndre Fülöp   taintUnsafeSocketProtocol(Call, C);
762126a2ef9SAnna Zaks }
763126a2ef9SAnna Zaks 
checkPostCall(const CallEvent & Call,CheckerContext & C) const76495a94df5SBalazs Benics void GenericTaintChecker::checkPostCall(const CallEvent &Call,
765126a2ef9SAnna Zaks                                         CheckerContext &C) const {
7662827349cSKristof Umann   // Set the marked values as tainted. The return value only accessible from
7672827349cSKristof Umann   // checkPostStmt.
76849b1e38eSTed Kremenek   ProgramStateRef State = C.getState();
769a848a5cfSBalazs Benics   const StackFrameContext *CurrentFrame = C.getStackFrame();
770b3fa8d7dSAnna Zaks 
771b3fa8d7dSAnna Zaks   // Depending on what was tainted at pre-visit, we determined a set of
772b3fa8d7dSAnna Zaks   // arguments which should be tainted after the function returns. These are
773b3fa8d7dSAnna Zaks   // stored in the state as TaintArgsOnPostVisit set.
774a848a5cfSBalazs Benics   TaintArgsOnPostVisitTy TaintArgsMap = State->get<TaintArgsOnPostVisit>();
775a848a5cfSBalazs Benics 
776a848a5cfSBalazs Benics   const ImmutableSet<ArgIdxTy> *TaintArgs = TaintArgsMap.lookup(CurrentFrame);
777a848a5cfSBalazs Benics   if (!TaintArgs)
77817f74240SEndre Fülöp     return;
779a848a5cfSBalazs Benics   assert(!TaintArgs->isEmpty());
780bf740512SAnna Zaks 
781fa0a80e0SBalazs Benics   LLVM_DEBUG(for (ArgIdxTy I
782a848a5cfSBalazs Benics                   : *TaintArgs) {
783fa0a80e0SBalazs Benics     llvm::dbgs() << "PostCall<";
784fa0a80e0SBalazs Benics     Call.dump(llvm::dbgs());
785fa0a80e0SBalazs Benics     llvm::dbgs() << "> actually wants to taint arg index: " << I << '\n';
786fa0a80e0SBalazs Benics   });
787fa0a80e0SBalazs Benics 
788a848a5cfSBalazs Benics   for (ArgIdxTy ArgNum : *TaintArgs) {
789b3fa8d7dSAnna Zaks     // Special handling for the tainted return value.
790b3fa8d7dSAnna Zaks     if (ArgNum == ReturnValueIndex) {
79195a94df5SBalazs Benics       State = addTaint(State, Call.getReturnValue());
792b3fa8d7dSAnna Zaks       continue;
793b3fa8d7dSAnna Zaks     }
794b3fa8d7dSAnna Zaks 
795b3fa8d7dSAnna Zaks     // The arguments are pointer arguments. The data they are pointing at is
796b3fa8d7dSAnna Zaks     // tainted after the call.
79717f74240SEndre Fülöp     if (auto V = getPointeeOf(C, Call.getArgSVal(ArgNum)))
79844551cf6SArtem Dergachev       State = addTaint(State, *V);
799b3fa8d7dSAnna Zaks   }
800b3fa8d7dSAnna Zaks 
801b3fa8d7dSAnna Zaks   // Clear up the taint info from the state.
802a848a5cfSBalazs Benics   State = State->remove<TaintArgsOnPostVisit>(CurrentFrame);
803b3fa8d7dSAnna Zaks   C.addTransition(State);
804b3fa8d7dSAnna Zaks }
805b3fa8d7dSAnna Zaks 
printState(raw_ostream & Out,ProgramStateRef State,const char * NL,const char * Sep) const80617f74240SEndre Fülöp void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
80717f74240SEndre Fülöp                                      const char *NL, const char *Sep) const {
80817f74240SEndre Fülöp   printTaint(State, Out, NL, Sep);
809126a2ef9SAnna Zaks }
810126a2ef9SAnna Zaks 
process(const GenericTaintChecker & Checker,const CallEvent & Call,CheckerContext & C) const81117f74240SEndre Fülöp void GenericTaintRule::process(const GenericTaintChecker &Checker,
81217f74240SEndre Fülöp                                const CallEvent &Call, CheckerContext &C) const {
81349b1e38eSTed Kremenek   ProgramStateRef State = C.getState();
81417f74240SEndre Fülöp   const ArgIdxTy CallNumArgs = fromArgumentCount(Call.getNumArgs());
8157c96b7dbSAnna Zaks 
81617f74240SEndre Fülöp   /// Iterate every call argument, and get their corresponding Expr and SVal.
81717f74240SEndre Fülöp   const auto ForEachCallArg = [&C, &Call, CallNumArgs](auto &&Fun) {
81817f74240SEndre Fülöp     for (ArgIdxTy I = ReturnValueIndex; I < CallNumArgs; ++I) {
81917f74240SEndre Fülöp       const Expr *E = GetArgExpr(I, Call);
82017f74240SEndre Fülöp       Fun(I, E, C.getSVal(E));
8215c5bf9b6SAnna Zaks     }
82217f74240SEndre Fülöp   };
8235c5bf9b6SAnna Zaks 
82417f74240SEndre Fülöp   /// Check for taint sinks.
82517f74240SEndre Fülöp   ForEachCallArg([this, &Checker, &C, &State](ArgIdxTy I, const Expr *E, SVal) {
82617f74240SEndre Fülöp     if (SinkArgs.contains(I) && isTaintedOrPointsToTainted(E, State, C))
82706decd0bSKazu Hirata       Checker.generateReportIfTainted(E, SinkMsg.value_or(MsgCustomSink), C);
82817f74240SEndre Fülöp   });
8293666d2c1SAnna Zaks 
83017f74240SEndre Fülöp   /// Check for taint filters.
83117f74240SEndre Fülöp   ForEachCallArg([this, &C, &State](ArgIdxTy I, const Expr *E, SVal S) {
83217f74240SEndre Fülöp     if (FilterArgs.contains(I)) {
83317f74240SEndre Fülöp       State = removeTaint(State, S);
83417f74240SEndre Fülöp       if (auto P = getPointeeOf(C, S))
83517f74240SEndre Fülöp         State = removeTaint(State, *P);
8363666d2c1SAnna Zaks     }
83717f74240SEndre Fülöp   });
8382a5fb125SArtem Dergachev 
83917f74240SEndre Fülöp   /// Check for taint propagation sources.
84017f74240SEndre Fülöp   /// A rule is relevant if PropSrcArgs is empty, or if any of its signified
84117f74240SEndre Fülöp   /// args are tainted in context of the current CallEvent.
84217f74240SEndre Fülöp   bool IsMatching = PropSrcArgs.isEmpty();
84317f74240SEndre Fülöp   ForEachCallArg(
84417f74240SEndre Fülöp       [this, &C, &IsMatching, &State](ArgIdxTy I, const Expr *E, SVal) {
84517f74240SEndre Fülöp         IsMatching = IsMatching || (PropSrcArgs.contains(I) &&
84617f74240SEndre Fülöp                                     isTaintedOrPointsToTainted(E, State, C));
84717f74240SEndre Fülöp       });
8482a5fb125SArtem Dergachev 
84917f74240SEndre Fülöp   if (!IsMatching)
85017f74240SEndre Fülöp     return;
8512827349cSKristof Umann 
85217f74240SEndre Fülöp   const auto WouldEscape = [](SVal V, QualType Ty) -> bool {
85396ccb690SBalazs Benics     if (!isa<Loc>(V))
854099fe3fbSAnna Zaks       return false;
855099fe3fbSAnna Zaks 
85617f74240SEndre Fülöp     const bool IsNonConstRef = Ty->isReferenceType() && !Ty.isConstQualified();
85717f74240SEndre Fülöp     const bool IsNonConstPtr =
85817f74240SEndre Fülöp         Ty->isPointerType() && !Ty->getPointeeType().isConstQualified();
859e48ee503SAnna Zaks 
86017f74240SEndre Fülöp     return IsNonConstRef || IsNonConstPtr;
86117f74240SEndre Fülöp   };
86217f74240SEndre Fülöp 
86317f74240SEndre Fülöp   /// Propagate taint where it is necessary.
864a848a5cfSBalazs Benics   auto &F = State->getStateManager().get_context<ArgIdxFactory>();
865a848a5cfSBalazs Benics   ImmutableSet<ArgIdxTy> Result = F.getEmptySet();
86617f74240SEndre Fülöp   ForEachCallArg(
867ecff9b65SFangrui Song       [&](ArgIdxTy I, const Expr *E, SVal V) {
868fa0a80e0SBalazs Benics         if (PropDstArgs.contains(I)) {
869fa0a80e0SBalazs Benics           LLVM_DEBUG(llvm::dbgs() << "PreCall<"; Call.dump(llvm::dbgs());
870fa0a80e0SBalazs Benics                      llvm::dbgs()
871fa0a80e0SBalazs Benics                      << "> prepares tainting arg index: " << I << '\n';);
872a848a5cfSBalazs Benics           Result = F.add(Result, I);
873fa0a80e0SBalazs Benics         }
87417f74240SEndre Fülöp 
87517f74240SEndre Fülöp         // TODO: We should traverse all reachable memory regions via the
87617f74240SEndre Fülöp         // escaping parameter. Instead of doing that we simply mark only the
87717f74240SEndre Fülöp         // referred memory region as tainted.
878fa0a80e0SBalazs Benics         if (WouldEscape(V, E->getType())) {
879a848a5cfSBalazs Benics           LLVM_DEBUG(if (!Result.contains(I)) {
880fa0a80e0SBalazs Benics             llvm::dbgs() << "PreCall<";
881fa0a80e0SBalazs Benics             Call.dump(llvm::dbgs());
882fa0a80e0SBalazs Benics             llvm::dbgs() << "> prepares tainting arg index: " << I << '\n';
883fa0a80e0SBalazs Benics           });
884a848a5cfSBalazs Benics           Result = F.add(Result, I);
885fa0a80e0SBalazs Benics         }
88617f74240SEndre Fülöp       });
88717f74240SEndre Fülöp 
888a848a5cfSBalazs Benics   if (!Result.isEmpty())
889a848a5cfSBalazs Benics     State = State->set<TaintArgsOnPostVisit>(C.getStackFrame(), Result);
89017f74240SEndre Fülöp   C.addTransition(State);
891099fe3fbSAnna Zaks }
892099fe3fbSAnna Zaks 
UntrustedEnv(CheckerContext & C)89317f74240SEndre Fülöp bool GenericTaintRule::UntrustedEnv(CheckerContext &C) {
89417f74240SEndre Fülöp   return !C.getAnalysisManager()
89517f74240SEndre Fülöp               .getAnalyzerOptions()
89617f74240SEndre Fülöp               .ShouldAssumeControlledEnvironment;
897126a2ef9SAnna Zaks }
898126a2ef9SAnna Zaks 
generateReportIfTainted(const Expr * E,StringRef Msg,CheckerContext & C) const899080ecafdSGabor Borsik bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg,
9000244cd74SAnna Zaks                                                   CheckerContext &C) const {
9010244cd74SAnna Zaks   assert(E);
90217f74240SEndre Fülöp   Optional<SVal> TaintedSVal{getTaintedPointeeOrPointer(C, C.getSVal(E))};
9030244cd74SAnna Zaks 
90417f74240SEndre Fülöp   if (!TaintedSVal)
9050244cd74SAnna Zaks     return false;
9060244cd74SAnna Zaks 
9070244cd74SAnna Zaks   // Generate diagnostic.
908e39bd407SDevin Coughlin   if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
90917f74240SEndre Fülöp     auto report = std::make_unique<PathSensitiveBugReport>(BT, Msg, N);
9100244cd74SAnna Zaks     report->addRange(E->getSourceRange());
91117f74240SEndre Fülöp     report->addVisitor(std::make_unique<TaintBugVisitor>(*TaintedSVal));
9128d3a7a56SAaron Ballman     C.emitReport(std::move(report));
9130244cd74SAnna Zaks     return true;
9140244cd74SAnna Zaks   }
9150244cd74SAnna Zaks   return false;
9160244cd74SAnna Zaks }
9170244cd74SAnna Zaks 
91817f74240SEndre Fülöp /// TODO: remove checking for printf format attributes and socket whitelisting
91917f74240SEndre Fülöp /// from GenericTaintChecker, and that means the following functions:
92017f74240SEndre Fülöp /// getPrintfFormatArgumentNum,
92117f74240SEndre Fülöp /// GenericTaintChecker::checkUncontrolledFormatString,
92217f74240SEndre Fülöp /// GenericTaintChecker::taintUnsafeSocketProtocol
92317f74240SEndre Fülöp 
getPrintfFormatArgumentNum(const CallEvent & Call,const CheckerContext & C,ArgIdxTy & ArgNum)92417f74240SEndre Fülöp static bool getPrintfFormatArgumentNum(const CallEvent &Call,
92517f74240SEndre Fülöp                                        const CheckerContext &C,
92617f74240SEndre Fülöp                                        ArgIdxTy &ArgNum) {
92717f74240SEndre Fülöp   // Find if the function contains a format string argument.
92817f74240SEndre Fülöp   // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
92917f74240SEndre Fülöp   // vsnprintf, syslog, custom annotated functions.
93017f74240SEndre Fülöp   const Decl *CallDecl = Call.getDecl();
93117f74240SEndre Fülöp   if (!CallDecl)
93217f74240SEndre Fülöp     return false;
93317f74240SEndre Fülöp   const FunctionDecl *FDecl = CallDecl->getAsFunction();
93417f74240SEndre Fülöp   if (!FDecl)
93517f74240SEndre Fülöp     return false;
93617f74240SEndre Fülöp 
93717f74240SEndre Fülöp   const ArgIdxTy CallNumArgs = fromArgumentCount(Call.getNumArgs());
93817f74240SEndre Fülöp 
93917f74240SEndre Fülöp   for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
94017f74240SEndre Fülöp     ArgNum = Format->getFormatIdx() - 1;
94117f74240SEndre Fülöp     if ((Format->getType()->getName() == "printf") && CallNumArgs > ArgNum)
94217f74240SEndre Fülöp       return true;
94317f74240SEndre Fülöp   }
94417f74240SEndre Fülöp 
94517f74240SEndre Fülöp   return false;
94617f74240SEndre Fülöp }
94717f74240SEndre Fülöp 
checkUncontrolledFormatString(const CallEvent & Call,CheckerContext & C) const948b68cb549SArtem Dergachev bool GenericTaintChecker::checkUncontrolledFormatString(
94995a94df5SBalazs Benics     const CallEvent &Call, CheckerContext &C) const {
950126a2ef9SAnna Zaks   // Check if the function contains a format string argument.
95117f74240SEndre Fülöp   ArgIdxTy ArgNum = 0;
95295a94df5SBalazs Benics   if (!getPrintfFormatArgumentNum(Call, C, ArgNum))
953126a2ef9SAnna Zaks     return false;
954126a2ef9SAnna Zaks 
955b68cb549SArtem Dergachev   // If either the format string content or the pointer itself are tainted,
956b68cb549SArtem Dergachev   // warn.
95795a94df5SBalazs Benics   return generateReportIfTainted(Call.getArgExpr(ArgNum),
9589c10490eSAlexander Kornienko                                  MsgUncontrolledFormatString, C);
959126a2ef9SAnna Zaks }
9600244cd74SAnna Zaks 
taintUnsafeSocketProtocol(const CallEvent & Call,CheckerContext & C) const96117f74240SEndre Fülöp void GenericTaintChecker::taintUnsafeSocketProtocol(const CallEvent &Call,
9620244cd74SAnna Zaks                                                     CheckerContext &C) const {
96317f74240SEndre Fülöp   if (Call.getNumArgs() < 1)
96417f74240SEndre Fülöp     return;
96517f74240SEndre Fülöp   const IdentifierInfo *ID = Call.getCalleeIdentifier();
96617f74240SEndre Fülöp   if (!ID)
96717f74240SEndre Fülöp     return;
96817f74240SEndre Fülöp   if (!ID->getName().equals("socket"))
96917f74240SEndre Fülöp     return;
9700244cd74SAnna Zaks 
97117f74240SEndre Fülöp   SourceLocation DomLoc = Call.getArgExpr(0)->getExprLoc();
97217f74240SEndre Fülöp   StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
97317f74240SEndre Fülöp   // Allow internal communication protocols.
97417f74240SEndre Fülöp   bool SafeProtocol = DomName.equals("AF_SYSTEM") ||
97517f74240SEndre Fülöp                       DomName.equals("AF_LOCAL") || DomName.equals("AF_UNIX") ||
97617f74240SEndre Fülöp                       DomName.equals("AF_RESERVED_36");
97717f74240SEndre Fülöp   if (SafeProtocol)
97817f74240SEndre Fülöp     return;
9790244cd74SAnna Zaks 
980a848a5cfSBalazs Benics   ProgramStateRef State = C.getState();
981a848a5cfSBalazs Benics   auto &F = State->getStateManager().get_context<ArgIdxFactory>();
982a848a5cfSBalazs Benics   ImmutableSet<ArgIdxTy> Result = F.add(F.getEmptySet(), ReturnValueIndex);
983a848a5cfSBalazs Benics   State = State->set<TaintArgsOnPostVisit>(C.getStackFrame(), Result);
984a848a5cfSBalazs Benics   C.addTransition(State);
985126a2ef9SAnna Zaks }
986126a2ef9SAnna Zaks 
98717f74240SEndre Fülöp /// Checker registration
registerGenericTaintChecker(CheckerManager & Mgr)9884bde15feSGabor Borsik void ento::registerGenericTaintChecker(CheckerManager &Mgr) {
98917f74240SEndre Fülöp   Mgr.registerChecker<GenericTaintChecker>();
9905c5bf9b6SAnna Zaks }
991058a7a45SKristof Umann 
shouldRegisterGenericTaintChecker(const CheckerManager & mgr)992bda3dd0dSKirstóf Umann bool ento::shouldRegisterGenericTaintChecker(const CheckerManager &mgr) {
993058a7a45SKristof Umann   return true;
994058a7a45SKristof Umann }
995