15c5bf9b6SAnna Zaks //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
25c5bf9b6SAnna Zaks //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
65c5bf9b6SAnna Zaks //
75c5bf9b6SAnna Zaks //===----------------------------------------------------------------------===//
85c5bf9b6SAnna Zaks //
95c5bf9b6SAnna Zaks // This checker defines the attack surface for generic taint propagation.
105c5bf9b6SAnna Zaks //
115c5bf9b6SAnna Zaks // The taint information produced by it might be useful to other checkers. For
125c5bf9b6SAnna Zaks // example, checkers should report errors which involve tainted data more
135c5bf9b6SAnna Zaks // aggressively, even if the involved symbols are under constrained.
145c5bf9b6SAnna Zaks //
155c5bf9b6SAnna Zaks //===----------------------------------------------------------------------===//
1644551cf6SArtem Dergachev
174bde15feSGabor Borsik #include "Yaml.h"
183a02247dSChandler Carruth #include "clang/AST/Attr.h"
193a02247dSChandler Carruth #include "clang/Basic/Builtins.h"
204bde15feSGabor Borsik #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
2182f3ed99STom Ritter #include "clang/StaticAnalyzer/Checkers/Taint.h"
223a02247dSChandler Carruth #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
235c5bf9b6SAnna Zaks #include "clang/StaticAnalyzer/Core/Checker.h"
245c5bf9b6SAnna Zaks #include "clang/StaticAnalyzer/Core/CheckerManager.h"
2517f74240SEndre Fülöp #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
2695a94df5SBalazs Benics #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
275c5bf9b6SAnna Zaks #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
283b0ab206SAnna Zaks #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
294bde15feSGabor Borsik #include "llvm/Support/YAMLTraits.h"
3095a94df5SBalazs Benics
314bde15feSGabor Borsik #include <limits>
3295a94df5SBalazs Benics #include <memory>
332a5fb125SArtem Dergachev #include <utility>
345c5bf9b6SAnna Zaks
35fa0a80e0SBalazs Benics #define DEBUG_TYPE "taint-checker"
36fa0a80e0SBalazs Benics
375c5bf9b6SAnna Zaks using namespace clang;
385c5bf9b6SAnna Zaks using namespace ento;
3944551cf6SArtem Dergachev using namespace taint;
405c5bf9b6SAnna Zaks
41a848a5cfSBalazs Benics using llvm::ImmutableSet;
42a848a5cfSBalazs Benics
435c5bf9b6SAnna Zaks namespace {
4417f74240SEndre Fülöp
4517f74240SEndre Fülöp class GenericTaintChecker;
4617f74240SEndre Fülöp
4717f74240SEndre Fülöp /// Check for CWE-134: Uncontrolled Format String.
4817f74240SEndre Fülöp constexpr llvm::StringLiteral MsgUncontrolledFormatString =
4917f74240SEndre Fülöp "Untrusted data is used as a format string "
5017f74240SEndre Fülöp "(CWE-134: Uncontrolled Format String)";
5117f74240SEndre Fülöp
5217f74240SEndre Fülöp /// Check for:
5317f74240SEndre Fülöp /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
5417f74240SEndre Fülöp /// CWE-78, "Failure to Sanitize Data into an OS Command"
5517f74240SEndre Fülöp constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
5617f74240SEndre Fülöp "Untrusted data is passed to a system call "
5717f74240SEndre Fülöp "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
5817f74240SEndre Fülöp
5917f74240SEndre Fülöp /// Check if tainted data is used as a buffer size in strn.. functions,
6017f74240SEndre Fülöp /// and allocators.
6117f74240SEndre Fülöp constexpr llvm::StringLiteral MsgTaintedBufferSize =
6217f74240SEndre Fülöp "Untrusted data is used to specify the buffer size "
6317f74240SEndre Fülöp "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
6417f74240SEndre Fülöp "for character data and the null terminator)";
6517f74240SEndre Fülöp
6617f74240SEndre Fülöp /// Check if tainted data is used as a custom sink's parameter.
6717f74240SEndre Fülöp constexpr llvm::StringLiteral MsgCustomSink =
6817f74240SEndre Fülöp "Untrusted data is passed to a user-defined sink";
6917f74240SEndre Fülöp
7017f74240SEndre Fülöp using ArgIdxTy = int;
7117f74240SEndre Fülöp using ArgVecTy = llvm::SmallVector<ArgIdxTy, 2>;
7217f74240SEndre Fülöp
7317f74240SEndre Fülöp /// Denotes the return value.
7417f74240SEndre Fülöp constexpr ArgIdxTy ReturnValueIndex{-1};
7517f74240SEndre Fülöp
fromArgumentCount(unsigned Count)7617f74240SEndre Fülöp static ArgIdxTy fromArgumentCount(unsigned Count) {
7717f74240SEndre Fülöp assert(Count <=
7817f74240SEndre Fülöp static_cast<std::size_t>(std::numeric_limits<ArgIdxTy>::max()) &&
7917f74240SEndre Fülöp "ArgIdxTy is not large enough to represent the number of arguments.");
8017f74240SEndre Fülöp return Count;
8117f74240SEndre Fülöp }
8217f74240SEndre Fülöp
8317f74240SEndre Fülöp /// Check if the region the expression evaluates to is the standard input,
8417f74240SEndre Fülöp /// and thus, is tainted.
8517f74240SEndre Fülöp /// FIXME: Move this to Taint.cpp.
isStdin(SVal Val,const ASTContext & ACtx)8617f74240SEndre Fülöp bool isStdin(SVal Val, const ASTContext &ACtx) {
8717f74240SEndre Fülöp // FIXME: What if Val is NonParamVarRegion?
8817f74240SEndre Fülöp
8917f74240SEndre Fülöp // The region should be symbolic, we do not know it's value.
9017f74240SEndre Fülöp const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(Val.getAsRegion());
9117f74240SEndre Fülöp if (!SymReg)
9217f74240SEndre Fülöp return false;
9317f74240SEndre Fülöp
9417f74240SEndre Fülöp // Get it's symbol and find the declaration region it's pointing to.
95f4fc3f6bSBalazs Benics const auto *DeclReg =
96f4fc3f6bSBalazs Benics dyn_cast_or_null<DeclRegion>(SymReg->getSymbol()->getOriginRegion());
9717f74240SEndre Fülöp if (!DeclReg)
9817f74240SEndre Fülöp return false;
9917f74240SEndre Fülöp
10017f74240SEndre Fülöp // This region corresponds to a declaration, find out if it's a global/extern
10117f74240SEndre Fülöp // variable named stdin with the proper type.
10217f74240SEndre Fülöp if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
10317f74240SEndre Fülöp D = D->getCanonicalDecl();
10417f74240SEndre Fülöp // FIXME: This should look for an exact match.
10517f74240SEndre Fülöp if (D->getName().contains("stdin") && D->isExternC()) {
10617f74240SEndre Fülöp const QualType FILETy = ACtx.getFILEType().getCanonicalType();
10717f74240SEndre Fülöp const QualType Ty = D->getType().getCanonicalType();
10817f74240SEndre Fülöp
10917f74240SEndre Fülöp if (Ty->isPointerType())
11017f74240SEndre Fülöp return Ty->getPointeeType() == FILETy;
11117f74240SEndre Fülöp }
11217f74240SEndre Fülöp }
11317f74240SEndre Fülöp return false;
11417f74240SEndre Fülöp }
11517f74240SEndre Fülöp
getPointeeOf(const CheckerContext & C,Loc LValue)11617f74240SEndre Fülöp SVal getPointeeOf(const CheckerContext &C, Loc LValue) {
11717f74240SEndre Fülöp const QualType ArgTy = LValue.getType(C.getASTContext());
11817f74240SEndre Fülöp if (!ArgTy->isPointerType() || !ArgTy->getPointeeType()->isVoidType())
11917f74240SEndre Fülöp return C.getState()->getSVal(LValue);
12017f74240SEndre Fülöp
12117f74240SEndre Fülöp // Do not dereference void pointers. Treat them as byte pointers instead.
12217f74240SEndre Fülöp // FIXME: we might want to consider more than just the first byte.
12317f74240SEndre Fülöp return C.getState()->getSVal(LValue, C.getASTContext().CharTy);
12417f74240SEndre Fülöp }
12517f74240SEndre Fülöp
12617f74240SEndre Fülöp /// Given a pointer/reference argument, return the value it refers to.
getPointeeOf(const CheckerContext & C,SVal Arg)12717f74240SEndre Fülöp Optional<SVal> getPointeeOf(const CheckerContext &C, SVal Arg) {
12817f74240SEndre Fülöp if (auto LValue = Arg.getAs<Loc>())
12917f74240SEndre Fülöp return getPointeeOf(C, *LValue);
13017f74240SEndre Fülöp return None;
13117f74240SEndre Fülöp }
13217f74240SEndre Fülöp
13317f74240SEndre Fülöp /// Given a pointer, return the SVal of its pointee or if it is tainted,
13417f74240SEndre Fülöp /// otherwise return the pointer's SVal if tainted.
13517f74240SEndre Fülöp /// Also considers stdin as a taint source.
getTaintedPointeeOrPointer(const CheckerContext & C,SVal Arg)13617f74240SEndre Fülöp Optional<SVal> getTaintedPointeeOrPointer(const CheckerContext &C, SVal Arg) {
13717f74240SEndre Fülöp const ProgramStateRef State = C.getState();
13817f74240SEndre Fülöp
13917f74240SEndre Fülöp if (auto Pointee = getPointeeOf(C, Arg))
14017f74240SEndre Fülöp if (isTainted(State, *Pointee)) // FIXME: isTainted(...) ? Pointee : None;
14117f74240SEndre Fülöp return Pointee;
14217f74240SEndre Fülöp
14317f74240SEndre Fülöp if (isTainted(State, Arg))
14417f74240SEndre Fülöp return Arg;
14517f74240SEndre Fülöp
14617f74240SEndre Fülöp // FIXME: This should be done by the isTainted() API.
14717f74240SEndre Fülöp if (isStdin(Arg, C.getASTContext()))
14817f74240SEndre Fülöp return Arg;
14917f74240SEndre Fülöp
15017f74240SEndre Fülöp return None;
15117f74240SEndre Fülöp }
15217f74240SEndre Fülöp
isTaintedOrPointsToTainted(const Expr * E,const ProgramStateRef & State,CheckerContext & C)15317f74240SEndre Fülöp bool isTaintedOrPointsToTainted(const Expr *E, const ProgramStateRef &State,
15417f74240SEndre Fülöp CheckerContext &C) {
155064a08cdSKazu Hirata return getTaintedPointeeOrPointer(C, C.getSVal(E)).has_value();
15617f74240SEndre Fülöp }
15717f74240SEndre Fülöp
15817f74240SEndre Fülöp /// ArgSet is used to describe arguments relevant for taint detection or
15917f74240SEndre Fülöp /// taint application. A discrete set of argument indexes and a variadic
16017f74240SEndre Fülöp /// argument list signified by a starting index are supported.
16117f74240SEndre Fülöp class ArgSet {
16217f74240SEndre Fülöp public:
16317f74240SEndre Fülöp ArgSet() = default;
ArgSet(ArgVecTy && DiscreteArgs,Optional<ArgIdxTy> VariadicIndex=None)16417f74240SEndre Fülöp ArgSet(ArgVecTy &&DiscreteArgs, Optional<ArgIdxTy> VariadicIndex = None)
16517f74240SEndre Fülöp : DiscreteArgs(std::move(DiscreteArgs)),
16617f74240SEndre Fülöp VariadicIndex(std::move(VariadicIndex)) {}
16717f74240SEndre Fülöp
contains(ArgIdxTy ArgIdx) const16817f74240SEndre Fülöp bool contains(ArgIdxTy ArgIdx) const {
16917f74240SEndre Fülöp if (llvm::is_contained(DiscreteArgs, ArgIdx))
17017f74240SEndre Fülöp return true;
17117f74240SEndre Fülöp
17217f74240SEndre Fülöp return VariadicIndex && ArgIdx >= *VariadicIndex;
17317f74240SEndre Fülöp }
17417f74240SEndre Fülöp
isEmpty() const17517f74240SEndre Fülöp bool isEmpty() const { return DiscreteArgs.empty() && !VariadicIndex; }
17617f74240SEndre Fülöp
17717f74240SEndre Fülöp private:
17817f74240SEndre Fülöp ArgVecTy DiscreteArgs;
17917f74240SEndre Fülöp Optional<ArgIdxTy> VariadicIndex;
18017f74240SEndre Fülöp };
18117f74240SEndre Fülöp
18217f74240SEndre Fülöp /// A struct used to specify taint propagation rules for a function.
18317f74240SEndre Fülöp ///
18417f74240SEndre Fülöp /// If any of the possible taint source arguments is tainted, all of the
18517f74240SEndre Fülöp /// destination arguments should also be tainted. If ReturnValueIndex is added
18617f74240SEndre Fülöp /// to the dst list, the return value will be tainted.
18717f74240SEndre Fülöp class GenericTaintRule {
18817f74240SEndre Fülöp /// Arguments which are taints sinks and should be checked, and a report
18917f74240SEndre Fülöp /// should be emitted if taint reaches these.
19017f74240SEndre Fülöp ArgSet SinkArgs;
19117f74240SEndre Fülöp /// Arguments which should be sanitized on function return.
19217f74240SEndre Fülöp ArgSet FilterArgs;
19317f74240SEndre Fülöp /// Arguments which can participate in taint propagationa. If any of the
19417f74240SEndre Fülöp /// arguments in PropSrcArgs is tainted, all arguments in PropDstArgs should
19517f74240SEndre Fülöp /// be tainted.
19617f74240SEndre Fülöp ArgSet PropSrcArgs;
19717f74240SEndre Fülöp ArgSet PropDstArgs;
19817f74240SEndre Fülöp
19917f74240SEndre Fülöp /// A message that explains why the call is sensitive to taint.
20017f74240SEndre Fülöp Optional<StringRef> SinkMsg;
20117f74240SEndre Fülöp
20217f74240SEndre Fülöp GenericTaintRule() = default;
20317f74240SEndre Fülöp
GenericTaintRule(ArgSet && Sink,ArgSet && Filter,ArgSet && Src,ArgSet && Dst,Optional<StringRef> SinkMsg=None)20417f74240SEndre Fülöp GenericTaintRule(ArgSet &&Sink, ArgSet &&Filter, ArgSet &&Src, ArgSet &&Dst,
20517f74240SEndre Fülöp Optional<StringRef> SinkMsg = None)
20617f74240SEndre Fülöp : SinkArgs(std::move(Sink)), FilterArgs(std::move(Filter)),
20717f74240SEndre Fülöp PropSrcArgs(std::move(Src)), PropDstArgs(std::move(Dst)),
20817f74240SEndre Fülöp SinkMsg(SinkMsg) {}
20917f74240SEndre Fülöp
21017f74240SEndre Fülöp public:
21117f74240SEndre Fülöp /// Make a rule that reports a warning if taint reaches any of \p FilterArgs
21217f74240SEndre Fülöp /// arguments.
Sink(ArgSet && SinkArgs,Optional<StringRef> Msg=None)21317f74240SEndre Fülöp static GenericTaintRule Sink(ArgSet &&SinkArgs,
21417f74240SEndre Fülöp Optional<StringRef> Msg = None) {
21517f74240SEndre Fülöp return {std::move(SinkArgs), {}, {}, {}, Msg};
21617f74240SEndre Fülöp }
21717f74240SEndre Fülöp
21817f74240SEndre Fülöp /// Make a rule that sanitizes all FilterArgs arguments.
Filter(ArgSet && FilterArgs)21917f74240SEndre Fülöp static GenericTaintRule Filter(ArgSet &&FilterArgs) {
22017f74240SEndre Fülöp return {{}, std::move(FilterArgs), {}, {}};
22117f74240SEndre Fülöp }
22217f74240SEndre Fülöp
22317f74240SEndre Fülöp /// Make a rule that unconditionally taints all Args.
22417f74240SEndre Fülöp /// If Func is provided, it must also return true for taint to propagate.
Source(ArgSet && SourceArgs)22517f74240SEndre Fülöp static GenericTaintRule Source(ArgSet &&SourceArgs) {
22617f74240SEndre Fülöp return {{}, {}, {}, std::move(SourceArgs)};
22717f74240SEndre Fülöp }
22817f74240SEndre Fülöp
22917f74240SEndre Fülöp /// Make a rule that taints all PropDstArgs if any of PropSrcArgs is tainted.
Prop(ArgSet && SrcArgs,ArgSet && DstArgs)23017f74240SEndre Fülöp static GenericTaintRule Prop(ArgSet &&SrcArgs, ArgSet &&DstArgs) {
23117f74240SEndre Fülöp return {{}, {}, std::move(SrcArgs), std::move(DstArgs)};
23217f74240SEndre Fülöp }
23317f74240SEndre Fülöp
23417f74240SEndre Fülöp /// Make a rule that taints all PropDstArgs if any of PropSrcArgs is tainted.
SinkProp(ArgSet && SinkArgs,ArgSet && SrcArgs,ArgSet && DstArgs,Optional<StringRef> Msg=None)23517f74240SEndre Fülöp static GenericTaintRule SinkProp(ArgSet &&SinkArgs, ArgSet &&SrcArgs,
23617f74240SEndre Fülöp ArgSet &&DstArgs,
23717f74240SEndre Fülöp Optional<StringRef> Msg = None) {
23817f74240SEndre Fülöp return {
23917f74240SEndre Fülöp std::move(SinkArgs), {}, std::move(SrcArgs), std::move(DstArgs), Msg};
24017f74240SEndre Fülöp }
24117f74240SEndre Fülöp
24217f74240SEndre Fülöp /// Process a function which could either be a taint source, a taint sink, a
24317f74240SEndre Fülöp /// taint filter or a taint propagator.
24417f74240SEndre Fülöp void process(const GenericTaintChecker &Checker, const CallEvent &Call,
24517f74240SEndre Fülöp CheckerContext &C) const;
24617f74240SEndre Fülöp
24717f74240SEndre Fülöp /// Handles the resolution of indexes of type ArgIdxTy to Expr*-s.
GetArgExpr(ArgIdxTy ArgIdx,const CallEvent & Call)24817f74240SEndre Fülöp static const Expr *GetArgExpr(ArgIdxTy ArgIdx, const CallEvent &Call) {
24917f74240SEndre Fülöp return ArgIdx == ReturnValueIndex ? Call.getOriginExpr()
25017f74240SEndre Fülöp : Call.getArgExpr(ArgIdx);
25117f74240SEndre Fülöp };
25217f74240SEndre Fülöp
25317f74240SEndre Fülöp /// Functions for custom taintedness propagation.
25417f74240SEndre Fülöp static bool UntrustedEnv(CheckerContext &C);
25517f74240SEndre Fülöp };
25617f74240SEndre Fülöp
25717f74240SEndre Fülöp using RuleLookupTy = CallDescriptionMap<GenericTaintRule>;
25817f74240SEndre Fülöp
25917f74240SEndre Fülöp /// Used to parse the configuration file.
26017f74240SEndre Fülöp struct TaintConfiguration {
26117f74240SEndre Fülöp using NameScopeArgs = std::tuple<std::string, std::string, ArgVecTy>;
26217f74240SEndre Fülöp enum class VariadicType { None, Src, Dst };
26317f74240SEndre Fülöp
26417f74240SEndre Fülöp struct Common {
26517f74240SEndre Fülöp std::string Name;
26617f74240SEndre Fülöp std::string Scope;
26717f74240SEndre Fülöp };
26817f74240SEndre Fülöp
26917f74240SEndre Fülöp struct Sink : Common {
27017f74240SEndre Fülöp ArgVecTy SinkArgs;
27117f74240SEndre Fülöp };
27217f74240SEndre Fülöp
27317f74240SEndre Fülöp struct Filter : Common {
27417f74240SEndre Fülöp ArgVecTy FilterArgs;
27517f74240SEndre Fülöp };
27617f74240SEndre Fülöp
27717f74240SEndre Fülöp struct Propagation : Common {
27817f74240SEndre Fülöp ArgVecTy SrcArgs;
27917f74240SEndre Fülöp ArgVecTy DstArgs;
28017f74240SEndre Fülöp VariadicType VarType;
28117f74240SEndre Fülöp ArgIdxTy VarIndex;
28217f74240SEndre Fülöp };
28317f74240SEndre Fülöp
28417f74240SEndre Fülöp std::vector<Propagation> Propagations;
28517f74240SEndre Fülöp std::vector<Filter> Filters;
28617f74240SEndre Fülöp std::vector<Sink> Sinks;
28717f74240SEndre Fülöp
28817f74240SEndre Fülöp TaintConfiguration() = default;
28917f74240SEndre Fülöp TaintConfiguration(const TaintConfiguration &) = default;
29017f74240SEndre Fülöp TaintConfiguration(TaintConfiguration &&) = default;
29117f74240SEndre Fülöp TaintConfiguration &operator=(const TaintConfiguration &) = default;
29217f74240SEndre Fülöp TaintConfiguration &operator=(TaintConfiguration &&) = default;
29317f74240SEndre Fülöp };
29417f74240SEndre Fülöp
29517f74240SEndre Fülöp struct GenericTaintRuleParser {
GenericTaintRuleParser__anon39b2fff00111::GenericTaintRuleParser29617f74240SEndre Fülöp GenericTaintRuleParser(CheckerManager &Mgr) : Mgr(Mgr) {}
29717f74240SEndre Fülöp /// Container type used to gather call identification objects grouped into
29817f74240SEndre Fülöp /// pairs with their corresponding taint rules. It is temporary as it is used
29917f74240SEndre Fülöp /// to finally initialize RuleLookupTy, which is considered to be immutable.
30017f74240SEndre Fülöp using RulesContTy = std::vector<std::pair<CallDescription, GenericTaintRule>>;
30117f74240SEndre Fülöp RulesContTy parseConfiguration(const std::string &Option,
30217f74240SEndre Fülöp TaintConfiguration &&Config) const;
30317f74240SEndre Fülöp
30417f74240SEndre Fülöp private:
30517f74240SEndre Fülöp using NamePartsTy = llvm::SmallVector<SmallString<32>, 2>;
30617f74240SEndre Fülöp
30717f74240SEndre Fülöp /// Validate part of the configuration, which contains a list of argument
30817f74240SEndre Fülöp /// indexes.
30917f74240SEndre Fülöp void validateArgVector(const std::string &Option, const ArgVecTy &Args) const;
31017f74240SEndre Fülöp
31117f74240SEndre Fülöp template <typename Config> static NamePartsTy parseNameParts(const Config &C);
31217f74240SEndre Fülöp
31317f74240SEndre Fülöp // Takes the config and creates a CallDescription for it and associates a Rule
31417f74240SEndre Fülöp // with that.
31517f74240SEndre Fülöp template <typename Config>
31617f74240SEndre Fülöp static void consumeRulesFromConfig(const Config &C, GenericTaintRule &&Rule,
31717f74240SEndre Fülöp RulesContTy &Rules);
31817f74240SEndre Fülöp
31917f74240SEndre Fülöp void parseConfig(const std::string &Option, TaintConfiguration::Sink &&P,
32017f74240SEndre Fülöp RulesContTy &Rules) const;
32117f74240SEndre Fülöp void parseConfig(const std::string &Option, TaintConfiguration::Filter &&P,
32217f74240SEndre Fülöp RulesContTy &Rules) const;
32317f74240SEndre Fülöp void parseConfig(const std::string &Option,
32417f74240SEndre Fülöp TaintConfiguration::Propagation &&P,
32517f74240SEndre Fülöp RulesContTy &Rules) const;
32617f74240SEndre Fülöp
32717f74240SEndre Fülöp CheckerManager &Mgr;
32817f74240SEndre Fülöp };
32917f74240SEndre Fülöp
33095a94df5SBalazs Benics class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> {
3313b0ab206SAnna Zaks public:
33295a94df5SBalazs Benics void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
33395a94df5SBalazs Benics void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
3345c5bf9b6SAnna Zaks
3354bde15feSGabor Borsik void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
3364bde15feSGabor Borsik const char *Sep) const override;
3374bde15feSGabor Borsik
338080ecafdSGabor Borsik /// Generate a report if the expression is tainted or points to tainted data.
339080ecafdSGabor Borsik bool generateReportIfTainted(const Expr *E, StringRef Msg,
340080ecafdSGabor Borsik CheckerContext &C) const;
341080ecafdSGabor Borsik
34217f74240SEndre Fülöp private:
34317f74240SEndre Fülöp const BugType BT{this, "Use of Untrusted Data", "Untrusted Data"};
344273e6742SBorsik Gabor
34517f74240SEndre Fülöp bool checkUncontrolledFormatString(const CallEvent &Call,
34617f74240SEndre Fülöp CheckerContext &C) const;
347080ecafdSGabor Borsik
34817f74240SEndre Fülöp void taintUnsafeSocketProtocol(const CallEvent &Call,
34917f74240SEndre Fülöp CheckerContext &C) const;
3502827349cSKristof Umann
35117f74240SEndre Fülöp /// Default taint rules are initilized with the help of a CheckerContext to
35217f74240SEndre Fülöp /// access the names of built-in functions like memcpy.
35317f74240SEndre Fülöp void initTaintRules(CheckerContext &C) const;
3543666d2c1SAnna Zaks
35517f74240SEndre Fülöp /// CallDescription currently cannot restrict matches to the global namespace
35617f74240SEndre Fülöp /// only, which is why multiple CallDescriptionMaps are used, as we want to
35717f74240SEndre Fülöp /// disambiguate global C functions from functions inside user-defined
35817f74240SEndre Fülöp /// namespaces.
35917f74240SEndre Fülöp // TODO: Remove separation to simplify matching logic once CallDescriptions
36017f74240SEndre Fülöp // are more expressive.
3613666d2c1SAnna Zaks
36217f74240SEndre Fülöp mutable Optional<RuleLookupTy> StaticTaintRules;
36317f74240SEndre Fülöp mutable Optional<RuleLookupTy> DynamicTaintRules;
3647f6a6b75SAnna Zaks };
365560dbe9aSAnna Zaks } // end of anonymous namespace
3665c5bf9b6SAnna Zaks
36717f74240SEndre Fülöp /// YAML serialization mapping.
36817f74240SEndre Fülöp LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Sink)
36917f74240SEndre Fülöp LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Filter)
37017f74240SEndre Fülöp LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Propagation)
3714bde15feSGabor Borsik
3724bde15feSGabor Borsik namespace llvm {
3734bde15feSGabor Borsik namespace yaml {
37417f74240SEndre Fülöp template <> struct MappingTraits<TaintConfiguration> {
mappingllvm::yaml::MappingTraits37517f74240SEndre Fülöp static void mapping(IO &IO, TaintConfiguration &Config) {
3764bde15feSGabor Borsik IO.mapOptional("Propagations", Config.Propagations);
3774bde15feSGabor Borsik IO.mapOptional("Filters", Config.Filters);
3784bde15feSGabor Borsik IO.mapOptional("Sinks", Config.Sinks);
3794bde15feSGabor Borsik }
3804bde15feSGabor Borsik };
3814bde15feSGabor Borsik
38217f74240SEndre Fülöp template <> struct MappingTraits<TaintConfiguration::Sink> {
mappingllvm::yaml::MappingTraits38317f74240SEndre Fülöp static void mapping(IO &IO, TaintConfiguration::Sink &Sink) {
38417f74240SEndre Fülöp IO.mapRequired("Name", Sink.Name);
38517f74240SEndre Fülöp IO.mapOptional("Scope", Sink.Scope);
38617f74240SEndre Fülöp IO.mapRequired("Args", Sink.SinkArgs);
38717f74240SEndre Fülöp }
38817f74240SEndre Fülöp };
38917f74240SEndre Fülöp
39017f74240SEndre Fülöp template <> struct MappingTraits<TaintConfiguration::Filter> {
mappingllvm::yaml::MappingTraits39117f74240SEndre Fülöp static void mapping(IO &IO, TaintConfiguration::Filter &Filter) {
39217f74240SEndre Fülöp IO.mapRequired("Name", Filter.Name);
39317f74240SEndre Fülöp IO.mapOptional("Scope", Filter.Scope);
39417f74240SEndre Fülöp IO.mapRequired("Args", Filter.FilterArgs);
39517f74240SEndre Fülöp }
39617f74240SEndre Fülöp };
39717f74240SEndre Fülöp
39817f74240SEndre Fülöp template <> struct MappingTraits<TaintConfiguration::Propagation> {
mappingllvm::yaml::MappingTraits39917f74240SEndre Fülöp static void mapping(IO &IO, TaintConfiguration::Propagation &Propagation) {
4004bde15feSGabor Borsik IO.mapRequired("Name", Propagation.Name);
401273e6742SBorsik Gabor IO.mapOptional("Scope", Propagation.Scope);
4024bde15feSGabor Borsik IO.mapOptional("SrcArgs", Propagation.SrcArgs);
4034bde15feSGabor Borsik IO.mapOptional("DstArgs", Propagation.DstArgs);
40417f74240SEndre Fülöp IO.mapOptional("VariadicType", Propagation.VarType);
40517f74240SEndre Fülöp IO.mapOptional("VariadicIndex", Propagation.VarIndex);
4064bde15feSGabor Borsik }
4074bde15feSGabor Borsik };
4084bde15feSGabor Borsik
40917f74240SEndre Fülöp template <> struct ScalarEnumerationTraits<TaintConfiguration::VariadicType> {
enumerationllvm::yaml::ScalarEnumerationTraits41017f74240SEndre Fülöp static void enumeration(IO &IO, TaintConfiguration::VariadicType &Value) {
41117f74240SEndre Fülöp IO.enumCase(Value, "None", TaintConfiguration::VariadicType::None);
41217f74240SEndre Fülöp IO.enumCase(Value, "Src", TaintConfiguration::VariadicType::Src);
41317f74240SEndre Fülöp IO.enumCase(Value, "Dst", TaintConfiguration::VariadicType::Dst);
4144bde15feSGabor Borsik }
4154bde15feSGabor Borsik };
4164bde15feSGabor Borsik } // namespace yaml
4174bde15feSGabor Borsik } // namespace llvm
4184bde15feSGabor Borsik
419b3fa8d7dSAnna Zaks /// A set which is used to pass information from call pre-visit instruction
42017f74240SEndre Fülöp /// to the call post-visit. The values are signed integers, which are either
421b3fa8d7dSAnna Zaks /// ReturnValueIndex, or indexes of the pointer/reference argument, which
422b3fa8d7dSAnna Zaks /// points to data, which should be tainted on return.
REGISTER_MAP_WITH_PROGRAMSTATE(TaintArgsOnPostVisit,const LocationContext *,ImmutableSet<ArgIdxTy>)423a848a5cfSBalazs Benics REGISTER_MAP_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, const LocationContext *,
424a848a5cfSBalazs Benics ImmutableSet<ArgIdxTy>)
425a848a5cfSBalazs Benics REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(ArgIdxFactory, ArgIdxTy)
4263b0ab206SAnna Zaks
42717f74240SEndre Fülöp void GenericTaintRuleParser::validateArgVector(const std::string &Option,
42817f74240SEndre Fülöp const ArgVecTy &Args) const {
42917f74240SEndre Fülöp for (ArgIdxTy Arg : Args) {
43017f74240SEndre Fülöp if (Arg < ReturnValueIndex) {
4314bde15feSGabor Borsik Mgr.reportInvalidCheckerOptionValue(
43217f74240SEndre Fülöp Mgr.getChecker<GenericTaintChecker>(), Option,
4334bde15feSGabor Borsik "an argument number for propagation rules greater or equal to -1");
4344bde15feSGabor Borsik }
4354bde15feSGabor Borsik }
4364bde15feSGabor Borsik }
4374bde15feSGabor Borsik
43817f74240SEndre Fülöp template <typename Config>
43917f74240SEndre Fülöp GenericTaintRuleParser::NamePartsTy
parseNameParts(const Config & C)44017f74240SEndre Fülöp GenericTaintRuleParser::parseNameParts(const Config &C) {
44117f74240SEndre Fülöp NamePartsTy NameParts;
44217f74240SEndre Fülöp if (!C.Scope.empty()) {
44317f74240SEndre Fülöp // If the Scope argument contains multiple "::" parts, those are considered
44417f74240SEndre Fülöp // namespace identifiers.
44517f74240SEndre Fülöp llvm::SmallVector<StringRef, 2> NSParts;
44617f74240SEndre Fülöp StringRef{C.Scope}.split(NSParts, "::", /*MaxSplit*/ -1,
44717f74240SEndre Fülöp /*KeepEmpty*/ false);
44817f74240SEndre Fülöp NameParts.append(NSParts.begin(), NSParts.end());
44917f74240SEndre Fülöp }
45017f74240SEndre Fülöp NameParts.emplace_back(C.Name);
45117f74240SEndre Fülöp return NameParts;
452273e6742SBorsik Gabor }
453273e6742SBorsik Gabor
45417f74240SEndre Fülöp template <typename Config>
consumeRulesFromConfig(const Config & C,GenericTaintRule && Rule,RulesContTy & Rules)45517f74240SEndre Fülöp void GenericTaintRuleParser::consumeRulesFromConfig(const Config &C,
45617f74240SEndre Fülöp GenericTaintRule &&Rule,
45717f74240SEndre Fülöp RulesContTy &Rules) {
45817f74240SEndre Fülöp NamePartsTy NameParts = parseNameParts(C);
45917f74240SEndre Fülöp llvm::SmallVector<const char *, 2> CallDescParts{NameParts.size()};
46017f74240SEndre Fülöp llvm::transform(NameParts, CallDescParts.begin(),
46117f74240SEndre Fülöp [](SmallString<32> &S) { return S.c_str(); });
462262cc74eSTres Popp Rules.emplace_back(CallDescription(CallDescParts), std::move(Rule));
46317f74240SEndre Fülöp }
464bf740512SAnna Zaks
parseConfig(const std::string & Option,TaintConfiguration::Sink && S,RulesContTy & Rules) const46517f74240SEndre Fülöp void GenericTaintRuleParser::parseConfig(const std::string &Option,
46617f74240SEndre Fülöp TaintConfiguration::Sink &&S,
46717f74240SEndre Fülöp RulesContTy &Rules) const {
46817f74240SEndre Fülöp validateArgVector(Option, S.SinkArgs);
46917f74240SEndre Fülöp consumeRulesFromConfig(S, GenericTaintRule::Sink(std::move(S.SinkArgs)),
47017f74240SEndre Fülöp Rules);
47117f74240SEndre Fülöp }
47217f74240SEndre Fülöp
parseConfig(const std::string & Option,TaintConfiguration::Filter && S,RulesContTy & Rules) const47317f74240SEndre Fülöp void GenericTaintRuleParser::parseConfig(const std::string &Option,
47417f74240SEndre Fülöp TaintConfiguration::Filter &&S,
47517f74240SEndre Fülöp RulesContTy &Rules) const {
47617f74240SEndre Fülöp validateArgVector(Option, S.FilterArgs);
47717f74240SEndre Fülöp consumeRulesFromConfig(S, GenericTaintRule::Filter(std::move(S.FilterArgs)),
47817f74240SEndre Fülöp Rules);
47917f74240SEndre Fülöp }
48017f74240SEndre Fülöp
parseConfig(const std::string & Option,TaintConfiguration::Propagation && P,RulesContTy & Rules) const48117f74240SEndre Fülöp void GenericTaintRuleParser::parseConfig(const std::string &Option,
48217f74240SEndre Fülöp TaintConfiguration::Propagation &&P,
48317f74240SEndre Fülöp RulesContTy &Rules) const {
48417f74240SEndre Fülöp validateArgVector(Option, P.SrcArgs);
48517f74240SEndre Fülöp validateArgVector(Option, P.DstArgs);
48617f74240SEndre Fülöp bool IsSrcVariadic = P.VarType == TaintConfiguration::VariadicType::Src;
48717f74240SEndre Fülöp bool IsDstVariadic = P.VarType == TaintConfiguration::VariadicType::Dst;
48817f74240SEndre Fülöp Optional<ArgIdxTy> JustVarIndex = P.VarIndex;
48917f74240SEndre Fülöp
49017f74240SEndre Fülöp ArgSet SrcDesc(std::move(P.SrcArgs), IsSrcVariadic ? JustVarIndex : None);
49117f74240SEndre Fülöp ArgSet DstDesc(std::move(P.DstArgs), IsDstVariadic ? JustVarIndex : None);
49217f74240SEndre Fülöp
49317f74240SEndre Fülöp consumeRulesFromConfig(
49417f74240SEndre Fülöp P, GenericTaintRule::Prop(std::move(SrcDesc), std::move(DstDesc)), Rules);
49517f74240SEndre Fülöp }
49617f74240SEndre Fülöp
49717f74240SEndre Fülöp GenericTaintRuleParser::RulesContTy
parseConfiguration(const std::string & Option,TaintConfiguration && Config) const49817f74240SEndre Fülöp GenericTaintRuleParser::parseConfiguration(const std::string &Option,
49917f74240SEndre Fülöp TaintConfiguration &&Config) const {
50017f74240SEndre Fülöp
50117f74240SEndre Fülöp RulesContTy Rules;
50217f74240SEndre Fülöp
50317f74240SEndre Fülöp for (auto &F : Config.Filters)
50417f74240SEndre Fülöp parseConfig(Option, std::move(F), Rules);
50517f74240SEndre Fülöp
50617f74240SEndre Fülöp for (auto &S : Config.Sinks)
50717f74240SEndre Fülöp parseConfig(Option, std::move(S), Rules);
50817f74240SEndre Fülöp
50917f74240SEndre Fülöp for (auto &P : Config.Propagations)
51017f74240SEndre Fülöp parseConfig(Option, std::move(P), Rules);
51117f74240SEndre Fülöp
51217f74240SEndre Fülöp return Rules;
51317f74240SEndre Fülöp }
51417f74240SEndre Fülöp
initTaintRules(CheckerContext & C) const51517f74240SEndre Fülöp void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
5165d324e50SAnna Zaks // Check for exact name match for functions without builtin substitutes.
517273e6742SBorsik Gabor // Use qualified name, because these are C functions without namespace.
5185d324e50SAnna Zaks
51917f74240SEndre Fülöp if (StaticTaintRules || DynamicTaintRules)
52017f74240SEndre Fülöp return;
52117f74240SEndre Fülöp
52217f74240SEndre Fülöp using RulesConstructionTy =
52317f74240SEndre Fülöp std::vector<std::pair<CallDescription, GenericTaintRule>>;
52417f74240SEndre Fülöp using TR = GenericTaintRule;
52517f74240SEndre Fülöp
52617f74240SEndre Fülöp const Builtin::Context &BI = C.getASTContext().BuiltinInfo;
52717f74240SEndre Fülöp
52817f74240SEndre Fülöp RulesConstructionTy GlobalCRules{
52917f74240SEndre Fülöp // Sources
53017f74240SEndre Fülöp {{"fdopen"}, TR::Source({{ReturnValueIndex}})},
53117f74240SEndre Fülöp {{"fopen"}, TR::Source({{ReturnValueIndex}})},
53217f74240SEndre Fülöp {{"freopen"}, TR::Source({{ReturnValueIndex}})},
53317f74240SEndre Fülöp {{"getch"}, TR::Source({{ReturnValueIndex}})},
53417f74240SEndre Fülöp {{"getchar"}, TR::Source({{ReturnValueIndex}})},
53517f74240SEndre Fülöp {{"getchar_unlocked"}, TR::Source({{ReturnValueIndex}})},
53617f74240SEndre Fülöp {{"gets"}, TR::Source({{0}, ReturnValueIndex})},
53734a73879SEndre Fülöp {{"gets_s"}, TR::Source({{0}, ReturnValueIndex})},
53817f74240SEndre Fülöp {{"scanf"}, TR::Source({{}, 1})},
53934a73879SEndre Fülöp {{"scanf_s"}, TR::Source({{}, {1}})},
54017f74240SEndre Fülöp {{"wgetch"}, TR::Source({{}, ReturnValueIndex})},
54134a73879SEndre Fülöp // Sometimes the line between taint sources and propagators is blurry.
54234a73879SEndre Fülöp // _IO_getc is choosen to be a source, but could also be a propagator.
54334a73879SEndre Fülöp // This way it is simpler, as modeling it as a propagator would require
54434a73879SEndre Fülöp // to model the possible sources of _IO_FILE * values, which the _IO_getc
54534a73879SEndre Fülöp // function takes as parameters.
54634a73879SEndre Fülöp {{"_IO_getc"}, TR::Source({{ReturnValueIndex}})},
54734a73879SEndre Fülöp {{"getcwd"}, TR::Source({{0, ReturnValueIndex}})},
54834a73879SEndre Fülöp {{"getwd"}, TR::Source({{0, ReturnValueIndex}})},
54934a73879SEndre Fülöp {{"readlink"}, TR::Source({{1, ReturnValueIndex}})},
55034a73879SEndre Fülöp {{"readlinkat"}, TR::Source({{2, ReturnValueIndex}})},
55134a73879SEndre Fülöp {{"get_current_dir_name"}, TR::Source({{ReturnValueIndex}})},
55234a73879SEndre Fülöp {{"gethostname"}, TR::Source({{0}})},
55334a73879SEndre Fülöp {{"getnameinfo"}, TR::Source({{2, 4}})},
55434a73879SEndre Fülöp {{"getseuserbyname"}, TR::Source({{1, 2}})},
55534a73879SEndre Fülöp {{"getgroups"}, TR::Source({{1, ReturnValueIndex}})},
55634a73879SEndre Fülöp {{"getlogin"}, TR::Source({{ReturnValueIndex}})},
55734a73879SEndre Fülöp {{"getlogin_r"}, TR::Source({{0}})},
55817f74240SEndre Fülöp
55917f74240SEndre Fülöp // Props
56017f74240SEndre Fülöp {{"atoi"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
56117f74240SEndre Fülöp {{"atol"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
56217f74240SEndre Fülöp {{"atoll"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
56317f74240SEndre Fülöp {{"fgetc"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
56417f74240SEndre Fülöp {{"fgetln"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
5657036413dSBalazs Benics {{"fgets"}, TR::Prop({{2}}, {{0, ReturnValueIndex}})},
56617f74240SEndre Fülöp {{"fscanf"}, TR::Prop({{0}}, {{}, 2})},
5674fd6c6e6SEndre Fülöp {{"fscanf_s"}, TR::Prop({{0}}, {{}, {2}})},
56817f74240SEndre Fülöp {{"sscanf"}, TR::Prop({{0}}, {{}, 2})},
5694fd6c6e6SEndre Fülöp
57017f74240SEndre Fülöp {{"getc"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
57117f74240SEndre Fülöp {{"getc_unlocked"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
57217f74240SEndre Fülöp {{"getdelim"}, TR::Prop({{3}}, {{0}})},
57317f74240SEndre Fülöp {{"getline"}, TR::Prop({{2}}, {{0}})},
57417f74240SEndre Fülöp {{"getw"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
57517f74240SEndre Fülöp {{"pread"}, TR::Prop({{0, 1, 2, 3}}, {{1, ReturnValueIndex}})},
57617f74240SEndre Fülöp {{"read"}, TR::Prop({{0, 2}}, {{1, ReturnValueIndex}})},
57717f74240SEndre Fülöp {{"strchr"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
57817f74240SEndre Fülöp {{"strrchr"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
57917f74240SEndre Fülöp {{"tolower"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
58017f74240SEndre Fülöp {{"toupper"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
5814fd6c6e6SEndre Fülöp {{"fread"}, TR::Prop({{3}}, {{0, ReturnValueIndex}})},
5824fd6c6e6SEndre Fülöp {{"recv"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
5834fd6c6e6SEndre Fülöp {{"recvfrom"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
5844fd6c6e6SEndre Fülöp
5854fd6c6e6SEndre Fülöp {{"ttyname"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
5864fd6c6e6SEndre Fülöp {{"ttyname_r"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
5874fd6c6e6SEndre Fülöp
5884fd6c6e6SEndre Fülöp {{"basename"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
5894fd6c6e6SEndre Fülöp {{"dirname"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
5904fd6c6e6SEndre Fülöp {{"fnmatch"}, TR::Prop({{1}}, {{ReturnValueIndex}})},
5914fd6c6e6SEndre Fülöp {{"memchr"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
5924fd6c6e6SEndre Fülöp {{"memrchr"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
5934fd6c6e6SEndre Fülöp {{"rawmemchr"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
5944fd6c6e6SEndre Fülöp
5954fd6c6e6SEndre Fülöp {{"mbtowc"}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
5964fd6c6e6SEndre Fülöp {{"wctomb"}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
5974fd6c6e6SEndre Fülöp {{"wcwidth"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
5984fd6c6e6SEndre Fülöp
5994fd6c6e6SEndre Fülöp {{"memcmp"}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
6004fd6c6e6SEndre Fülöp {{"memcpy"}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
6014fd6c6e6SEndre Fülöp {{"memmove"}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
6024fd6c6e6SEndre Fülöp // If memmem was called with a tainted needle and the search was
6034fd6c6e6SEndre Fülöp // successful, that would mean that the value pointed by the return value
6044fd6c6e6SEndre Fülöp // has the same content as the needle. If we choose to go by the policy of
6054fd6c6e6SEndre Fülöp // content equivalence implies taintedness equivalence, that would mean
6064fd6c6e6SEndre Fülöp // haystack should be considered a propagation source argument.
6074fd6c6e6SEndre Fülöp {{"memmem"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6084fd6c6e6SEndre Fülöp
6094fd6c6e6SEndre Fülöp // The comment for memmem above also applies to strstr.
6104fd6c6e6SEndre Fülöp {{"strstr"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6114fd6c6e6SEndre Fülöp {{"strcasestr"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6124fd6c6e6SEndre Fülöp
6134fd6c6e6SEndre Fülöp {{"strchrnul"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6144fd6c6e6SEndre Fülöp
6154fd6c6e6SEndre Fülöp {{"index"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6164fd6c6e6SEndre Fülöp {{"rindex"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6174fd6c6e6SEndre Fülöp
6184fd6c6e6SEndre Fülöp // FIXME: In case of arrays, only the first element of the array gets
6194fd6c6e6SEndre Fülöp // tainted.
6204fd6c6e6SEndre Fülöp {{"qsort"}, TR::Prop({{0}}, {{0}})},
6214fd6c6e6SEndre Fülöp {{"qsort_r"}, TR::Prop({{0}}, {{0}})},
6224fd6c6e6SEndre Fülöp
6234fd6c6e6SEndre Fülöp {{"strcmp"}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
6244fd6c6e6SEndre Fülöp {{"strcasecmp"}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
6254fd6c6e6SEndre Fülöp {{"strncmp"}, TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})},
6264fd6c6e6SEndre Fülöp {{"strncasecmp"}, TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})},
6274fd6c6e6SEndre Fülöp {{"strspn"}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
6284fd6c6e6SEndre Fülöp {{"strcspn"}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
6294fd6c6e6SEndre Fülöp {{"strpbrk"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6304fd6c6e6SEndre Fülöp {{"strndup"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6314fd6c6e6SEndre Fülöp {{"strndupa"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6324fd6c6e6SEndre Fülöp {{"strlen"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6334fd6c6e6SEndre Fülöp {{"strnlen"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6344fd6c6e6SEndre Fülöp {{"strtol"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
6354fd6c6e6SEndre Fülöp {{"strtoll"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
6364fd6c6e6SEndre Fülöp {{"strtoul"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
6374fd6c6e6SEndre Fülöp {{"strtoull"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
6384fd6c6e6SEndre Fülöp
6394fd6c6e6SEndre Fülöp {{"isalnum"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6404fd6c6e6SEndre Fülöp {{"isalpha"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6414fd6c6e6SEndre Fülöp {{"isascii"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6424fd6c6e6SEndre Fülöp {{"isblank"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6434fd6c6e6SEndre Fülöp {{"iscntrl"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6444fd6c6e6SEndre Fülöp {{"isdigit"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6454fd6c6e6SEndre Fülöp {{"isgraph"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6464fd6c6e6SEndre Fülöp {{"islower"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6474fd6c6e6SEndre Fülöp {{"isprint"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6484fd6c6e6SEndre Fülöp {{"ispunct"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6494fd6c6e6SEndre Fülöp {{"isspace"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6504fd6c6e6SEndre Fülöp {{"isupper"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6514fd6c6e6SEndre Fülöp {{"isxdigit"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
6524fd6c6e6SEndre Fülöp
65317f74240SEndre Fülöp {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrncat)}},
65417f74240SEndre Fülöp TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
65517f74240SEndre Fülöp {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrlcpy)}},
65617f74240SEndre Fülöp TR::Prop({{1, 2}}, {{0}})},
65717f74240SEndre Fülöp {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrlcat)}},
65817f74240SEndre Fülöp TR::Prop({{1, 2}}, {{0}})},
65917f74240SEndre Fülöp {{CDF_MaybeBuiltin, {"snprintf"}},
66017f74240SEndre Fülöp TR::Prop({{1}, 3}, {{0, ReturnValueIndex}})},
66117f74240SEndre Fülöp {{CDF_MaybeBuiltin, {"sprintf"}},
66217f74240SEndre Fülöp TR::Prop({{1}, 2}, {{0, ReturnValueIndex}})},
66317f74240SEndre Fülöp {{CDF_MaybeBuiltin, {"strcpy"}},
66417f74240SEndre Fülöp TR::Prop({{1}}, {{0, ReturnValueIndex}})},
66517f74240SEndre Fülöp {{CDF_MaybeBuiltin, {"stpcpy"}},
66617f74240SEndre Fülöp TR::Prop({{1}}, {{0, ReturnValueIndex}})},
66717f74240SEndre Fülöp {{CDF_MaybeBuiltin, {"strcat"}},
66817f74240SEndre Fülöp TR::Prop({{1}}, {{0, ReturnValueIndex}})},
66917f74240SEndre Fülöp {{CDF_MaybeBuiltin, {"strdup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
67017f74240SEndre Fülöp {{CDF_MaybeBuiltin, {"strdupa"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
67117f74240SEndre Fülöp {{CDF_MaybeBuiltin, {"wcsdup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
67217f74240SEndre Fülöp
67317f74240SEndre Fülöp // Sinks
67417f74240SEndre Fülöp {{"system"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
67517f74240SEndre Fülöp {{"popen"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
67617f74240SEndre Fülöp {{"execl"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
67717f74240SEndre Fülöp {{"execle"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
67817f74240SEndre Fülöp {{"execlp"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
67917f74240SEndre Fülöp {{"execvp"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
68017f74240SEndre Fülöp {{"execvP"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
68117f74240SEndre Fülöp {{"execve"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
68217f74240SEndre Fülöp {{"dlopen"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
68317f74240SEndre Fülöp {{CDF_MaybeBuiltin, {"malloc"}}, TR::Sink({{0}}, MsgTaintedBufferSize)},
68417f74240SEndre Fülöp {{CDF_MaybeBuiltin, {"calloc"}}, TR::Sink({{0}}, MsgTaintedBufferSize)},
68517f74240SEndre Fülöp {{CDF_MaybeBuiltin, {"alloca"}}, TR::Sink({{0}}, MsgTaintedBufferSize)},
68617f74240SEndre Fülöp {{CDF_MaybeBuiltin, {"memccpy"}}, TR::Sink({{3}}, MsgTaintedBufferSize)},
68717f74240SEndre Fülöp {{CDF_MaybeBuiltin, {"realloc"}}, TR::Sink({{1}}, MsgTaintedBufferSize)},
68817f74240SEndre Fülöp {{{"setproctitle"}}, TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
68917f74240SEndre Fülöp {{{"setproctitle_fast"}},
69017f74240SEndre Fülöp TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
69117f74240SEndre Fülöp
69217f74240SEndre Fülöp // SinkProps
69317f74240SEndre Fülöp {{CDF_MaybeBuiltin, BI.getName(Builtin::BImemcpy)},
69417f74240SEndre Fülöp TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
69517f74240SEndre Fülöp MsgTaintedBufferSize)},
69617f74240SEndre Fülöp {{CDF_MaybeBuiltin, {BI.getName(Builtin::BImemmove)}},
69717f74240SEndre Fülöp TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
69817f74240SEndre Fülöp MsgTaintedBufferSize)},
69917f74240SEndre Fülöp {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrncpy)}},
70017f74240SEndre Fülöp TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
70117f74240SEndre Fülöp MsgTaintedBufferSize)},
70217f74240SEndre Fülöp {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrndup)}},
70317f74240SEndre Fülöp TR::SinkProp({{1}}, {{0, 1}}, {{ReturnValueIndex}},
70417f74240SEndre Fülöp MsgTaintedBufferSize)},
70517f74240SEndre Fülöp {{CDF_MaybeBuiltin, {"bcopy"}},
70617f74240SEndre Fülöp TR::SinkProp({{2}}, {{0, 2}}, {{1}}, MsgTaintedBufferSize)}};
707edde4efcSBalazs Benics
708edde4efcSBalazs Benics // `getenv` returns taint only in untrusted environments.
70917f74240SEndre Fülöp if (TR::UntrustedEnv(C)) {
71017f74240SEndre Fülöp // void setproctitle_init(int argc, char *argv[], char *envp[])
71117f74240SEndre Fülöp GlobalCRules.push_back(
7127036413dSBalazs Benics {{{"setproctitle_init"}}, TR::Sink({{1, 2}}, MsgCustomSink)});
71317f74240SEndre Fülöp GlobalCRules.push_back({{"getenv"}, TR::Source({{ReturnValueIndex}})});
714edde4efcSBalazs Benics }
715edde4efcSBalazs Benics
71617f74240SEndre Fülöp StaticTaintRules.emplace(std::make_move_iterator(GlobalCRules.begin()),
71717f74240SEndre Fülöp std::make_move_iterator(GlobalCRules.end()));
7185d324e50SAnna Zaks
71917f74240SEndre Fülöp // User-provided taint configuration.
72017f74240SEndre Fülöp CheckerManager *Mgr = C.getAnalysisManager().getCheckerManager();
72117f74240SEndre Fülöp assert(Mgr);
72217f74240SEndre Fülöp GenericTaintRuleParser ConfigParser{*Mgr};
72317f74240SEndre Fülöp std::string Option{"Config"};
72417f74240SEndre Fülöp StringRef ConfigFile =
72517f74240SEndre Fülöp Mgr->getAnalyzerOptions().getCheckerStringOption(this, Option);
72617f74240SEndre Fülöp llvm::Optional<TaintConfiguration> Config =
72717f74240SEndre Fülöp getConfiguration<TaintConfiguration>(*Mgr, this, Option, ConfigFile);
72817f74240SEndre Fülöp if (!Config) {
72917f74240SEndre Fülöp // We don't have external taint config, no parsing required.
73017f74240SEndre Fülöp DynamicTaintRules = RuleLookupTy{};
73117f74240SEndre Fülöp return;
73295a94df5SBalazs Benics }
7335d324e50SAnna Zaks
73417f74240SEndre Fülöp GenericTaintRuleParser::RulesContTy Rules{
735*ca4af13eSKazu Hirata ConfigParser.parseConfiguration(Option, std::move(*Config))};
7365d324e50SAnna Zaks
73717f74240SEndre Fülöp DynamicTaintRules.emplace(std::make_move_iterator(Rules.begin()),
73817f74240SEndre Fülöp std::make_move_iterator(Rules.end()));
739273e6742SBorsik Gabor }
740080ecafdSGabor Borsik
checkPreCall(const CallEvent & Call,CheckerContext & C) const74195a94df5SBalazs Benics void GenericTaintChecker::checkPreCall(const CallEvent &Call,
7425c5bf9b6SAnna Zaks CheckerContext &C) const {
74317f74240SEndre Fülöp initTaintRules(C);
74489bc4c66SBorsik Gabor
74517f74240SEndre Fülöp // FIXME: this should be much simpler.
74617f74240SEndre Fülöp if (const auto *Rule =
74717f74240SEndre Fülöp Call.isGlobalCFunction() ? StaticTaintRules->lookup(Call) : nullptr)
74817f74240SEndre Fülöp Rule->process(*this, Call, C);
74917f74240SEndre Fülöp else if (const auto *Rule = DynamicTaintRules->lookup(Call))
75017f74240SEndre Fülöp Rule->process(*this, Call, C);
7513b0ab206SAnna Zaks
75217f74240SEndre Fülöp // FIXME: These edge cases are to be eliminated from here eventually.
75317f74240SEndre Fülöp //
75417f74240SEndre Fülöp // Additional check that is not supported by CallDescription.
75517f74240SEndre Fülöp // TODO: Make CallDescription be able to match attributes such as printf-like
75617f74240SEndre Fülöp // arguments.
75717f74240SEndre Fülöp checkUncontrolledFormatString(Call, C);
75889bc4c66SBorsik Gabor
75917f74240SEndre Fülöp // TODO: Modeling sockets should be done in a specific checker.
76017f74240SEndre Fülöp // Socket is a source, which taints the return value.
76117f74240SEndre Fülöp taintUnsafeSocketProtocol(Call, C);
762126a2ef9SAnna Zaks }
763126a2ef9SAnna Zaks
checkPostCall(const CallEvent & Call,CheckerContext & C) const76495a94df5SBalazs Benics void GenericTaintChecker::checkPostCall(const CallEvent &Call,
765126a2ef9SAnna Zaks CheckerContext &C) const {
7662827349cSKristof Umann // Set the marked values as tainted. The return value only accessible from
7672827349cSKristof Umann // checkPostStmt.
76849b1e38eSTed Kremenek ProgramStateRef State = C.getState();
769a848a5cfSBalazs Benics const StackFrameContext *CurrentFrame = C.getStackFrame();
770b3fa8d7dSAnna Zaks
771b3fa8d7dSAnna Zaks // Depending on what was tainted at pre-visit, we determined a set of
772b3fa8d7dSAnna Zaks // arguments which should be tainted after the function returns. These are
773b3fa8d7dSAnna Zaks // stored in the state as TaintArgsOnPostVisit set.
774a848a5cfSBalazs Benics TaintArgsOnPostVisitTy TaintArgsMap = State->get<TaintArgsOnPostVisit>();
775a848a5cfSBalazs Benics
776a848a5cfSBalazs Benics const ImmutableSet<ArgIdxTy> *TaintArgs = TaintArgsMap.lookup(CurrentFrame);
777a848a5cfSBalazs Benics if (!TaintArgs)
77817f74240SEndre Fülöp return;
779a848a5cfSBalazs Benics assert(!TaintArgs->isEmpty());
780bf740512SAnna Zaks
781fa0a80e0SBalazs Benics LLVM_DEBUG(for (ArgIdxTy I
782a848a5cfSBalazs Benics : *TaintArgs) {
783fa0a80e0SBalazs Benics llvm::dbgs() << "PostCall<";
784fa0a80e0SBalazs Benics Call.dump(llvm::dbgs());
785fa0a80e0SBalazs Benics llvm::dbgs() << "> actually wants to taint arg index: " << I << '\n';
786fa0a80e0SBalazs Benics });
787fa0a80e0SBalazs Benics
788a848a5cfSBalazs Benics for (ArgIdxTy ArgNum : *TaintArgs) {
789b3fa8d7dSAnna Zaks // Special handling for the tainted return value.
790b3fa8d7dSAnna Zaks if (ArgNum == ReturnValueIndex) {
79195a94df5SBalazs Benics State = addTaint(State, Call.getReturnValue());
792b3fa8d7dSAnna Zaks continue;
793b3fa8d7dSAnna Zaks }
794b3fa8d7dSAnna Zaks
795b3fa8d7dSAnna Zaks // The arguments are pointer arguments. The data they are pointing at is
796b3fa8d7dSAnna Zaks // tainted after the call.
79717f74240SEndre Fülöp if (auto V = getPointeeOf(C, Call.getArgSVal(ArgNum)))
79844551cf6SArtem Dergachev State = addTaint(State, *V);
799b3fa8d7dSAnna Zaks }
800b3fa8d7dSAnna Zaks
801b3fa8d7dSAnna Zaks // Clear up the taint info from the state.
802a848a5cfSBalazs Benics State = State->remove<TaintArgsOnPostVisit>(CurrentFrame);
803b3fa8d7dSAnna Zaks C.addTransition(State);
804b3fa8d7dSAnna Zaks }
805b3fa8d7dSAnna Zaks
printState(raw_ostream & Out,ProgramStateRef State,const char * NL,const char * Sep) const80617f74240SEndre Fülöp void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
80717f74240SEndre Fülöp const char *NL, const char *Sep) const {
80817f74240SEndre Fülöp printTaint(State, Out, NL, Sep);
809126a2ef9SAnna Zaks }
810126a2ef9SAnna Zaks
process(const GenericTaintChecker & Checker,const CallEvent & Call,CheckerContext & C) const81117f74240SEndre Fülöp void GenericTaintRule::process(const GenericTaintChecker &Checker,
81217f74240SEndre Fülöp const CallEvent &Call, CheckerContext &C) const {
81349b1e38eSTed Kremenek ProgramStateRef State = C.getState();
81417f74240SEndre Fülöp const ArgIdxTy CallNumArgs = fromArgumentCount(Call.getNumArgs());
8157c96b7dbSAnna Zaks
81617f74240SEndre Fülöp /// Iterate every call argument, and get their corresponding Expr and SVal.
81717f74240SEndre Fülöp const auto ForEachCallArg = [&C, &Call, CallNumArgs](auto &&Fun) {
81817f74240SEndre Fülöp for (ArgIdxTy I = ReturnValueIndex; I < CallNumArgs; ++I) {
81917f74240SEndre Fülöp const Expr *E = GetArgExpr(I, Call);
82017f74240SEndre Fülöp Fun(I, E, C.getSVal(E));
8215c5bf9b6SAnna Zaks }
82217f74240SEndre Fülöp };
8235c5bf9b6SAnna Zaks
82417f74240SEndre Fülöp /// Check for taint sinks.
82517f74240SEndre Fülöp ForEachCallArg([this, &Checker, &C, &State](ArgIdxTy I, const Expr *E, SVal) {
82617f74240SEndre Fülöp if (SinkArgs.contains(I) && isTaintedOrPointsToTainted(E, State, C))
82706decd0bSKazu Hirata Checker.generateReportIfTainted(E, SinkMsg.value_or(MsgCustomSink), C);
82817f74240SEndre Fülöp });
8293666d2c1SAnna Zaks
83017f74240SEndre Fülöp /// Check for taint filters.
83117f74240SEndre Fülöp ForEachCallArg([this, &C, &State](ArgIdxTy I, const Expr *E, SVal S) {
83217f74240SEndre Fülöp if (FilterArgs.contains(I)) {
83317f74240SEndre Fülöp State = removeTaint(State, S);
83417f74240SEndre Fülöp if (auto P = getPointeeOf(C, S))
83517f74240SEndre Fülöp State = removeTaint(State, *P);
8363666d2c1SAnna Zaks }
83717f74240SEndre Fülöp });
8382a5fb125SArtem Dergachev
83917f74240SEndre Fülöp /// Check for taint propagation sources.
84017f74240SEndre Fülöp /// A rule is relevant if PropSrcArgs is empty, or if any of its signified
84117f74240SEndre Fülöp /// args are tainted in context of the current CallEvent.
84217f74240SEndre Fülöp bool IsMatching = PropSrcArgs.isEmpty();
84317f74240SEndre Fülöp ForEachCallArg(
84417f74240SEndre Fülöp [this, &C, &IsMatching, &State](ArgIdxTy I, const Expr *E, SVal) {
84517f74240SEndre Fülöp IsMatching = IsMatching || (PropSrcArgs.contains(I) &&
84617f74240SEndre Fülöp isTaintedOrPointsToTainted(E, State, C));
84717f74240SEndre Fülöp });
8482a5fb125SArtem Dergachev
84917f74240SEndre Fülöp if (!IsMatching)
85017f74240SEndre Fülöp return;
8512827349cSKristof Umann
85217f74240SEndre Fülöp const auto WouldEscape = [](SVal V, QualType Ty) -> bool {
85396ccb690SBalazs Benics if (!isa<Loc>(V))
854099fe3fbSAnna Zaks return false;
855099fe3fbSAnna Zaks
85617f74240SEndre Fülöp const bool IsNonConstRef = Ty->isReferenceType() && !Ty.isConstQualified();
85717f74240SEndre Fülöp const bool IsNonConstPtr =
85817f74240SEndre Fülöp Ty->isPointerType() && !Ty->getPointeeType().isConstQualified();
859e48ee503SAnna Zaks
86017f74240SEndre Fülöp return IsNonConstRef || IsNonConstPtr;
86117f74240SEndre Fülöp };
86217f74240SEndre Fülöp
86317f74240SEndre Fülöp /// Propagate taint where it is necessary.
864a848a5cfSBalazs Benics auto &F = State->getStateManager().get_context<ArgIdxFactory>();
865a848a5cfSBalazs Benics ImmutableSet<ArgIdxTy> Result = F.getEmptySet();
86617f74240SEndre Fülöp ForEachCallArg(
867ecff9b65SFangrui Song [&](ArgIdxTy I, const Expr *E, SVal V) {
868fa0a80e0SBalazs Benics if (PropDstArgs.contains(I)) {
869fa0a80e0SBalazs Benics LLVM_DEBUG(llvm::dbgs() << "PreCall<"; Call.dump(llvm::dbgs());
870fa0a80e0SBalazs Benics llvm::dbgs()
871fa0a80e0SBalazs Benics << "> prepares tainting arg index: " << I << '\n';);
872a848a5cfSBalazs Benics Result = F.add(Result, I);
873fa0a80e0SBalazs Benics }
87417f74240SEndre Fülöp
87517f74240SEndre Fülöp // TODO: We should traverse all reachable memory regions via the
87617f74240SEndre Fülöp // escaping parameter. Instead of doing that we simply mark only the
87717f74240SEndre Fülöp // referred memory region as tainted.
878fa0a80e0SBalazs Benics if (WouldEscape(V, E->getType())) {
879a848a5cfSBalazs Benics LLVM_DEBUG(if (!Result.contains(I)) {
880fa0a80e0SBalazs Benics llvm::dbgs() << "PreCall<";
881fa0a80e0SBalazs Benics Call.dump(llvm::dbgs());
882fa0a80e0SBalazs Benics llvm::dbgs() << "> prepares tainting arg index: " << I << '\n';
883fa0a80e0SBalazs Benics });
884a848a5cfSBalazs Benics Result = F.add(Result, I);
885fa0a80e0SBalazs Benics }
88617f74240SEndre Fülöp });
88717f74240SEndre Fülöp
888a848a5cfSBalazs Benics if (!Result.isEmpty())
889a848a5cfSBalazs Benics State = State->set<TaintArgsOnPostVisit>(C.getStackFrame(), Result);
89017f74240SEndre Fülöp C.addTransition(State);
891099fe3fbSAnna Zaks }
892099fe3fbSAnna Zaks
UntrustedEnv(CheckerContext & C)89317f74240SEndre Fülöp bool GenericTaintRule::UntrustedEnv(CheckerContext &C) {
89417f74240SEndre Fülöp return !C.getAnalysisManager()
89517f74240SEndre Fülöp .getAnalyzerOptions()
89617f74240SEndre Fülöp .ShouldAssumeControlledEnvironment;
897126a2ef9SAnna Zaks }
898126a2ef9SAnna Zaks
generateReportIfTainted(const Expr * E,StringRef Msg,CheckerContext & C) const899080ecafdSGabor Borsik bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg,
9000244cd74SAnna Zaks CheckerContext &C) const {
9010244cd74SAnna Zaks assert(E);
90217f74240SEndre Fülöp Optional<SVal> TaintedSVal{getTaintedPointeeOrPointer(C, C.getSVal(E))};
9030244cd74SAnna Zaks
90417f74240SEndre Fülöp if (!TaintedSVal)
9050244cd74SAnna Zaks return false;
9060244cd74SAnna Zaks
9070244cd74SAnna Zaks // Generate diagnostic.
908e39bd407SDevin Coughlin if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
90917f74240SEndre Fülöp auto report = std::make_unique<PathSensitiveBugReport>(BT, Msg, N);
9100244cd74SAnna Zaks report->addRange(E->getSourceRange());
91117f74240SEndre Fülöp report->addVisitor(std::make_unique<TaintBugVisitor>(*TaintedSVal));
9128d3a7a56SAaron Ballman C.emitReport(std::move(report));
9130244cd74SAnna Zaks return true;
9140244cd74SAnna Zaks }
9150244cd74SAnna Zaks return false;
9160244cd74SAnna Zaks }
9170244cd74SAnna Zaks
/// TODO: Remove the printf format attribute check and the socket whitelisting
/// from GenericTaintChecker. That means removing the following functions:
///   getPrintfFormatArgumentNum,
///   GenericTaintChecker::checkUncontrolledFormatString,
///   GenericTaintChecker::taintUnsafeSocketProtocol
92317f74240SEndre Fülöp
getPrintfFormatArgumentNum(const CallEvent & Call,const CheckerContext & C,ArgIdxTy & ArgNum)92417f74240SEndre Fülöp static bool getPrintfFormatArgumentNum(const CallEvent &Call,
92517f74240SEndre Fülöp const CheckerContext &C,
92617f74240SEndre Fülöp ArgIdxTy &ArgNum) {
92717f74240SEndre Fülöp // Find if the function contains a format string argument.
92817f74240SEndre Fülöp // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
92917f74240SEndre Fülöp // vsnprintf, syslog, custom annotated functions.
93017f74240SEndre Fülöp const Decl *CallDecl = Call.getDecl();
93117f74240SEndre Fülöp if (!CallDecl)
93217f74240SEndre Fülöp return false;
93317f74240SEndre Fülöp const FunctionDecl *FDecl = CallDecl->getAsFunction();
93417f74240SEndre Fülöp if (!FDecl)
93517f74240SEndre Fülöp return false;
93617f74240SEndre Fülöp
93717f74240SEndre Fülöp const ArgIdxTy CallNumArgs = fromArgumentCount(Call.getNumArgs());
93817f74240SEndre Fülöp
93917f74240SEndre Fülöp for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
94017f74240SEndre Fülöp ArgNum = Format->getFormatIdx() - 1;
94117f74240SEndre Fülöp if ((Format->getType()->getName() == "printf") && CallNumArgs > ArgNum)
94217f74240SEndre Fülöp return true;
94317f74240SEndre Fülöp }
94417f74240SEndre Fülöp
94517f74240SEndre Fülöp return false;
94617f74240SEndre Fülöp }
94717f74240SEndre Fülöp
checkUncontrolledFormatString(const CallEvent & Call,CheckerContext & C) const948b68cb549SArtem Dergachev bool GenericTaintChecker::checkUncontrolledFormatString(
94995a94df5SBalazs Benics const CallEvent &Call, CheckerContext &C) const {
950126a2ef9SAnna Zaks // Check if the function contains a format string argument.
95117f74240SEndre Fülöp ArgIdxTy ArgNum = 0;
95295a94df5SBalazs Benics if (!getPrintfFormatArgumentNum(Call, C, ArgNum))
953126a2ef9SAnna Zaks return false;
954126a2ef9SAnna Zaks
955b68cb549SArtem Dergachev // If either the format string content or the pointer itself are tainted,
956b68cb549SArtem Dergachev // warn.
95795a94df5SBalazs Benics return generateReportIfTainted(Call.getArgExpr(ArgNum),
9589c10490eSAlexander Kornienko MsgUncontrolledFormatString, C);
959126a2ef9SAnna Zaks }
9600244cd74SAnna Zaks
taintUnsafeSocketProtocol(const CallEvent & Call,CheckerContext & C) const96117f74240SEndre Fülöp void GenericTaintChecker::taintUnsafeSocketProtocol(const CallEvent &Call,
9620244cd74SAnna Zaks CheckerContext &C) const {
96317f74240SEndre Fülöp if (Call.getNumArgs() < 1)
96417f74240SEndre Fülöp return;
96517f74240SEndre Fülöp const IdentifierInfo *ID = Call.getCalleeIdentifier();
96617f74240SEndre Fülöp if (!ID)
96717f74240SEndre Fülöp return;
96817f74240SEndre Fülöp if (!ID->getName().equals("socket"))
96917f74240SEndre Fülöp return;
9700244cd74SAnna Zaks
97117f74240SEndre Fülöp SourceLocation DomLoc = Call.getArgExpr(0)->getExprLoc();
97217f74240SEndre Fülöp StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
97317f74240SEndre Fülöp // Allow internal communication protocols.
97417f74240SEndre Fülöp bool SafeProtocol = DomName.equals("AF_SYSTEM") ||
97517f74240SEndre Fülöp DomName.equals("AF_LOCAL") || DomName.equals("AF_UNIX") ||
97617f74240SEndre Fülöp DomName.equals("AF_RESERVED_36");
97717f74240SEndre Fülöp if (SafeProtocol)
97817f74240SEndre Fülöp return;
9790244cd74SAnna Zaks
980a848a5cfSBalazs Benics ProgramStateRef State = C.getState();
981a848a5cfSBalazs Benics auto &F = State->getStateManager().get_context<ArgIdxFactory>();
982a848a5cfSBalazs Benics ImmutableSet<ArgIdxTy> Result = F.add(F.getEmptySet(), ReturnValueIndex);
983a848a5cfSBalazs Benics State = State->set<TaintArgsOnPostVisit>(C.getStackFrame(), Result);
984a848a5cfSBalazs Benics C.addTransition(State);
985126a2ef9SAnna Zaks }
986126a2ef9SAnna Zaks
98717f74240SEndre Fülöp /// Checker registration
registerGenericTaintChecker(CheckerManager & Mgr)9884bde15feSGabor Borsik void ento::registerGenericTaintChecker(CheckerManager &Mgr) {
98917f74240SEndre Fülöp Mgr.registerChecker<GenericTaintChecker>();
9905c5bf9b6SAnna Zaks }
991058a7a45SKristof Umann
shouldRegisterGenericTaintChecker(const CheckerManager & mgr)992bda3dd0dSKirstóf Umann bool ento::shouldRegisterGenericTaintChecker(const CheckerManager &mgr) {
993058a7a45SKristof Umann return true;
994058a7a45SKristof Umann }
995