1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This checker defines the attack surface for generic taint propagation. 10 // 11 // The taint information produced by it might be useful to other checkers. For 12 // example, checkers should report errors which involve tainted data more 13 // aggressively, even if the involved symbols are under constrained. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "Taint.h" 18 #include "Yaml.h" 19 #include "clang/AST/Attr.h" 20 #include "clang/Basic/Builtins.h" 21 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" 22 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 23 #include "clang/StaticAnalyzer/Core/Checker.h" 24 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 25 #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h" 26 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" 27 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 28 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 29 #include "llvm/Support/YAMLTraits.h" 30 31 #include <limits> 32 #include <memory> 33 #include <utility> 34 35 #define DEBUG_TYPE "taint-checker" 36 37 using namespace clang; 38 using namespace ento; 39 using namespace taint; 40 41 using llvm::ImmutableSet; 42 43 namespace { 44 45 class GenericTaintChecker; 46 47 /// Check for CWE-134: Uncontrolled Format String. 48 constexpr llvm::StringLiteral MsgUncontrolledFormatString = 49 "Untrusted data is used as a format string " 50 "(CWE-134: Uncontrolled Format String)"; 51 52 /// Check for: 53 /// CERT/STR02-C. "Sanitize data passed to complex subsystems" 54 /// CWE-78, "Failure to Sanitize Data into an OS Command" 55 constexpr llvm::StringLiteral MsgSanitizeSystemArgs = 56 "Untrusted data is passed to a system call " 57 "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; 58 59 /// Check if tainted data is used as a buffer size in strn.. functions, 60 /// and allocators. 61 constexpr llvm::StringLiteral MsgTaintedBufferSize = 62 "Untrusted data is used to specify the buffer size " 63 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space " 64 "for character data and the null terminator)"; 65 66 /// Check if tainted data is used as a custom sink's parameter. 67 constexpr llvm::StringLiteral MsgCustomSink = 68 "Untrusted data is passed to a user-defined sink"; 69 70 using ArgIdxTy = int; 71 using ArgVecTy = llvm::SmallVector<ArgIdxTy, 2>; 72 73 /// Denotes the return value. 74 constexpr ArgIdxTy ReturnValueIndex{-1}; 75 76 static ArgIdxTy fromArgumentCount(unsigned Count) { 77 assert(Count <= 78 static_cast<std::size_t>(std::numeric_limits<ArgIdxTy>::max()) && 79 "ArgIdxTy is not large enough to represent the number of arguments."); 80 return Count; 81 } 82 83 /// Check if the region the expression evaluates to is the standard input, 84 /// and thus, is tainted. 85 /// FIXME: Move this to Taint.cpp. 86 bool isStdin(SVal Val, const ASTContext &ACtx) { 87 // FIXME: What if Val is NonParamVarRegion? 88 89 // The region should be symbolic, we do not know it's value. 90 const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(Val.getAsRegion()); 91 if (!SymReg) 92 return false; 93 94 // Get it's symbol and find the declaration region it's pointing to. 95 const auto *Sm = dyn_cast<SymbolRegionValue>(SymReg->getSymbol()); 96 if (!Sm) 97 return false; 98 const auto *DeclReg = dyn_cast<DeclRegion>(Sm->getRegion()); 99 if (!DeclReg) 100 return false; 101 102 // This region corresponds to a declaration, find out if it's a global/extern 103 // variable named stdin with the proper type. 104 if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { 105 D = D->getCanonicalDecl(); 106 // FIXME: This should look for an exact match. 107 if (D->getName().contains("stdin") && D->isExternC()) { 108 const QualType FILETy = ACtx.getFILEType().getCanonicalType(); 109 const QualType Ty = D->getType().getCanonicalType(); 110 111 if (Ty->isPointerType()) 112 return Ty->getPointeeType() == FILETy; 113 } 114 } 115 return false; 116 } 117 118 SVal getPointeeOf(const CheckerContext &C, Loc LValue) { 119 const QualType ArgTy = LValue.getType(C.getASTContext()); 120 if (!ArgTy->isPointerType() || !ArgTy->getPointeeType()->isVoidType()) 121 return C.getState()->getSVal(LValue); 122 123 // Do not dereference void pointers. Treat them as byte pointers instead. 124 // FIXME: we might want to consider more than just the first byte. 125 return C.getState()->getSVal(LValue, C.getASTContext().CharTy); 126 } 127 128 /// Given a pointer/reference argument, return the value it refers to. 129 Optional<SVal> getPointeeOf(const CheckerContext &C, SVal Arg) { 130 if (auto LValue = Arg.getAs<Loc>()) 131 return getPointeeOf(C, *LValue); 132 return None; 133 } 134 135 /// Given a pointer, return the SVal of its pointee or if it is tainted, 136 /// otherwise return the pointer's SVal if tainted. 137 /// Also considers stdin as a taint source. 138 Optional<SVal> getTaintedPointeeOrPointer(const CheckerContext &C, SVal Arg) { 139 const ProgramStateRef State = C.getState(); 140 141 if (auto Pointee = getPointeeOf(C, Arg)) 142 if (isTainted(State, *Pointee)) // FIXME: isTainted(...) ? Pointee : None; 143 return Pointee; 144 145 if (isTainted(State, Arg)) 146 return Arg; 147 148 // FIXME: This should be done by the isTainted() API. 149 if (isStdin(Arg, C.getASTContext())) 150 return Arg; 151 152 return None; 153 } 154 155 bool isTaintedOrPointsToTainted(const Expr *E, const ProgramStateRef &State, 156 CheckerContext &C) { 157 return getTaintedPointeeOrPointer(C, C.getSVal(E)).hasValue(); 158 } 159 160 /// ArgSet is used to describe arguments relevant for taint detection or 161 /// taint application. A discrete set of argument indexes and a variadic 162 /// argument list signified by a starting index are supported. 163 class ArgSet { 164 public: 165 ArgSet() = default; 166 ArgSet(ArgVecTy &&DiscreteArgs, Optional<ArgIdxTy> VariadicIndex = None) 167 : DiscreteArgs(std::move(DiscreteArgs)), 168 VariadicIndex(std::move(VariadicIndex)) {} 169 170 bool contains(ArgIdxTy ArgIdx) const { 171 if (llvm::is_contained(DiscreteArgs, ArgIdx)) 172 return true; 173 174 return VariadicIndex && ArgIdx >= *VariadicIndex; 175 } 176 177 bool isEmpty() const { return DiscreteArgs.empty() && !VariadicIndex; } 178 179 ArgVecTy ArgsUpTo(ArgIdxTy LastArgIdx) const { 180 ArgVecTy Args; 181 for (ArgIdxTy I = ReturnValueIndex; I <= LastArgIdx; ++I) { 182 if (contains(I)) 183 Args.push_back(I); 184 } 185 return Args; 186 } 187 188 private: 189 ArgVecTy DiscreteArgs; 190 Optional<ArgIdxTy> VariadicIndex; 191 }; 192 193 /// A struct used to specify taint propagation rules for a function. 194 /// 195 /// If any of the possible taint source arguments is tainted, all of the 196 /// destination arguments should also be tainted. If ReturnValueIndex is added 197 /// to the dst list, the return value will be tainted. 198 class GenericTaintRule { 199 /// Arguments which are taints sinks and should be checked, and a report 200 /// should be emitted if taint reaches these. 201 ArgSet SinkArgs; 202 /// Arguments which should be sanitized on function return. 203 ArgSet FilterArgs; 204 /// Arguments which can participate in taint propagationa. If any of the 205 /// arguments in PropSrcArgs is tainted, all arguments in PropDstArgs should 206 /// be tainted. 207 ArgSet PropSrcArgs; 208 ArgSet PropDstArgs; 209 210 /// A message that explains why the call is sensitive to taint. 211 Optional<StringRef> SinkMsg; 212 213 GenericTaintRule() = default; 214 215 GenericTaintRule(ArgSet &&Sink, ArgSet &&Filter, ArgSet &&Src, ArgSet &&Dst, 216 Optional<StringRef> SinkMsg = None) 217 : SinkArgs(std::move(Sink)), FilterArgs(std::move(Filter)), 218 PropSrcArgs(std::move(Src)), PropDstArgs(std::move(Dst)), 219 SinkMsg(SinkMsg) {} 220 221 public: 222 /// Make a rule that reports a warning if taint reaches any of \p FilterArgs 223 /// arguments. 224 static GenericTaintRule Sink(ArgSet &&SinkArgs, 225 Optional<StringRef> Msg = None) { 226 return {std::move(SinkArgs), {}, {}, {}, Msg}; 227 } 228 229 /// Make a rule that sanitizes all FilterArgs arguments. 230 static GenericTaintRule Filter(ArgSet &&FilterArgs) { 231 return {{}, std::move(FilterArgs), {}, {}}; 232 } 233 234 /// Make a rule that unconditionally taints all Args. 235 /// If Func is provided, it must also return true for taint to propagate. 236 static GenericTaintRule Source(ArgSet &&SourceArgs) { 237 return {{}, {}, {}, std::move(SourceArgs)}; 238 } 239 240 /// Make a rule that taints all PropDstArgs if any of PropSrcArgs is tainted. 241 static GenericTaintRule Prop(ArgSet &&SrcArgs, ArgSet &&DstArgs) { 242 return {{}, {}, std::move(SrcArgs), std::move(DstArgs)}; 243 } 244 245 /// Make a rule that taints all PropDstArgs if any of PropSrcArgs is tainted. 246 static GenericTaintRule SinkProp(ArgSet &&SinkArgs, ArgSet &&SrcArgs, 247 ArgSet &&DstArgs, 248 Optional<StringRef> Msg = None) { 249 return { 250 std::move(SinkArgs), {}, std::move(SrcArgs), std::move(DstArgs), Msg}; 251 } 252 253 /// Process a function which could either be a taint source, a taint sink, a 254 /// taint filter or a taint propagator. 255 void process(const GenericTaintChecker &Checker, const CallEvent &Call, 256 CheckerContext &C) const; 257 258 /// Handles the resolution of indexes of type ArgIdxTy to Expr*-s. 259 static const Expr *GetArgExpr(ArgIdxTy ArgIdx, const CallEvent &Call) { 260 return ArgIdx == ReturnValueIndex ? Call.getOriginExpr() 261 : Call.getArgExpr(ArgIdx); 262 }; 263 264 /// Functions for custom taintedness propagation. 265 static bool UntrustedEnv(CheckerContext &C); 266 }; 267 268 using RuleLookupTy = CallDescriptionMap<GenericTaintRule>; 269 270 /// Used to parse the configuration file. 271 struct TaintConfiguration { 272 using NameScopeArgs = std::tuple<std::string, std::string, ArgVecTy>; 273 enum class VariadicType { None, Src, Dst }; 274 275 struct Common { 276 std::string Name; 277 std::string Scope; 278 }; 279 280 struct Sink : Common { 281 ArgVecTy SinkArgs; 282 }; 283 284 struct Filter : Common { 285 ArgVecTy FilterArgs; 286 }; 287 288 struct Propagation : Common { 289 ArgVecTy SrcArgs; 290 ArgVecTy DstArgs; 291 VariadicType VarType; 292 ArgIdxTy VarIndex; 293 }; 294 295 std::vector<Propagation> Propagations; 296 std::vector<Filter> Filters; 297 std::vector<Sink> Sinks; 298 299 TaintConfiguration() = default; 300 TaintConfiguration(const TaintConfiguration &) = default; 301 TaintConfiguration(TaintConfiguration &&) = default; 302 TaintConfiguration &operator=(const TaintConfiguration &) = default; 303 TaintConfiguration &operator=(TaintConfiguration &&) = default; 304 }; 305 306 struct GenericTaintRuleParser { 307 GenericTaintRuleParser(CheckerManager &Mgr) : Mgr(Mgr) {} 308 /// Container type used to gather call identification objects grouped into 309 /// pairs with their corresponding taint rules. It is temporary as it is used 310 /// to finally initialize RuleLookupTy, which is considered to be immutable. 311 using RulesContTy = std::vector<std::pair<CallDescription, GenericTaintRule>>; 312 RulesContTy parseConfiguration(const std::string &Option, 313 TaintConfiguration &&Config) const; 314 315 private: 316 using NamePartsTy = llvm::SmallVector<SmallString<32>, 2>; 317 318 /// Validate part of the configuration, which contains a list of argument 319 /// indexes. 320 void validateArgVector(const std::string &Option, const ArgVecTy &Args) const; 321 322 template <typename Config> static NamePartsTy parseNameParts(const Config &C); 323 324 // Takes the config and creates a CallDescription for it and associates a Rule 325 // with that. 326 template <typename Config> 327 static void consumeRulesFromConfig(const Config &C, GenericTaintRule &&Rule, 328 RulesContTy &Rules); 329 330 void parseConfig(const std::string &Option, TaintConfiguration::Sink &&P, 331 RulesContTy &Rules) const; 332 void parseConfig(const std::string &Option, TaintConfiguration::Filter &&P, 333 RulesContTy &Rules) const; 334 void parseConfig(const std::string &Option, 335 TaintConfiguration::Propagation &&P, 336 RulesContTy &Rules) const; 337 338 CheckerManager &Mgr; 339 }; 340 341 class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> { 342 public: 343 static void *getTag() { 344 static int Tag; 345 return &Tag; 346 } 347 348 void checkPreCall(const CallEvent &Call, CheckerContext &C) const; 349 void checkPostCall(const CallEvent &Call, CheckerContext &C) const; 350 351 void printState(raw_ostream &Out, ProgramStateRef State, const char *NL, 352 const char *Sep) const override; 353 354 /// Generate a report if the expression is tainted or points to tainted data. 355 bool generateReportIfTainted(const Expr *E, StringRef Msg, 356 CheckerContext &C) const; 357 358 private: 359 const BugType BT{this, "Use of Untrusted Data", "Untrusted Data"}; 360 361 bool checkUncontrolledFormatString(const CallEvent &Call, 362 CheckerContext &C) const; 363 364 void taintUnsafeSocketProtocol(const CallEvent &Call, 365 CheckerContext &C) const; 366 367 /// Default taint rules are initilized with the help of a CheckerContext to 368 /// access the names of built-in functions like memcpy. 369 void initTaintRules(CheckerContext &C) const; 370 371 /// CallDescription currently cannot restrict matches to the global namespace 372 /// only, which is why multiple CallDescriptionMaps are used, as we want to 373 /// disambiguate global C functions from functions inside user-defined 374 /// namespaces. 375 // TODO: Remove separation to simplify matching logic once CallDescriptions 376 // are more expressive. 377 378 mutable Optional<RuleLookupTy> StaticTaintRules; 379 mutable Optional<RuleLookupTy> DynamicTaintRules; 380 }; 381 } // end of anonymous namespace 382 383 /// YAML serialization mapping. 384 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Sink) 385 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Filter) 386 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Propagation) 387 388 namespace llvm { 389 namespace yaml { 390 template <> struct MappingTraits<TaintConfiguration> { 391 static void mapping(IO &IO, TaintConfiguration &Config) { 392 IO.mapOptional("Propagations", Config.Propagations); 393 IO.mapOptional("Filters", Config.Filters); 394 IO.mapOptional("Sinks", Config.Sinks); 395 } 396 }; 397 398 template <> struct MappingTraits<TaintConfiguration::Sink> { 399 static void mapping(IO &IO, TaintConfiguration::Sink &Sink) { 400 IO.mapRequired("Name", Sink.Name); 401 IO.mapOptional("Scope", Sink.Scope); 402 IO.mapRequired("Args", Sink.SinkArgs); 403 } 404 }; 405 406 template <> struct MappingTraits<TaintConfiguration::Filter> { 407 static void mapping(IO &IO, TaintConfiguration::Filter &Filter) { 408 IO.mapRequired("Name", Filter.Name); 409 IO.mapOptional("Scope", Filter.Scope); 410 IO.mapRequired("Args", Filter.FilterArgs); 411 } 412 }; 413 414 template <> struct MappingTraits<TaintConfiguration::Propagation> { 415 static void mapping(IO &IO, TaintConfiguration::Propagation &Propagation) { 416 IO.mapRequired("Name", Propagation.Name); 417 IO.mapOptional("Scope", Propagation.Scope); 418 IO.mapOptional("SrcArgs", Propagation.SrcArgs); 419 IO.mapOptional("DstArgs", Propagation.DstArgs); 420 IO.mapOptional("VariadicType", Propagation.VarType); 421 IO.mapOptional("VariadicIndex", Propagation.VarIndex); 422 } 423 }; 424 425 template <> struct ScalarEnumerationTraits<TaintConfiguration::VariadicType> { 426 static void enumeration(IO &IO, TaintConfiguration::VariadicType &Value) { 427 IO.enumCase(Value, "None", TaintConfiguration::VariadicType::None); 428 IO.enumCase(Value, "Src", TaintConfiguration::VariadicType::Src); 429 IO.enumCase(Value, "Dst", TaintConfiguration::VariadicType::Dst); 430 } 431 }; 432 } // namespace yaml 433 } // namespace llvm 434 435 /// A set which is used to pass information from call pre-visit instruction 436 /// to the call post-visit. The values are signed integers, which are either 437 /// ReturnValueIndex, or indexes of the pointer/reference argument, which 438 /// points to data, which should be tainted on return. 439 REGISTER_MAP_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, const LocationContext *, 440 ImmutableSet<ArgIdxTy>) 441 REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(ArgIdxFactory, ArgIdxTy) 442 443 void GenericTaintRuleParser::validateArgVector(const std::string &Option, 444 const ArgVecTy &Args) const { 445 for (ArgIdxTy Arg : Args) { 446 if (Arg < ReturnValueIndex) { 447 Mgr.reportInvalidCheckerOptionValue( 448 Mgr.getChecker<GenericTaintChecker>(), Option, 449 "an argument number for propagation rules greater or equal to -1"); 450 } 451 } 452 } 453 454 template <typename Config> 455 GenericTaintRuleParser::NamePartsTy 456 GenericTaintRuleParser::parseNameParts(const Config &C) { 457 NamePartsTy NameParts; 458 if (!C.Scope.empty()) { 459 // If the Scope argument contains multiple "::" parts, those are considered 460 // namespace identifiers. 461 llvm::SmallVector<StringRef, 2> NSParts; 462 StringRef{C.Scope}.split(NSParts, "::", /*MaxSplit*/ -1, 463 /*KeepEmpty*/ false); 464 NameParts.append(NSParts.begin(), NSParts.end()); 465 } 466 NameParts.emplace_back(C.Name); 467 return NameParts; 468 } 469 470 template <typename Config> 471 void GenericTaintRuleParser::consumeRulesFromConfig(const Config &C, 472 GenericTaintRule &&Rule, 473 RulesContTy &Rules) { 474 NamePartsTy NameParts = parseNameParts(C); 475 llvm::SmallVector<const char *, 2> CallDescParts{NameParts.size()}; 476 llvm::transform(NameParts, CallDescParts.begin(), 477 [](SmallString<32> &S) { return S.c_str(); }); 478 Rules.emplace_back(CallDescription(CallDescParts), std::move(Rule)); 479 } 480 481 void GenericTaintRuleParser::parseConfig(const std::string &Option, 482 TaintConfiguration::Sink &&S, 483 RulesContTy &Rules) const { 484 validateArgVector(Option, S.SinkArgs); 485 consumeRulesFromConfig(S, GenericTaintRule::Sink(std::move(S.SinkArgs)), 486 Rules); 487 } 488 489 void GenericTaintRuleParser::parseConfig(const std::string &Option, 490 TaintConfiguration::Filter &&S, 491 RulesContTy &Rules) const { 492 validateArgVector(Option, S.FilterArgs); 493 consumeRulesFromConfig(S, GenericTaintRule::Filter(std::move(S.FilterArgs)), 494 Rules); 495 } 496 497 void GenericTaintRuleParser::parseConfig(const std::string &Option, 498 TaintConfiguration::Propagation &&P, 499 RulesContTy &Rules) const { 500 validateArgVector(Option, P.SrcArgs); 501 validateArgVector(Option, P.DstArgs); 502 bool IsSrcVariadic = P.VarType == TaintConfiguration::VariadicType::Src; 503 bool IsDstVariadic = P.VarType == TaintConfiguration::VariadicType::Dst; 504 Optional<ArgIdxTy> JustVarIndex = P.VarIndex; 505 506 ArgSet SrcDesc(std::move(P.SrcArgs), IsSrcVariadic ? JustVarIndex : None); 507 ArgSet DstDesc(std::move(P.DstArgs), IsDstVariadic ? JustVarIndex : None); 508 509 consumeRulesFromConfig( 510 P, GenericTaintRule::Prop(std::move(SrcDesc), std::move(DstDesc)), Rules); 511 } 512 513 GenericTaintRuleParser::RulesContTy 514 GenericTaintRuleParser::parseConfiguration(const std::string &Option, 515 TaintConfiguration &&Config) const { 516 517 RulesContTy Rules; 518 519 for (auto &F : Config.Filters) 520 parseConfig(Option, std::move(F), Rules); 521 522 for (auto &S : Config.Sinks) 523 parseConfig(Option, std::move(S), Rules); 524 525 for (auto &P : Config.Propagations) 526 parseConfig(Option, std::move(P), Rules); 527 528 return Rules; 529 } 530 531 void GenericTaintChecker::initTaintRules(CheckerContext &C) const { 532 // Check for exact name match for functions without builtin substitutes. 533 // Use qualified name, because these are C functions without namespace. 534 535 if (StaticTaintRules || DynamicTaintRules) 536 return; 537 538 using RulesConstructionTy = 539 std::vector<std::pair<CallDescription, GenericTaintRule>>; 540 using TR = GenericTaintRule; 541 542 const Builtin::Context &BI = C.getASTContext().BuiltinInfo; 543 544 RulesConstructionTy GlobalCRules{ 545 // Sources 546 {{"fdopen"}, TR::Source({{ReturnValueIndex}})}, 547 {{"fopen"}, TR::Source({{ReturnValueIndex}})}, 548 {{"freopen"}, TR::Source({{ReturnValueIndex}})}, 549 {{"getch"}, TR::Source({{ReturnValueIndex}})}, 550 {{"getchar"}, TR::Source({{ReturnValueIndex}})}, 551 {{"getchar_unlocked"}, TR::Source({{ReturnValueIndex}})}, 552 {{"gets"}, TR::Source({{0}, ReturnValueIndex})}, 553 {{"gets_s"}, TR::Source({{0}, ReturnValueIndex})}, 554 {{"scanf"}, TR::Source({{}, 1})}, 555 {{"scanf_s"}, TR::Source({{}, {1}})}, 556 {{"wgetch"}, TR::Source({{}, ReturnValueIndex})}, 557 // Sometimes the line between taint sources and propagators is blurry. 558 // _IO_getc is choosen to be a source, but could also be a propagator. 559 // This way it is simpler, as modeling it as a propagator would require 560 // to model the possible sources of _IO_FILE * values, which the _IO_getc 561 // function takes as parameters. 562 {{"_IO_getc"}, TR::Source({{ReturnValueIndex}})}, 563 {{"getcwd"}, TR::Source({{0, ReturnValueIndex}})}, 564 {{"getwd"}, TR::Source({{0, ReturnValueIndex}})}, 565 {{"readlink"}, TR::Source({{1, ReturnValueIndex}})}, 566 {{"readlinkat"}, TR::Source({{2, ReturnValueIndex}})}, 567 {{"get_current_dir_name"}, TR::Source({{ReturnValueIndex}})}, 568 {{"gethostname"}, TR::Source({{0}})}, 569 {{"getnameinfo"}, TR::Source({{2, 4}})}, 570 {{"getseuserbyname"}, TR::Source({{1, 2}})}, 571 {{"getgroups"}, TR::Source({{1, ReturnValueIndex}})}, 572 {{"getlogin"}, TR::Source({{ReturnValueIndex}})}, 573 {{"getlogin_r"}, TR::Source({{0}})}, 574 575 // Props 576 {{"atoi"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 577 {{"atol"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 578 {{"atoll"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 579 {{"fgetc"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 580 {{"fgetln"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 581 {{"fgets"}, TR::Prop({{2}}, {{0, ReturnValueIndex}})}, 582 {{"fscanf"}, TR::Prop({{0}}, {{}, 2})}, 583 {{"sscanf"}, TR::Prop({{0}}, {{}, 2})}, 584 {{"getc"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 585 {{"getc_unlocked"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 586 {{"getdelim"}, TR::Prop({{3}}, {{0}})}, 587 {{"getline"}, TR::Prop({{2}}, {{0}})}, 588 {{"getw"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 589 {{"pread"}, TR::Prop({{0, 1, 2, 3}}, {{1, ReturnValueIndex}})}, 590 {{"read"}, TR::Prop({{0, 2}}, {{1, ReturnValueIndex}})}, 591 {{"strchr"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 592 {{"strrchr"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 593 {{"tolower"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 594 {{"toupper"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 595 {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrncat)}}, 596 TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})}, 597 {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrlcpy)}}, 598 TR::Prop({{1, 2}}, {{0}})}, 599 {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrlcat)}}, 600 TR::Prop({{1, 2}}, {{0}})}, 601 {{CDF_MaybeBuiltin, {"snprintf"}}, 602 TR::Prop({{1}, 3}, {{0, ReturnValueIndex}})}, 603 {{CDF_MaybeBuiltin, {"sprintf"}}, 604 TR::Prop({{1}, 2}, {{0, ReturnValueIndex}})}, 605 {{CDF_MaybeBuiltin, {"strcpy"}}, 606 TR::Prop({{1}}, {{0, ReturnValueIndex}})}, 607 {{CDF_MaybeBuiltin, {"stpcpy"}}, 608 TR::Prop({{1}}, {{0, ReturnValueIndex}})}, 609 {{CDF_MaybeBuiltin, {"strcat"}}, 610 TR::Prop({{1}}, {{0, ReturnValueIndex}})}, 611 {{CDF_MaybeBuiltin, {"strdup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 612 {{CDF_MaybeBuiltin, {"strdupa"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 613 {{CDF_MaybeBuiltin, {"wcsdup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 614 615 // Sinks 616 {{"system"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, 617 {{"popen"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, 618 {{"execl"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, 619 {{"execle"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, 620 {{"execlp"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, 621 {{"execvp"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, 622 {{"execvP"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, 623 {{"execve"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, 624 {{"dlopen"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, 625 {{CDF_MaybeBuiltin, {"malloc"}}, TR::Sink({{0}}, MsgTaintedBufferSize)}, 626 {{CDF_MaybeBuiltin, {"calloc"}}, TR::Sink({{0}}, MsgTaintedBufferSize)}, 627 {{CDF_MaybeBuiltin, {"alloca"}}, TR::Sink({{0}}, MsgTaintedBufferSize)}, 628 {{CDF_MaybeBuiltin, {"memccpy"}}, TR::Sink({{3}}, MsgTaintedBufferSize)}, 629 {{CDF_MaybeBuiltin, {"realloc"}}, TR::Sink({{1}}, MsgTaintedBufferSize)}, 630 {{{"setproctitle"}}, TR::Sink({{0}, 1}, MsgUncontrolledFormatString)}, 631 {{{"setproctitle_fast"}}, 632 TR::Sink({{0}, 1}, MsgUncontrolledFormatString)}, 633 634 // SinkProps 635 {{CDF_MaybeBuiltin, BI.getName(Builtin::BImemcpy)}, 636 TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}}, 637 MsgTaintedBufferSize)}, 638 {{CDF_MaybeBuiltin, {BI.getName(Builtin::BImemmove)}}, 639 TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}}, 640 MsgTaintedBufferSize)}, 641 {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrncpy)}}, 642 TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}}, 643 MsgTaintedBufferSize)}, 644 {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrndup)}}, 645 TR::SinkProp({{1}}, {{0, 1}}, {{ReturnValueIndex}}, 646 MsgTaintedBufferSize)}, 647 {{CDF_MaybeBuiltin, {"bcopy"}}, 648 TR::SinkProp({{2}}, {{0, 2}}, {{1}}, MsgTaintedBufferSize)}}; 649 650 // `getenv` returns taint only in untrusted environments. 651 if (TR::UntrustedEnv(C)) { 652 // void setproctitle_init(int argc, char *argv[], char *envp[]) 653 GlobalCRules.push_back( 654 {{{"setproctitle_init"}}, TR::Sink({{1, 2}}, MsgCustomSink)}); 655 GlobalCRules.push_back({{"getenv"}, TR::Source({{ReturnValueIndex}})}); 656 } 657 658 StaticTaintRules.emplace(std::make_move_iterator(GlobalCRules.begin()), 659 std::make_move_iterator(GlobalCRules.end())); 660 661 // User-provided taint configuration. 662 CheckerManager *Mgr = C.getAnalysisManager().getCheckerManager(); 663 assert(Mgr); 664 GenericTaintRuleParser ConfigParser{*Mgr}; 665 std::string Option{"Config"}; 666 StringRef ConfigFile = 667 Mgr->getAnalyzerOptions().getCheckerStringOption(this, Option); 668 llvm::Optional<TaintConfiguration> Config = 669 getConfiguration<TaintConfiguration>(*Mgr, this, Option, ConfigFile); 670 if (!Config) { 671 // We don't have external taint config, no parsing required. 672 DynamicTaintRules = RuleLookupTy{}; 673 return; 674 } 675 676 GenericTaintRuleParser::RulesContTy Rules{ 677 ConfigParser.parseConfiguration(Option, std::move(Config.getValue()))}; 678 679 DynamicTaintRules.emplace(std::make_move_iterator(Rules.begin()), 680 std::make_move_iterator(Rules.end())); 681 } 682 683 void GenericTaintChecker::checkPreCall(const CallEvent &Call, 684 CheckerContext &C) const { 685 initTaintRules(C); 686 687 // FIXME: this should be much simpler. 688 if (const auto *Rule = 689 Call.isGlobalCFunction() ? StaticTaintRules->lookup(Call) : nullptr) 690 Rule->process(*this, Call, C); 691 else if (const auto *Rule = DynamicTaintRules->lookup(Call)) 692 Rule->process(*this, Call, C); 693 694 // FIXME: These edge cases are to be eliminated from here eventually. 695 // 696 // Additional check that is not supported by CallDescription. 697 // TODO: Make CallDescription be able to match attributes such as printf-like 698 // arguments. 699 checkUncontrolledFormatString(Call, C); 700 701 // TODO: Modeling sockets should be done in a specific checker. 702 // Socket is a source, which taints the return value. 703 taintUnsafeSocketProtocol(Call, C); 704 } 705 706 void GenericTaintChecker::checkPostCall(const CallEvent &Call, 707 CheckerContext &C) const { 708 // Set the marked values as tainted. The return value only accessible from 709 // checkPostStmt. 710 ProgramStateRef State = C.getState(); 711 const StackFrameContext *CurrentFrame = C.getStackFrame(); 712 713 // Depending on what was tainted at pre-visit, we determined a set of 714 // arguments which should be tainted after the function returns. These are 715 // stored in the state as TaintArgsOnPostVisit set. 716 TaintArgsOnPostVisitTy TaintArgsMap = State->get<TaintArgsOnPostVisit>(); 717 718 const ImmutableSet<ArgIdxTy> *TaintArgs = TaintArgsMap.lookup(CurrentFrame); 719 if (!TaintArgs) 720 return; 721 assert(!TaintArgs->isEmpty()); 722 723 LLVM_DEBUG(for (ArgIdxTy I 724 : *TaintArgs) { 725 llvm::dbgs() << "PostCall<"; 726 Call.dump(llvm::dbgs()); 727 llvm::dbgs() << "> actually wants to taint arg index: " << I << '\n'; 728 }); 729 730 for (ArgIdxTy ArgNum : *TaintArgs) { 731 // Special handling for the tainted return value. 732 if (ArgNum == ReturnValueIndex) { 733 State = addTaint(State, Call.getReturnValue()); 734 continue; 735 } 736 737 // The arguments are pointer arguments. The data they are pointing at is 738 // tainted after the call. 739 if (auto V = getPointeeOf(C, Call.getArgSVal(ArgNum))) 740 State = addTaint(State, *V); 741 } 742 743 // Clear up the taint info from the state. 744 State = State->remove<TaintArgsOnPostVisit>(CurrentFrame); 745 C.addTransition(State); 746 } 747 748 void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State, 749 const char *NL, const char *Sep) const { 750 printTaint(State, Out, NL, Sep); 751 } 752 753 void GenericTaintRule::process(const GenericTaintChecker &Checker, 754 const CallEvent &Call, CheckerContext &C) const { 755 ProgramStateRef State = C.getState(); 756 const ArgIdxTy CallNumArgs = fromArgumentCount(Call.getNumArgs()); 757 758 /// Iterate every call argument, and get their corresponding Expr and SVal. 759 const auto ForEachCallArg = [&C, &Call, CallNumArgs](auto &&Fun) { 760 for (ArgIdxTy I = ReturnValueIndex; I < CallNumArgs; ++I) { 761 const Expr *E = GetArgExpr(I, Call); 762 Fun(I, E, C.getSVal(E)); 763 } 764 }; 765 766 /// Check for taint sinks. 767 ForEachCallArg([this, &Checker, &C, &State](ArgIdxTy I, const Expr *E, SVal) { 768 if (SinkArgs.contains(I) && isTaintedOrPointsToTainted(E, State, C)) 769 Checker.generateReportIfTainted(E, SinkMsg.getValueOr(MsgCustomSink), C); 770 }); 771 772 /// Check for taint filters. 773 ForEachCallArg([this, &C, &State](ArgIdxTy I, const Expr *E, SVal S) { 774 if (FilterArgs.contains(I)) { 775 State = removeTaint(State, S); 776 if (auto P = getPointeeOf(C, S)) 777 State = removeTaint(State, *P); 778 } 779 }); 780 781 /// Check for taint propagation sources. 782 /// A rule is relevant if PropSrcArgs is empty, or if any of its signified 783 /// args are tainted in context of the current CallEvent. 784 bool IsMatching = PropSrcArgs.isEmpty(); 785 ForEachCallArg( 786 [this, &C, &IsMatching, &State](ArgIdxTy I, const Expr *E, SVal) { 787 IsMatching = IsMatching || (PropSrcArgs.contains(I) && 788 isTaintedOrPointsToTainted(E, State, C)); 789 }); 790 791 if (!IsMatching) 792 return; 793 794 const auto WouldEscape = [](SVal V, QualType Ty) -> bool { 795 if (!V.getAs<Loc>()) 796 return false; 797 798 const bool IsNonConstRef = Ty->isReferenceType() && !Ty.isConstQualified(); 799 const bool IsNonConstPtr = 800 Ty->isPointerType() && !Ty->getPointeeType().isConstQualified(); 801 802 return IsNonConstRef || IsNonConstPtr; 803 }; 804 805 /// Propagate taint where it is necessary. 806 auto &F = State->getStateManager().get_context<ArgIdxFactory>(); 807 ImmutableSet<ArgIdxTy> Result = F.getEmptySet(); 808 ForEachCallArg( 809 [&](ArgIdxTy I, const Expr *E, SVal V) { 810 if (PropDstArgs.contains(I)) { 811 LLVM_DEBUG(llvm::dbgs() << "PreCall<"; Call.dump(llvm::dbgs()); 812 llvm::dbgs() 813 << "> prepares tainting arg index: " << I << '\n';); 814 Result = F.add(Result, I); 815 } 816 817 // TODO: We should traverse all reachable memory regions via the 818 // escaping parameter. Instead of doing that we simply mark only the 819 // referred memory region as tainted. 820 if (WouldEscape(V, E->getType())) { 821 LLVM_DEBUG(if (!Result.contains(I)) { 822 llvm::dbgs() << "PreCall<"; 823 Call.dump(llvm::dbgs()); 824 llvm::dbgs() << "> prepares tainting arg index: " << I << '\n'; 825 }); 826 Result = F.add(Result, I); 827 } 828 }); 829 830 if (!Result.isEmpty()) 831 State = State->set<TaintArgsOnPostVisit>(C.getStackFrame(), Result); 832 C.addTransition(State); 833 } 834 835 bool GenericTaintRule::UntrustedEnv(CheckerContext &C) { 836 return !C.getAnalysisManager() 837 .getAnalyzerOptions() 838 .ShouldAssumeControlledEnvironment; 839 } 840 841 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg, 842 CheckerContext &C) const { 843 assert(E); 844 Optional<SVal> TaintedSVal{getTaintedPointeeOrPointer(C, C.getSVal(E))}; 845 846 if (!TaintedSVal) 847 return false; 848 849 // Generate diagnostic. 850 if (ExplodedNode *N = C.generateNonFatalErrorNode()) { 851 auto report = std::make_unique<PathSensitiveBugReport>(BT, Msg, N); 852 report->addRange(E->getSourceRange()); 853 report->addVisitor(std::make_unique<TaintBugVisitor>(*TaintedSVal)); 854 C.emitReport(std::move(report)); 855 return true; 856 } 857 return false; 858 } 859 860 /// TODO: remove checking for printf format attributes and socket whitelisting 861 /// from GenericTaintChecker, and that means the following functions: 862 /// getPrintfFormatArgumentNum, 863 /// GenericTaintChecker::checkUncontrolledFormatString, 864 /// GenericTaintChecker::taintUnsafeSocketProtocol 865 866 static bool getPrintfFormatArgumentNum(const CallEvent &Call, 867 const CheckerContext &C, 868 ArgIdxTy &ArgNum) { 869 // Find if the function contains a format string argument. 870 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, 871 // vsnprintf, syslog, custom annotated functions. 872 const Decl *CallDecl = Call.getDecl(); 873 if (!CallDecl) 874 return false; 875 const FunctionDecl *FDecl = CallDecl->getAsFunction(); 876 if (!FDecl) 877 return false; 878 879 const ArgIdxTy CallNumArgs = fromArgumentCount(Call.getNumArgs()); 880 881 for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) { 882 ArgNum = Format->getFormatIdx() - 1; 883 if ((Format->getType()->getName() == "printf") && CallNumArgs > ArgNum) 884 return true; 885 } 886 887 return false; 888 } 889 890 bool GenericTaintChecker::checkUncontrolledFormatString( 891 const CallEvent &Call, CheckerContext &C) const { 892 // Check if the function contains a format string argument. 893 ArgIdxTy ArgNum = 0; 894 if (!getPrintfFormatArgumentNum(Call, C, ArgNum)) 895 return false; 896 897 // If either the format string content or the pointer itself are tainted, 898 // warn. 899 return generateReportIfTainted(Call.getArgExpr(ArgNum), 900 MsgUncontrolledFormatString, C); 901 } 902 903 void GenericTaintChecker::taintUnsafeSocketProtocol(const CallEvent &Call, 904 CheckerContext &C) const { 905 if (Call.getNumArgs() < 1) 906 return; 907 const IdentifierInfo *ID = Call.getCalleeIdentifier(); 908 if (!ID) 909 return; 910 if (!ID->getName().equals("socket")) 911 return; 912 913 SourceLocation DomLoc = Call.getArgExpr(0)->getExprLoc(); 914 StringRef DomName = C.getMacroNameOrSpelling(DomLoc); 915 // Allow internal communication protocols. 916 bool SafeProtocol = DomName.equals("AF_SYSTEM") || 917 DomName.equals("AF_LOCAL") || DomName.equals("AF_UNIX") || 918 DomName.equals("AF_RESERVED_36"); 919 if (SafeProtocol) 920 return; 921 922 ProgramStateRef State = C.getState(); 923 auto &F = State->getStateManager().get_context<ArgIdxFactory>(); 924 ImmutableSet<ArgIdxTy> Result = F.add(F.getEmptySet(), ReturnValueIndex); 925 State = State->set<TaintArgsOnPostVisit>(C.getStackFrame(), Result); 926 C.addTransition(State); 927 } 928 929 /// Checker registration 930 931 void ento::registerGenericTaintChecker(CheckerManager &Mgr) { 932 Mgr.registerChecker<GenericTaintChecker>(); 933 } 934 935 bool ento::shouldRegisterGenericTaintChecker(const CheckerManager &mgr) { 936 return true; 937 } 938