1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This checker defines the attack surface for generic taint propagation. 10 // 11 // The taint information produced by it might be useful to other checkers. For 12 // example, checkers should report errors which involve tainted data more 13 // aggressively, even if the involved symbols are under constrained. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "Taint.h" 18 #include "Yaml.h" 19 #include "clang/AST/Attr.h" 20 #include "clang/Basic/Builtins.h" 21 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" 22 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 23 #include "clang/StaticAnalyzer/Core/Checker.h" 24 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 25 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 26 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 27 #include "llvm/ADT/StringMap.h" 28 #include "llvm/Support/YAMLTraits.h" 29 #include <limits> 30 #include <utility> 31 32 using namespace clang; 33 using namespace ento; 34 using namespace taint; 35 36 namespace { 37 class GenericTaintChecker 38 : public Checker<check::PostStmt<CallExpr>, check::PreStmt<CallExpr>> { 39 public: 40 static void *getTag() { 41 static int Tag; 42 return &Tag; 43 } 44 45 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; 46 47 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; 48 49 void printState(raw_ostream &Out, ProgramStateRef State, const char *NL, 50 const char *Sep) const override; 51 52 using ArgVector = SmallVector<unsigned, 2>; 53 using SignedArgVector = SmallVector<int, 2>; 54 55 enum class VariadicType { None, Src, Dst }; 56 57 /// Used to parse the configuration file. 58 struct TaintConfiguration { 59 using NameArgsPair = std::pair<std::string, ArgVector>; 60 61 struct Propagation { 62 std::string Name; 63 ArgVector SrcArgs; 64 SignedArgVector DstArgs; 65 VariadicType VarType; 66 unsigned VarIndex; 67 }; 68 69 std::vector<Propagation> Propagations; 70 std::vector<NameArgsPair> Filters; 71 std::vector<NameArgsPair> Sinks; 72 73 TaintConfiguration() = default; 74 TaintConfiguration(const TaintConfiguration &) = delete; 75 TaintConfiguration(TaintConfiguration &&) = default; 76 TaintConfiguration &operator=(const TaintConfiguration &) = delete; 77 TaintConfiguration &operator=(TaintConfiguration &&) = default; 78 }; 79 80 /// Convert SignedArgVector to ArgVector. 81 ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option, 82 SignedArgVector Args); 83 84 /// Parse the config. 85 void parseConfiguration(CheckerManager &Mgr, const std::string &Option, 86 TaintConfiguration &&Config); 87 88 static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()}; 89 /// Denotes the return vale. 90 static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() - 91 1}; 92 93 private: 94 mutable std::unique_ptr<BugType> BT; 95 void initBugType() const { 96 if (!BT) 97 BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data")); 98 } 99 100 /// Catch taint related bugs. Check if tainted data is passed to a 101 /// system call etc. 102 bool checkPre(const CallExpr *CE, CheckerContext &C) const; 103 104 /// Add taint sources on a pre-visit. 105 void addSourcesPre(const CallExpr *CE, CheckerContext &C) const; 106 107 /// Propagate taint generated at pre-visit. 108 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const; 109 110 /// Check if the region the expression evaluates to is the standard input, 111 /// and thus, is tainted. 112 static bool isStdin(const Expr *E, CheckerContext &C); 113 114 /// Given a pointer argument, return the value it points to. 115 static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg); 116 117 /// Check for CWE-134: Uncontrolled Format String. 118 static const char MsgUncontrolledFormatString[]; 119 bool checkUncontrolledFormatString(const CallExpr *CE, 120 CheckerContext &C) const; 121 122 /// Check for: 123 /// CERT/STR02-C. "Sanitize data passed to complex subsystems" 124 /// CWE-78, "Failure to Sanitize Data into an OS Command" 125 static const char MsgSanitizeSystemArgs[]; 126 bool checkSystemCall(const CallExpr *CE, StringRef Name, 127 CheckerContext &C) const; 128 129 /// Check if tainted data is used as a buffer size ins strn.. functions, 130 /// and allocators. 131 static const char MsgTaintedBufferSize[]; 132 bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl, 133 CheckerContext &C) const; 134 135 /// Generate a report if the expression is tainted or points to tainted data. 136 bool generateReportIfTainted(const Expr *E, const char Msg[], 137 CheckerContext &C) const; 138 139 /// A struct used to specify taint propagation rules for a function. 140 /// 141 /// If any of the possible taint source arguments is tainted, all of the 142 /// destination arguments should also be tainted. Use InvalidArgIndex in the 143 /// src list to specify that all of the arguments can introduce taint. Use 144 /// InvalidArgIndex in the dst arguments to signify that all the non-const 145 /// pointer and reference arguments might be tainted on return. If 146 /// ReturnValueIndex is added to the dst list, the return value will be 147 /// tainted. 148 struct TaintPropagationRule { 149 using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *, 150 CheckerContext &C); 151 152 /// List of arguments which can be taint sources and should be checked. 153 ArgVector SrcArgs; 154 /// List of arguments which should be tainted on function return. 155 ArgVector DstArgs; 156 /// Index for the first variadic parameter if exist. 157 unsigned VariadicIndex; 158 /// Show when a function has variadic parameters. If it has, it marks all 159 /// of them as source or destination. 160 VariadicType VarType; 161 /// Special function for tainted source determination. If defined, it can 162 /// override the default behavior. 163 PropagationFuncType PropagationFunc; 164 165 TaintPropagationRule() 166 : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None), 167 PropagationFunc(nullptr) {} 168 169 TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst, 170 VariadicType Var = VariadicType::None, 171 unsigned VarIndex = InvalidArgIndex, 172 PropagationFuncType Func = nullptr) 173 : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)), 174 VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {} 175 176 /// Get the propagation rule for a given function. 177 static TaintPropagationRule 178 getTaintPropagationRule(const FunctionDecl *FDecl, StringRef Name, 179 CheckerContext &C); 180 181 void addSrcArg(unsigned A) { SrcArgs.push_back(A); } 182 void addDstArg(unsigned A) { DstArgs.push_back(A); } 183 184 bool isNull() const { 185 return SrcArgs.empty() && DstArgs.empty() && 186 VariadicType::None == VarType; 187 } 188 189 bool isDestinationArgument(unsigned ArgNum) const { 190 return (llvm::find(DstArgs, ArgNum) != DstArgs.end()); 191 } 192 193 static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State, 194 CheckerContext &C) { 195 if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C)) 196 return true; 197 198 if (!E->getType().getTypePtr()->isPointerType()) 199 return false; 200 201 Optional<SVal> V = getPointedToSVal(C, E); 202 return (V && isTainted(State, *V)); 203 } 204 205 /// Pre-process a function which propagates taint according to the 206 /// taint rule. 207 ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const; 208 209 // Functions for custom taintedness propagation. 210 static bool postSocket(bool IsTainted, const CallExpr *CE, 211 CheckerContext &C); 212 }; 213 214 using NameRuleMap = llvm::StringMap<TaintPropagationRule>; 215 using NameArgMap = llvm::StringMap<ArgVector>; 216 217 /// Defines a map between the propagation function's name and 218 /// TaintPropagationRule. 219 NameRuleMap CustomPropagations; 220 221 /// Defines a map between the filter function's name and filtering args. 222 NameArgMap CustomFilters; 223 224 /// Defines a map between the sink function's name and sinking args. 225 NameArgMap CustomSinks; 226 }; 227 228 const unsigned GenericTaintChecker::ReturnValueIndex; 229 const unsigned GenericTaintChecker::InvalidArgIndex; 230 231 const char GenericTaintChecker::MsgUncontrolledFormatString[] = 232 "Untrusted data is used as a format string " 233 "(CWE-134: Uncontrolled Format String)"; 234 235 const char GenericTaintChecker::MsgSanitizeSystemArgs[] = 236 "Untrusted data is passed to a system call " 237 "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; 238 239 const char GenericTaintChecker::MsgTaintedBufferSize[] = 240 "Untrusted data is used to specify the buffer size " 241 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space " 242 "for character data and the null terminator)"; 243 } // end of anonymous namespace 244 245 using TaintConfig = GenericTaintChecker::TaintConfiguration; 246 247 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation) 248 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameArgsPair) 249 250 namespace llvm { 251 namespace yaml { 252 template <> struct MappingTraits<TaintConfig> { 253 static void mapping(IO &IO, TaintConfig &Config) { 254 IO.mapOptional("Propagations", Config.Propagations); 255 IO.mapOptional("Filters", Config.Filters); 256 IO.mapOptional("Sinks", Config.Sinks); 257 } 258 }; 259 260 template <> struct MappingTraits<TaintConfig::Propagation> { 261 static void mapping(IO &IO, TaintConfig::Propagation &Propagation) { 262 IO.mapRequired("Name", Propagation.Name); 263 IO.mapOptional("SrcArgs", Propagation.SrcArgs); 264 IO.mapOptional("DstArgs", Propagation.DstArgs); 265 IO.mapOptional("VariadicType", Propagation.VarType, 266 GenericTaintChecker::VariadicType::None); 267 IO.mapOptional("VariadicIndex", Propagation.VarIndex, 268 GenericTaintChecker::InvalidArgIndex); 269 } 270 }; 271 272 template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> { 273 static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) { 274 IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None); 275 IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src); 276 IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst); 277 } 278 }; 279 280 template <> struct MappingTraits<TaintConfig::NameArgsPair> { 281 static void mapping(IO &IO, TaintConfig::NameArgsPair &NameArg) { 282 IO.mapRequired("Name", NameArg.first); 283 IO.mapRequired("Args", NameArg.second); 284 } 285 }; 286 } // namespace yaml 287 } // namespace llvm 288 289 /// A set which is used to pass information from call pre-visit instruction 290 /// to the call post-visit. The values are unsigned integers, which are either 291 /// ReturnValueIndex, or indexes of the pointer/reference argument, which 292 /// points to data, which should be tainted on return. 293 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned) 294 295 GenericTaintChecker::ArgVector GenericTaintChecker::convertToArgVector( 296 CheckerManager &Mgr, const std::string &Option, SignedArgVector Args) { 297 ArgVector Result; 298 for (int Arg : Args) { 299 if (Arg == -1) 300 Result.push_back(ReturnValueIndex); 301 else if (Arg < -1) { 302 Result.push_back(InvalidArgIndex); 303 Mgr.reportInvalidCheckerOptionValue( 304 this, Option, 305 "an argument number for propagation rules greater or equal to -1"); 306 } else 307 Result.push_back(static_cast<unsigned>(Arg)); 308 } 309 return Result; 310 } 311 312 void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr, 313 const std::string &Option, 314 TaintConfiguration &&Config) { 315 for (auto &P : Config.Propagations) { 316 GenericTaintChecker::CustomPropagations.try_emplace( 317 P.Name, std::move(P.SrcArgs), 318 convertToArgVector(Mgr, Option, P.DstArgs), P.VarType, P.VarIndex); 319 } 320 321 for (auto &F : Config.Filters) { 322 GenericTaintChecker::CustomFilters.try_emplace(F.first, 323 std::move(F.second)); 324 } 325 326 for (auto &S : Config.Sinks) { 327 GenericTaintChecker::CustomSinks.try_emplace(S.first, std::move(S.second)); 328 } 329 } 330 331 GenericTaintChecker::TaintPropagationRule 332 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( 333 const FunctionDecl *FDecl, StringRef Name, CheckerContext &C) { 334 // TODO: Currently, we might lose precision here: we always mark a return 335 // value as tainted even if it's just a pointer, pointing to tainted data. 336 337 // Check for exact name match for functions without builtin substitutes. 338 TaintPropagationRule Rule = 339 llvm::StringSwitch<TaintPropagationRule>(Name) 340 // Source functions 341 // TODO: Add support for vfscanf & family. 342 .Case("fdopen", TaintPropagationRule({}, {ReturnValueIndex})) 343 .Case("fopen", TaintPropagationRule({}, {ReturnValueIndex})) 344 .Case("freopen", TaintPropagationRule({}, {ReturnValueIndex})) 345 .Case("getch", TaintPropagationRule({}, {ReturnValueIndex})) 346 .Case("getchar", TaintPropagationRule({}, {ReturnValueIndex})) 347 .Case("getchar_unlocked", 348 TaintPropagationRule({}, {ReturnValueIndex})) 349 .Case("getenv", TaintPropagationRule({}, {ReturnValueIndex})) 350 .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex})) 351 .Case("scanf", TaintPropagationRule({}, {}, VariadicType::Dst, 1)) 352 .Case("socket", 353 TaintPropagationRule({}, {ReturnValueIndex}, VariadicType::None, 354 InvalidArgIndex, 355 &TaintPropagationRule::postSocket)) 356 .Case("wgetch", TaintPropagationRule({}, {ReturnValueIndex})) 357 // Propagating functions 358 .Case("atoi", TaintPropagationRule({0}, {ReturnValueIndex})) 359 .Case("atol", TaintPropagationRule({0}, {ReturnValueIndex})) 360 .Case("atoll", TaintPropagationRule({0}, {ReturnValueIndex})) 361 .Case("fgetc", TaintPropagationRule({0}, {ReturnValueIndex})) 362 .Case("fgetln", TaintPropagationRule({0}, {ReturnValueIndex})) 363 .Case("fgets", TaintPropagationRule({2}, {0, ReturnValueIndex})) 364 .Case("fscanf", TaintPropagationRule({0}, {}, VariadicType::Dst, 2)) 365 .Case("getc", TaintPropagationRule({0}, {ReturnValueIndex})) 366 .Case("getc_unlocked", TaintPropagationRule({0}, {ReturnValueIndex})) 367 .Case("getdelim", TaintPropagationRule({3}, {0})) 368 .Case("getline", TaintPropagationRule({2}, {0})) 369 .Case("getw", TaintPropagationRule({0}, {ReturnValueIndex})) 370 .Case("pread", 371 TaintPropagationRule({0, 1, 2, 3}, {1, ReturnValueIndex})) 372 .Case("read", TaintPropagationRule({0, 2}, {1, ReturnValueIndex})) 373 .Case("strchr", TaintPropagationRule({0}, {ReturnValueIndex})) 374 .Case("strrchr", TaintPropagationRule({0}, {ReturnValueIndex})) 375 .Case("tolower", TaintPropagationRule({0}, {ReturnValueIndex})) 376 .Case("toupper", TaintPropagationRule({0}, {ReturnValueIndex})) 377 .Default(TaintPropagationRule()); 378 379 if (!Rule.isNull()) 380 return Rule; 381 382 // Check if it's one of the memory setting/copying functions. 383 // This check is specialized but faster then calling isCLibraryFunction. 384 unsigned BId = 0; 385 if ((BId = FDecl->getMemoryFunctionKind())) 386 switch (BId) { 387 case Builtin::BImemcpy: 388 case Builtin::BImemmove: 389 case Builtin::BIstrncpy: 390 case Builtin::BIstrncat: 391 return TaintPropagationRule({1, 2}, {0, ReturnValueIndex}); 392 case Builtin::BIstrlcpy: 393 case Builtin::BIstrlcat: 394 return TaintPropagationRule({1, 2}, {0}); 395 case Builtin::BIstrndup: 396 return TaintPropagationRule({0, 1}, {ReturnValueIndex}); 397 398 default: 399 break; 400 }; 401 402 // Process all other functions which could be defined as builtins. 403 if (Rule.isNull()) { 404 if (C.isCLibraryFunction(FDecl, "snprintf")) 405 return TaintPropagationRule({1}, {0, ReturnValueIndex}, VariadicType::Src, 406 3); 407 else if (C.isCLibraryFunction(FDecl, "sprintf")) 408 return TaintPropagationRule({}, {0, ReturnValueIndex}, VariadicType::Src, 409 2); 410 else if (C.isCLibraryFunction(FDecl, "strcpy") || 411 C.isCLibraryFunction(FDecl, "stpcpy") || 412 C.isCLibraryFunction(FDecl, "strcat")) 413 return TaintPropagationRule({1}, {0, ReturnValueIndex}); 414 else if (C.isCLibraryFunction(FDecl, "bcopy")) 415 return TaintPropagationRule({0, 2}, {1}); 416 else if (C.isCLibraryFunction(FDecl, "strdup") || 417 C.isCLibraryFunction(FDecl, "strdupa")) 418 return TaintPropagationRule({0}, {ReturnValueIndex}); 419 else if (C.isCLibraryFunction(FDecl, "wcsdup")) 420 return TaintPropagationRule({0}, {ReturnValueIndex}); 421 } 422 423 // Skipping the following functions, since they might be used for cleansing 424 // or smart memory copy: 425 // - memccpy - copying until hitting a special character. 426 427 return TaintPropagationRule(); 428 } 429 430 void GenericTaintChecker::checkPreStmt(const CallExpr *CE, 431 CheckerContext &C) const { 432 // Check for taintedness related errors first: system call, uncontrolled 433 // format string, tainted buffer size. 434 if (checkPre(CE, C)) 435 return; 436 437 // Marks the function's arguments and/or return value tainted if it present in 438 // the list. 439 addSourcesPre(CE, C); 440 } 441 442 void GenericTaintChecker::checkPostStmt(const CallExpr *CE, 443 CheckerContext &C) const { 444 // Set the marked values as tainted. The return value only accessible from 445 // checkPostStmt. 446 propagateFromPre(CE, C); 447 } 448 449 void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State, 450 const char *NL, const char *Sep) const { 451 printTaint(State, Out, NL, Sep); 452 } 453 454 void GenericTaintChecker::addSourcesPre(const CallExpr *CE, 455 CheckerContext &C) const { 456 ProgramStateRef State = nullptr; 457 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 458 if (!FDecl || FDecl->getKind() != Decl::Function) 459 return; 460 461 StringRef Name = C.getCalleeName(FDecl); 462 if (Name.empty()) 463 return; 464 465 // First, try generating a propagation rule for this function. 466 TaintPropagationRule Rule = 467 TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C); 468 if (!Rule.isNull()) { 469 State = Rule.process(CE, C); 470 if (!State) 471 return; 472 C.addTransition(State); 473 return; 474 } 475 476 if (!State) 477 return; 478 C.addTransition(State); 479 } 480 481 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, 482 CheckerContext &C) const { 483 ProgramStateRef State = C.getState(); 484 485 // Depending on what was tainted at pre-visit, we determined a set of 486 // arguments which should be tainted after the function returns. These are 487 // stored in the state as TaintArgsOnPostVisit set. 488 TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>(); 489 if (TaintArgs.isEmpty()) 490 return false; 491 492 for (unsigned ArgNum : TaintArgs) { 493 // Special handling for the tainted return value. 494 if (ArgNum == ReturnValueIndex) { 495 State = addTaint(State, CE, C.getLocationContext()); 496 continue; 497 } 498 499 // The arguments are pointer arguments. The data they are pointing at is 500 // tainted after the call. 501 if (CE->getNumArgs() < (ArgNum + 1)) 502 return false; 503 const Expr *Arg = CE->getArg(ArgNum); 504 Optional<SVal> V = getPointedToSVal(C, Arg); 505 if (V) 506 State = addTaint(State, *V); 507 } 508 509 // Clear up the taint info from the state. 510 State = State->remove<TaintArgsOnPostVisit>(); 511 512 if (State != C.getState()) { 513 C.addTransition(State); 514 return true; 515 } 516 return false; 517 } 518 519 bool GenericTaintChecker::checkPre(const CallExpr *CE, 520 CheckerContext &C) const { 521 522 if (checkUncontrolledFormatString(CE, C)) 523 return true; 524 525 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 526 if (!FDecl || FDecl->getKind() != Decl::Function) 527 return false; 528 529 StringRef Name = C.getCalleeName(FDecl); 530 if (Name.empty()) 531 return false; 532 533 if (checkSystemCall(CE, Name, C)) 534 return true; 535 536 if (checkTaintedBufferSize(CE, FDecl, C)) 537 return true; 538 539 return false; 540 } 541 542 Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C, 543 const Expr *Arg) { 544 ProgramStateRef State = C.getState(); 545 SVal AddrVal = C.getSVal(Arg->IgnoreParens()); 546 if (AddrVal.isUnknownOrUndef()) 547 return None; 548 549 Optional<Loc> AddrLoc = AddrVal.getAs<Loc>(); 550 if (!AddrLoc) 551 return None; 552 553 QualType ArgTy = Arg->getType().getCanonicalType(); 554 if (!ArgTy->isPointerType()) 555 return None; 556 557 QualType ValTy = ArgTy->getPointeeType(); 558 559 // Do not dereference void pointers. Treat them as byte pointers instead. 560 // FIXME: we might want to consider more than just the first byte. 561 if (ValTy->isVoidType()) 562 ValTy = C.getASTContext().CharTy; 563 564 return State->getSVal(*AddrLoc, ValTy); 565 } 566 567 ProgramStateRef 568 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE, 569 CheckerContext &C) const { 570 ProgramStateRef State = C.getState(); 571 572 // Check for taint in arguments. 573 bool IsTainted = true; 574 for (unsigned ArgNum : SrcArgs) { 575 if (ArgNum >= CE->getNumArgs()) 576 return State; 577 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C))) 578 break; 579 } 580 581 // Check for taint in variadic arguments. 582 if (!IsTainted && VariadicType::Src == VarType) { 583 // Check if any of the arguments is tainted 584 for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) { 585 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C))) 586 break; 587 } 588 } 589 590 if (PropagationFunc) 591 IsTainted = PropagationFunc(IsTainted, CE, C); 592 593 if (!IsTainted) 594 return State; 595 596 // Mark the arguments which should be tainted after the function returns. 597 for (unsigned ArgNum : DstArgs) { 598 // Should mark the return value? 599 if (ArgNum == ReturnValueIndex) { 600 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex); 601 continue; 602 } 603 604 // Mark the given argument. 605 assert(ArgNum < CE->getNumArgs()); 606 State = State->add<TaintArgsOnPostVisit>(ArgNum); 607 } 608 609 // Mark all variadic arguments tainted if present. 610 if (VariadicType::Dst == VarType) { 611 // For all pointer and references that were passed in: 612 // If they are not pointing to const data, mark data as tainted. 613 // TODO: So far we are just going one level down; ideally we'd need to 614 // recurse here. 615 for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) { 616 const Expr *Arg = CE->getArg(i); 617 // Process pointer argument. 618 const Type *ArgTy = Arg->getType().getTypePtr(); 619 QualType PType = ArgTy->getPointeeType(); 620 if ((!PType.isNull() && !PType.isConstQualified()) || 621 (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) 622 State = State->add<TaintArgsOnPostVisit>(i); 623 } 624 } 625 626 return State; 627 } 628 629 // If argument 0(protocol domain) is network, the return value should get taint. 630 bool GenericTaintChecker::TaintPropagationRule::postSocket(bool /*IsTainted*/, 631 const CallExpr *CE, 632 CheckerContext &C) { 633 SourceLocation DomLoc = CE->getArg(0)->getExprLoc(); 634 StringRef DomName = C.getMacroNameOrSpelling(DomLoc); 635 // White list the internal communication protocols. 636 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") || 637 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36")) 638 return false; 639 640 return true; 641 } 642 643 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) { 644 ProgramStateRef State = C.getState(); 645 SVal Val = C.getSVal(E); 646 647 // stdin is a pointer, so it would be a region. 648 const MemRegion *MemReg = Val.getAsRegion(); 649 650 // The region should be symbolic, we do not know it's value. 651 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg); 652 if (!SymReg) 653 return false; 654 655 // Get it's symbol and find the declaration region it's pointing to. 656 const SymbolRegionValue *Sm = 657 dyn_cast<SymbolRegionValue>(SymReg->getSymbol()); 658 if (!Sm) 659 return false; 660 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion()); 661 if (!DeclReg) 662 return false; 663 664 // This region corresponds to a declaration, find out if it's a global/extern 665 // variable named stdin with the proper type. 666 if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { 667 D = D->getCanonicalDecl(); 668 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) { 669 const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr()); 670 if (PtrTy && PtrTy->getPointeeType().getCanonicalType() == 671 C.getASTContext().getFILEType().getCanonicalType()) 672 return true; 673 } 674 } 675 return false; 676 } 677 678 static bool getPrintfFormatArgumentNum(const CallExpr *CE, 679 const CheckerContext &C, 680 unsigned &ArgNum) { 681 // Find if the function contains a format string argument. 682 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, 683 // vsnprintf, syslog, custom annotated functions. 684 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 685 if (!FDecl) 686 return false; 687 for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) { 688 ArgNum = Format->getFormatIdx() - 1; 689 if ((Format->getType()->getName() == "printf") && CE->getNumArgs() > ArgNum) 690 return true; 691 } 692 693 // Or if a function is named setproctitle (this is a heuristic). 694 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) { 695 ArgNum = 0; 696 return true; 697 } 698 699 return false; 700 } 701 702 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, 703 const char Msg[], 704 CheckerContext &C) const { 705 assert(E); 706 707 // Check for taint. 708 ProgramStateRef State = C.getState(); 709 Optional<SVal> PointedToSVal = getPointedToSVal(C, E); 710 SVal TaintedSVal; 711 if (PointedToSVal && isTainted(State, *PointedToSVal)) 712 TaintedSVal = *PointedToSVal; 713 else if (isTainted(State, E, C.getLocationContext())) 714 TaintedSVal = C.getSVal(E); 715 else 716 return false; 717 718 // Generate diagnostic. 719 if (ExplodedNode *N = C.generateNonFatalErrorNode()) { 720 initBugType(); 721 auto report = llvm::make_unique<BugReport>(*BT, Msg, N); 722 report->addRange(E->getSourceRange()); 723 report->addVisitor(llvm::make_unique<TaintBugVisitor>(TaintedSVal)); 724 C.emitReport(std::move(report)); 725 return true; 726 } 727 return false; 728 } 729 730 bool GenericTaintChecker::checkUncontrolledFormatString( 731 const CallExpr *CE, CheckerContext &C) const { 732 // Check if the function contains a format string argument. 733 unsigned ArgNum = 0; 734 if (!getPrintfFormatArgumentNum(CE, C, ArgNum)) 735 return false; 736 737 // If either the format string content or the pointer itself are tainted, 738 // warn. 739 return generateReportIfTainted(CE->getArg(ArgNum), 740 MsgUncontrolledFormatString, C); 741 } 742 743 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name, 744 CheckerContext &C) const { 745 // TODO: It might make sense to run this check on demand. In some cases, 746 // we should check if the environment has been cleansed here. We also might 747 // need to know if the user was reset before these calls(seteuid). 748 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name) 749 .Case("system", 0) 750 .Case("popen", 0) 751 .Case("execl", 0) 752 .Case("execle", 0) 753 .Case("execlp", 0) 754 .Case("execv", 0) 755 .Case("execvp", 0) 756 .Case("execvP", 0) 757 .Case("execve", 0) 758 .Case("dlopen", 0) 759 .Default(UINT_MAX); 760 761 if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1)) 762 return false; 763 764 return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C); 765 } 766 767 // TODO: Should this check be a part of the CString checker? 768 // If yes, should taint be a global setting? 769 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE, 770 const FunctionDecl *FDecl, 771 CheckerContext &C) const { 772 // If the function has a buffer size argument, set ArgNum. 773 unsigned ArgNum = InvalidArgIndex; 774 unsigned BId = 0; 775 if ((BId = FDecl->getMemoryFunctionKind())) 776 switch (BId) { 777 case Builtin::BImemcpy: 778 case Builtin::BImemmove: 779 case Builtin::BIstrncpy: 780 ArgNum = 2; 781 break; 782 case Builtin::BIstrndup: 783 ArgNum = 1; 784 break; 785 default: 786 break; 787 }; 788 789 if (ArgNum == InvalidArgIndex) { 790 if (C.isCLibraryFunction(FDecl, "malloc") || 791 C.isCLibraryFunction(FDecl, "calloc") || 792 C.isCLibraryFunction(FDecl, "alloca")) 793 ArgNum = 0; 794 else if (C.isCLibraryFunction(FDecl, "memccpy")) 795 ArgNum = 3; 796 else if (C.isCLibraryFunction(FDecl, "realloc")) 797 ArgNum = 1; 798 else if (C.isCLibraryFunction(FDecl, "bcopy")) 799 ArgNum = 2; 800 } 801 802 return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum && 803 generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C); 804 } 805 806 void ento::registerGenericTaintChecker(CheckerManager &Mgr) { 807 auto *Checker = Mgr.registerChecker<GenericTaintChecker>(); 808 std::string Option{"Config"}; 809 StringRef ConfigFile = 810 Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option); 811 llvm::Optional<TaintConfig> Config = 812 getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile); 813 if (Config) 814 Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue())); 815 } 816 817 bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) { 818 return true; 819 } 820