1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This checker defines the attack surface for generic taint propagation. 10 // 11 // The taint information produced by it might be useful to other checkers. For 12 // example, checkers should report errors which involve tainted data more 13 // aggressively, even if the involved symbols are under constrained. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "Taint.h" 18 #include "Yaml.h" 19 #include "clang/AST/Attr.h" 20 #include "clang/Basic/Builtins.h" 21 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" 22 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 23 #include "clang/StaticAnalyzer/Core/Checker.h" 24 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 25 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 26 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 27 #include "llvm/ADT/StringMap.h" 28 #include "llvm/Support/YAMLTraits.h" 29 #include <limits> 30 #include <utility> 31 32 using namespace clang; 33 using namespace ento; 34 using namespace taint; 35 36 namespace { 37 class GenericTaintChecker 38 : public Checker<check::PostStmt<CallExpr>, check::PreStmt<CallExpr>> { 39 public: 40 static void *getTag() { 41 static int Tag; 42 return &Tag; 43 } 44 45 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; 46 47 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; 48 49 void printState(raw_ostream &Out, ProgramStateRef State, const char *NL, 50 const char *Sep) const override; 51 52 using ArgVector = SmallVector<unsigned, 2>; 53 using SignedArgVector = SmallVector<int, 2>; 54 55 enum class VariadicType { None, Src, Dst }; 56 57 /// Used to parse the configuration file. 58 struct TaintConfiguration { 59 using NameArgsPair = std::pair<std::string, ArgVector>; 60 61 struct Propagation { 62 std::string Name; 63 ArgVector SrcArgs; 64 SignedArgVector DstArgs; 65 VariadicType VarType; 66 unsigned VarIndex; 67 }; 68 69 std::vector<Propagation> Propagations; 70 std::vector<NameArgsPair> Filters; 71 std::vector<NameArgsPair> Sinks; 72 73 TaintConfiguration() = default; 74 TaintConfiguration(const TaintConfiguration &) = default; 75 TaintConfiguration(TaintConfiguration &&) = default; 76 TaintConfiguration &operator=(const TaintConfiguration &) = default; 77 TaintConfiguration &operator=(TaintConfiguration &&) = default; 78 }; 79 80 /// Convert SignedArgVector to ArgVector. 81 ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option, 82 SignedArgVector Args); 83 84 /// Parse the config. 85 void parseConfiguration(CheckerManager &Mgr, const std::string &Option, 86 TaintConfiguration &&Config); 87 88 static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()}; 89 /// Denotes the return vale. 90 static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() - 91 1}; 92 93 private: 94 mutable std::unique_ptr<BugType> BT; 95 void initBugType() const { 96 if (!BT) 97 BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data")); 98 } 99 100 /// Catch taint related bugs. Check if tainted data is passed to a 101 /// system call etc. Returns true on matching. 102 bool checkPre(const CallExpr *CE, const FunctionDecl *FDecl, StringRef Name, 103 CheckerContext &C) const; 104 105 /// Add taint sources on a pre-visit. Returns true on matching. 106 bool addSourcesPre(const CallExpr *CE, const FunctionDecl *FDecl, 107 StringRef Name, CheckerContext &C) const; 108 109 /// Mark filter's arguments not tainted on a pre-visit. Returns true on 110 /// matching. 111 bool addFiltersPre(const CallExpr *CE, StringRef Name, 112 CheckerContext &C) const; 113 114 /// Propagate taint generated at pre-visit. Returns true on matching. 115 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const; 116 117 /// Check if the region the expression evaluates to is the standard input, 118 /// and thus, is tainted. 119 static bool isStdin(const Expr *E, CheckerContext &C); 120 121 /// Given a pointer argument, return the value it points to. 122 static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg); 123 124 /// Check for CWE-134: Uncontrolled Format String. 125 static constexpr llvm::StringLiteral MsgUncontrolledFormatString = 126 "Untrusted data is used as a format string " 127 "(CWE-134: Uncontrolled Format String)"; 128 bool checkUncontrolledFormatString(const CallExpr *CE, 129 CheckerContext &C) const; 130 131 /// Check for: 132 /// CERT/STR02-C. "Sanitize data passed to complex subsystems" 133 /// CWE-78, "Failure to Sanitize Data into an OS Command" 134 static constexpr llvm::StringLiteral MsgSanitizeSystemArgs = 135 "Untrusted data is passed to a system call " 136 "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; 137 bool checkSystemCall(const CallExpr *CE, StringRef Name, 138 CheckerContext &C) const; 139 140 /// Check if tainted data is used as a buffer size ins strn.. functions, 141 /// and allocators. 142 static constexpr llvm::StringLiteral MsgTaintedBufferSize = 143 "Untrusted data is used to specify the buffer size " 144 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space " 145 "for character data and the null terminator)"; 146 bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl, 147 CheckerContext &C) const; 148 149 /// Check if tainted data is used as a custom sink's parameter. 150 static constexpr llvm::StringLiteral MsgCustomSink = 151 "Untrusted data is passed to a user-defined sink"; 152 bool checkCustomSinks(const CallExpr *CE, StringRef Name, 153 CheckerContext &C) const; 154 155 /// Generate a report if the expression is tainted or points to tainted data. 156 bool generateReportIfTainted(const Expr *E, StringRef Msg, 157 CheckerContext &C) const; 158 159 struct TaintPropagationRule; 160 using NameRuleMap = llvm::StringMap<TaintPropagationRule>; 161 using NameArgMap = llvm::StringMap<ArgVector>; 162 163 /// A struct used to specify taint propagation rules for a function. 164 /// 165 /// If any of the possible taint source arguments is tainted, all of the 166 /// destination arguments should also be tainted. Use InvalidArgIndex in the 167 /// src list to specify that all of the arguments can introduce taint. Use 168 /// InvalidArgIndex in the dst arguments to signify that all the non-const 169 /// pointer and reference arguments might be tainted on return. If 170 /// ReturnValueIndex is added to the dst list, the return value will be 171 /// tainted. 172 struct TaintPropagationRule { 173 using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *, 174 CheckerContext &C); 175 176 /// List of arguments which can be taint sources and should be checked. 177 ArgVector SrcArgs; 178 /// List of arguments which should be tainted on function return. 179 ArgVector DstArgs; 180 /// Index for the first variadic parameter if exist. 181 unsigned VariadicIndex; 182 /// Show when a function has variadic parameters. If it has, it marks all 183 /// of them as source or destination. 184 VariadicType VarType; 185 /// Special function for tainted source determination. If defined, it can 186 /// override the default behavior. 187 PropagationFuncType PropagationFunc; 188 189 TaintPropagationRule() 190 : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None), 191 PropagationFunc(nullptr) {} 192 193 TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst, 194 VariadicType Var = VariadicType::None, 195 unsigned VarIndex = InvalidArgIndex, 196 PropagationFuncType Func = nullptr) 197 : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)), 198 VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {} 199 200 /// Get the propagation rule for a given function. 201 static TaintPropagationRule 202 getTaintPropagationRule(const NameRuleMap &CustomPropagations, 203 const FunctionDecl *FDecl, StringRef Name, 204 CheckerContext &C); 205 206 void addSrcArg(unsigned A) { SrcArgs.push_back(A); } 207 void addDstArg(unsigned A) { DstArgs.push_back(A); } 208 209 bool isNull() const { 210 return SrcArgs.empty() && DstArgs.empty() && 211 VariadicType::None == VarType; 212 } 213 214 bool isDestinationArgument(unsigned ArgNum) const { 215 return (llvm::find(DstArgs, ArgNum) != DstArgs.end()); 216 } 217 218 static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State, 219 CheckerContext &C) { 220 if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C)) 221 return true; 222 223 if (!E->getType().getTypePtr()->isPointerType()) 224 return false; 225 226 Optional<SVal> V = getPointedToSVal(C, E); 227 return (V && isTainted(State, *V)); 228 } 229 230 /// Pre-process a function which propagates taint according to the 231 /// taint rule. 232 ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const; 233 234 // Functions for custom taintedness propagation. 235 static bool postSocket(bool IsTainted, const CallExpr *CE, 236 CheckerContext &C); 237 }; 238 239 /// Defines a map between the propagation function's name and 240 /// TaintPropagationRule. 241 NameRuleMap CustomPropagations; 242 243 /// Defines a map between the filter function's name and filtering args. 244 NameArgMap CustomFilters; 245 246 /// Defines a map between the sink function's name and sinking args. 247 NameArgMap CustomSinks; 248 }; 249 250 const unsigned GenericTaintChecker::ReturnValueIndex; 251 const unsigned GenericTaintChecker::InvalidArgIndex; 252 253 // FIXME: these lines can be removed in C++17 254 constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString; 255 constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs; 256 constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize; 257 constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink; 258 } // end of anonymous namespace 259 260 using TaintConfig = GenericTaintChecker::TaintConfiguration; 261 262 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation) 263 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameArgsPair) 264 265 namespace llvm { 266 namespace yaml { 267 template <> struct MappingTraits<TaintConfig> { 268 static void mapping(IO &IO, TaintConfig &Config) { 269 IO.mapOptional("Propagations", Config.Propagations); 270 IO.mapOptional("Filters", Config.Filters); 271 IO.mapOptional("Sinks", Config.Sinks); 272 } 273 }; 274 275 template <> struct MappingTraits<TaintConfig::Propagation> { 276 static void mapping(IO &IO, TaintConfig::Propagation &Propagation) { 277 IO.mapRequired("Name", Propagation.Name); 278 IO.mapOptional("SrcArgs", Propagation.SrcArgs); 279 IO.mapOptional("DstArgs", Propagation.DstArgs); 280 IO.mapOptional("VariadicType", Propagation.VarType, 281 GenericTaintChecker::VariadicType::None); 282 IO.mapOptional("VariadicIndex", Propagation.VarIndex, 283 GenericTaintChecker::InvalidArgIndex); 284 } 285 }; 286 287 template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> { 288 static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) { 289 IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None); 290 IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src); 291 IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst); 292 } 293 }; 294 295 template <> struct MappingTraits<TaintConfig::NameArgsPair> { 296 static void mapping(IO &IO, TaintConfig::NameArgsPair &NameArg) { 297 IO.mapRequired("Name", NameArg.first); 298 IO.mapRequired("Args", NameArg.second); 299 } 300 }; 301 } // namespace yaml 302 } // namespace llvm 303 304 /// A set which is used to pass information from call pre-visit instruction 305 /// to the call post-visit. The values are unsigned integers, which are either 306 /// ReturnValueIndex, or indexes of the pointer/reference argument, which 307 /// points to data, which should be tainted on return. 308 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned) 309 310 GenericTaintChecker::ArgVector GenericTaintChecker::convertToArgVector( 311 CheckerManager &Mgr, const std::string &Option, SignedArgVector Args) { 312 ArgVector Result; 313 for (int Arg : Args) { 314 if (Arg == -1) 315 Result.push_back(ReturnValueIndex); 316 else if (Arg < -1) { 317 Result.push_back(InvalidArgIndex); 318 Mgr.reportInvalidCheckerOptionValue( 319 this, Option, 320 "an argument number for propagation rules greater or equal to -1"); 321 } else 322 Result.push_back(static_cast<unsigned>(Arg)); 323 } 324 return Result; 325 } 326 327 void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr, 328 const std::string &Option, 329 TaintConfiguration &&Config) { 330 for (auto &P : Config.Propagations) { 331 GenericTaintChecker::CustomPropagations.try_emplace( 332 P.Name, std::move(P.SrcArgs), 333 convertToArgVector(Mgr, Option, P.DstArgs), P.VarType, P.VarIndex); 334 } 335 336 for (auto &F : Config.Filters) { 337 GenericTaintChecker::CustomFilters.try_emplace(F.first, 338 std::move(F.second)); 339 } 340 341 for (auto &S : Config.Sinks) { 342 GenericTaintChecker::CustomSinks.try_emplace(S.first, std::move(S.second)); 343 } 344 } 345 346 GenericTaintChecker::TaintPropagationRule 347 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( 348 const NameRuleMap &CustomPropagations, const FunctionDecl *FDecl, 349 StringRef Name, CheckerContext &C) { 350 // TODO: Currently, we might lose precision here: we always mark a return 351 // value as tainted even if it's just a pointer, pointing to tainted data. 352 353 // Check for exact name match for functions without builtin substitutes. 354 TaintPropagationRule Rule = 355 llvm::StringSwitch<TaintPropagationRule>(Name) 356 // Source functions 357 // TODO: Add support for vfscanf & family. 358 .Case("fdopen", TaintPropagationRule({}, {ReturnValueIndex})) 359 .Case("fopen", TaintPropagationRule({}, {ReturnValueIndex})) 360 .Case("freopen", TaintPropagationRule({}, {ReturnValueIndex})) 361 .Case("getch", TaintPropagationRule({}, {ReturnValueIndex})) 362 .Case("getchar", TaintPropagationRule({}, {ReturnValueIndex})) 363 .Case("getchar_unlocked", 364 TaintPropagationRule({}, {ReturnValueIndex})) 365 .Case("getenv", TaintPropagationRule({}, {ReturnValueIndex})) 366 .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex})) 367 .Case("scanf", TaintPropagationRule({}, {}, VariadicType::Dst, 1)) 368 .Case("socket", 369 TaintPropagationRule({}, {ReturnValueIndex}, VariadicType::None, 370 InvalidArgIndex, 371 &TaintPropagationRule::postSocket)) 372 .Case("wgetch", TaintPropagationRule({}, {ReturnValueIndex})) 373 // Propagating functions 374 .Case("atoi", TaintPropagationRule({0}, {ReturnValueIndex})) 375 .Case("atol", TaintPropagationRule({0}, {ReturnValueIndex})) 376 .Case("atoll", TaintPropagationRule({0}, {ReturnValueIndex})) 377 .Case("fgetc", TaintPropagationRule({0}, {ReturnValueIndex})) 378 .Case("fgetln", TaintPropagationRule({0}, {ReturnValueIndex})) 379 .Case("fgets", TaintPropagationRule({2}, {0, ReturnValueIndex})) 380 .Case("fscanf", TaintPropagationRule({0}, {}, VariadicType::Dst, 2)) 381 .Case("getc", TaintPropagationRule({0}, {ReturnValueIndex})) 382 .Case("getc_unlocked", TaintPropagationRule({0}, {ReturnValueIndex})) 383 .Case("getdelim", TaintPropagationRule({3}, {0})) 384 .Case("getline", TaintPropagationRule({2}, {0})) 385 .Case("getw", TaintPropagationRule({0}, {ReturnValueIndex})) 386 .Case("pread", 387 TaintPropagationRule({0, 1, 2, 3}, {1, ReturnValueIndex})) 388 .Case("read", TaintPropagationRule({0, 2}, {1, ReturnValueIndex})) 389 .Case("strchr", TaintPropagationRule({0}, {ReturnValueIndex})) 390 .Case("strrchr", TaintPropagationRule({0}, {ReturnValueIndex})) 391 .Case("tolower", TaintPropagationRule({0}, {ReturnValueIndex})) 392 .Case("toupper", TaintPropagationRule({0}, {ReturnValueIndex})) 393 .Default(TaintPropagationRule()); 394 395 if (!Rule.isNull()) 396 return Rule; 397 398 // Check if it's one of the memory setting/copying functions. 399 // This check is specialized but faster then calling isCLibraryFunction. 400 unsigned BId = 0; 401 if ((BId = FDecl->getMemoryFunctionKind())) 402 switch (BId) { 403 case Builtin::BImemcpy: 404 case Builtin::BImemmove: 405 case Builtin::BIstrncpy: 406 case Builtin::BIstrncat: 407 return TaintPropagationRule({1, 2}, {0, ReturnValueIndex}); 408 case Builtin::BIstrlcpy: 409 case Builtin::BIstrlcat: 410 return TaintPropagationRule({1, 2}, {0}); 411 case Builtin::BIstrndup: 412 return TaintPropagationRule({0, 1}, {ReturnValueIndex}); 413 414 default: 415 break; 416 }; 417 418 // Process all other functions which could be defined as builtins. 419 if (Rule.isNull()) { 420 if (C.isCLibraryFunction(FDecl, "snprintf")) 421 return TaintPropagationRule({1}, {0, ReturnValueIndex}, VariadicType::Src, 422 3); 423 else if (C.isCLibraryFunction(FDecl, "sprintf")) 424 return TaintPropagationRule({}, {0, ReturnValueIndex}, VariadicType::Src, 425 2); 426 else if (C.isCLibraryFunction(FDecl, "strcpy") || 427 C.isCLibraryFunction(FDecl, "stpcpy") || 428 C.isCLibraryFunction(FDecl, "strcat")) 429 return TaintPropagationRule({1}, {0, ReturnValueIndex}); 430 else if (C.isCLibraryFunction(FDecl, "bcopy")) 431 return TaintPropagationRule({0, 2}, {1}); 432 else if (C.isCLibraryFunction(FDecl, "strdup") || 433 C.isCLibraryFunction(FDecl, "strdupa")) 434 return TaintPropagationRule({0}, {ReturnValueIndex}); 435 else if (C.isCLibraryFunction(FDecl, "wcsdup")) 436 return TaintPropagationRule({0}, {ReturnValueIndex}); 437 } 438 439 // Skipping the following functions, since they might be used for cleansing 440 // or smart memory copy: 441 // - memccpy - copying until hitting a special character. 442 443 auto It = CustomPropagations.find(Name); 444 if (It != CustomPropagations.end()) 445 return It->getValue(); 446 447 return TaintPropagationRule(); 448 } 449 450 void GenericTaintChecker::checkPreStmt(const CallExpr *CE, 451 CheckerContext &C) const { 452 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 453 // Check for non-global functions. 454 if (!FDecl || FDecl->getKind() != Decl::Function) 455 return; 456 457 StringRef Name = C.getCalleeName(FDecl); 458 if (Name.empty()) 459 return; 460 461 // Check for taintedness related errors first: system call, uncontrolled 462 // format string, tainted buffer size. 463 if (checkPre(CE, FDecl, Name, C)) 464 return; 465 466 // Marks the function's arguments and/or return value tainted if it present in 467 // the list. 468 if (addSourcesPre(CE, FDecl, Name, C)) 469 return; 470 471 addFiltersPre(CE, Name, C); 472 } 473 474 void GenericTaintChecker::checkPostStmt(const CallExpr *CE, 475 CheckerContext &C) const { 476 // Set the marked values as tainted. The return value only accessible from 477 // checkPostStmt. 478 propagateFromPre(CE, C); 479 } 480 481 void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State, 482 const char *NL, const char *Sep) const { 483 printTaint(State, Out, NL, Sep); 484 } 485 486 bool GenericTaintChecker::addSourcesPre(const CallExpr *CE, 487 const FunctionDecl *FDecl, 488 StringRef Name, 489 CheckerContext &C) const { 490 // First, try generating a propagation rule for this function. 491 TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule( 492 this->CustomPropagations, FDecl, Name, C); 493 if (!Rule.isNull()) { 494 ProgramStateRef State = Rule.process(CE, C); 495 if (State) { 496 C.addTransition(State); 497 return true; 498 } 499 } 500 return false; 501 } 502 503 bool GenericTaintChecker::addFiltersPre(const CallExpr *CE, StringRef Name, 504 CheckerContext &C) const { 505 auto It = CustomFilters.find(Name); 506 if (It == CustomFilters.end()) 507 return false; 508 509 ProgramStateRef State = C.getState(); 510 const ArgVector &Args = It->getValue(); 511 for (unsigned ArgNum : Args) { 512 if (ArgNum >= CE->getNumArgs()) 513 continue; 514 515 const Expr *Arg = CE->getArg(ArgNum); 516 Optional<SVal> V = getPointedToSVal(C, Arg); 517 if (V) 518 State = removeTaint(State, *V); 519 } 520 521 if (State != C.getState()) { 522 C.addTransition(State); 523 return true; 524 } 525 return false; 526 } 527 528 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, 529 CheckerContext &C) const { 530 ProgramStateRef State = C.getState(); 531 532 // Depending on what was tainted at pre-visit, we determined a set of 533 // arguments which should be tainted after the function returns. These are 534 // stored in the state as TaintArgsOnPostVisit set. 535 TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>(); 536 if (TaintArgs.isEmpty()) 537 return false; 538 539 for (unsigned ArgNum : TaintArgs) { 540 // Special handling for the tainted return value. 541 if (ArgNum == ReturnValueIndex) { 542 State = addTaint(State, CE, C.getLocationContext()); 543 continue; 544 } 545 546 // The arguments are pointer arguments. The data they are pointing at is 547 // tainted after the call. 548 if (CE->getNumArgs() < (ArgNum + 1)) 549 return false; 550 const Expr *Arg = CE->getArg(ArgNum); 551 Optional<SVal> V = getPointedToSVal(C, Arg); 552 if (V) 553 State = addTaint(State, *V); 554 } 555 556 // Clear up the taint info from the state. 557 State = State->remove<TaintArgsOnPostVisit>(); 558 559 if (State != C.getState()) { 560 C.addTransition(State); 561 return true; 562 } 563 return false; 564 } 565 566 bool GenericTaintChecker::checkPre(const CallExpr *CE, 567 const FunctionDecl *FDecl, StringRef Name, 568 CheckerContext &C) const { 569 570 if (checkUncontrolledFormatString(CE, C)) 571 return true; 572 573 if (checkSystemCall(CE, Name, C)) 574 return true; 575 576 if (checkTaintedBufferSize(CE, FDecl, C)) 577 return true; 578 579 if (checkCustomSinks(CE, Name, C)) 580 return true; 581 582 return false; 583 } 584 585 Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C, 586 const Expr *Arg) { 587 ProgramStateRef State = C.getState(); 588 SVal AddrVal = C.getSVal(Arg->IgnoreParens()); 589 if (AddrVal.isUnknownOrUndef()) 590 return None; 591 592 Optional<Loc> AddrLoc = AddrVal.getAs<Loc>(); 593 if (!AddrLoc) 594 return None; 595 596 QualType ArgTy = Arg->getType().getCanonicalType(); 597 if (!ArgTy->isPointerType()) 598 return None; 599 600 QualType ValTy = ArgTy->getPointeeType(); 601 602 // Do not dereference void pointers. Treat them as byte pointers instead. 603 // FIXME: we might want to consider more than just the first byte. 604 if (ValTy->isVoidType()) 605 ValTy = C.getASTContext().CharTy; 606 607 return State->getSVal(*AddrLoc, ValTy); 608 } 609 610 ProgramStateRef 611 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE, 612 CheckerContext &C) const { 613 ProgramStateRef State = C.getState(); 614 615 // Check for taint in arguments. 616 bool IsTainted = true; 617 for (unsigned ArgNum : SrcArgs) { 618 if (ArgNum >= CE->getNumArgs()) 619 continue; 620 621 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C))) 622 break; 623 } 624 625 // Check for taint in variadic arguments. 626 if (!IsTainted && VariadicType::Src == VarType) { 627 // Check if any of the arguments is tainted 628 for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) { 629 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C))) 630 break; 631 } 632 } 633 634 if (PropagationFunc) 635 IsTainted = PropagationFunc(IsTainted, CE, C); 636 637 if (!IsTainted) 638 return State; 639 640 // Mark the arguments which should be tainted after the function returns. 641 for (unsigned ArgNum : DstArgs) { 642 // Should mark the return value? 643 if (ArgNum == ReturnValueIndex) { 644 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex); 645 continue; 646 } 647 648 if (ArgNum >= CE->getNumArgs()) 649 continue; 650 651 // Mark the given argument. 652 State = State->add<TaintArgsOnPostVisit>(ArgNum); 653 } 654 655 // Mark all variadic arguments tainted if present. 656 if (VariadicType::Dst == VarType) { 657 // For all pointer and references that were passed in: 658 // If they are not pointing to const data, mark data as tainted. 659 // TODO: So far we are just going one level down; ideally we'd need to 660 // recurse here. 661 for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) { 662 const Expr *Arg = CE->getArg(i); 663 // Process pointer argument. 664 const Type *ArgTy = Arg->getType().getTypePtr(); 665 QualType PType = ArgTy->getPointeeType(); 666 if ((!PType.isNull() && !PType.isConstQualified()) || 667 (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) 668 State = State->add<TaintArgsOnPostVisit>(i); 669 } 670 } 671 672 return State; 673 } 674 675 // If argument 0(protocol domain) is network, the return value should get taint. 676 bool GenericTaintChecker::TaintPropagationRule::postSocket(bool /*IsTainted*/, 677 const CallExpr *CE, 678 CheckerContext &C) { 679 SourceLocation DomLoc = CE->getArg(0)->getExprLoc(); 680 StringRef DomName = C.getMacroNameOrSpelling(DomLoc); 681 // White list the internal communication protocols. 682 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") || 683 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36")) 684 return false; 685 686 return true; 687 } 688 689 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) { 690 ProgramStateRef State = C.getState(); 691 SVal Val = C.getSVal(E); 692 693 // stdin is a pointer, so it would be a region. 694 const MemRegion *MemReg = Val.getAsRegion(); 695 696 // The region should be symbolic, we do not know it's value. 697 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg); 698 if (!SymReg) 699 return false; 700 701 // Get it's symbol and find the declaration region it's pointing to. 702 const SymbolRegionValue *Sm = 703 dyn_cast<SymbolRegionValue>(SymReg->getSymbol()); 704 if (!Sm) 705 return false; 706 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion()); 707 if (!DeclReg) 708 return false; 709 710 // This region corresponds to a declaration, find out if it's a global/extern 711 // variable named stdin with the proper type. 712 if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { 713 D = D->getCanonicalDecl(); 714 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) { 715 const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr()); 716 if (PtrTy && PtrTy->getPointeeType().getCanonicalType() == 717 C.getASTContext().getFILEType().getCanonicalType()) 718 return true; 719 } 720 } 721 return false; 722 } 723 724 static bool getPrintfFormatArgumentNum(const CallExpr *CE, 725 const CheckerContext &C, 726 unsigned &ArgNum) { 727 // Find if the function contains a format string argument. 728 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, 729 // vsnprintf, syslog, custom annotated functions. 730 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 731 if (!FDecl) 732 return false; 733 for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) { 734 ArgNum = Format->getFormatIdx() - 1; 735 if ((Format->getType()->getName() == "printf") && CE->getNumArgs() > ArgNum) 736 return true; 737 } 738 739 // Or if a function is named setproctitle (this is a heuristic). 740 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) { 741 ArgNum = 0; 742 return true; 743 } 744 745 return false; 746 } 747 748 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg, 749 CheckerContext &C) const { 750 assert(E); 751 752 // Check for taint. 753 ProgramStateRef State = C.getState(); 754 Optional<SVal> PointedToSVal = getPointedToSVal(C, E); 755 SVal TaintedSVal; 756 if (PointedToSVal && isTainted(State, *PointedToSVal)) 757 TaintedSVal = *PointedToSVal; 758 else if (isTainted(State, E, C.getLocationContext())) 759 TaintedSVal = C.getSVal(E); 760 else 761 return false; 762 763 // Generate diagnostic. 764 if (ExplodedNode *N = C.generateNonFatalErrorNode()) { 765 initBugType(); 766 auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N); 767 report->addRange(E->getSourceRange()); 768 report->addVisitor(std::make_unique<TaintBugVisitor>(TaintedSVal)); 769 C.emitReport(std::move(report)); 770 return true; 771 } 772 return false; 773 } 774 775 bool GenericTaintChecker::checkUncontrolledFormatString( 776 const CallExpr *CE, CheckerContext &C) const { 777 // Check if the function contains a format string argument. 778 unsigned ArgNum = 0; 779 if (!getPrintfFormatArgumentNum(CE, C, ArgNum)) 780 return false; 781 782 // If either the format string content or the pointer itself are tainted, 783 // warn. 784 return generateReportIfTainted(CE->getArg(ArgNum), 785 MsgUncontrolledFormatString, C); 786 } 787 788 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name, 789 CheckerContext &C) const { 790 // TODO: It might make sense to run this check on demand. In some cases, 791 // we should check if the environment has been cleansed here. We also might 792 // need to know if the user was reset before these calls(seteuid). 793 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name) 794 .Case("system", 0) 795 .Case("popen", 0) 796 .Case("execl", 0) 797 .Case("execle", 0) 798 .Case("execlp", 0) 799 .Case("execv", 0) 800 .Case("execvp", 0) 801 .Case("execvP", 0) 802 .Case("execve", 0) 803 .Case("dlopen", 0) 804 .Default(InvalidArgIndex); 805 806 if (ArgNum == InvalidArgIndex || CE->getNumArgs() < (ArgNum + 1)) 807 return false; 808 809 return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C); 810 } 811 812 // TODO: Should this check be a part of the CString checker? 813 // If yes, should taint be a global setting? 814 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE, 815 const FunctionDecl *FDecl, 816 CheckerContext &C) const { 817 // If the function has a buffer size argument, set ArgNum. 818 unsigned ArgNum = InvalidArgIndex; 819 unsigned BId = 0; 820 if ((BId = FDecl->getMemoryFunctionKind())) 821 switch (BId) { 822 case Builtin::BImemcpy: 823 case Builtin::BImemmove: 824 case Builtin::BIstrncpy: 825 ArgNum = 2; 826 break; 827 case Builtin::BIstrndup: 828 ArgNum = 1; 829 break; 830 default: 831 break; 832 }; 833 834 if (ArgNum == InvalidArgIndex) { 835 if (C.isCLibraryFunction(FDecl, "malloc") || 836 C.isCLibraryFunction(FDecl, "calloc") || 837 C.isCLibraryFunction(FDecl, "alloca")) 838 ArgNum = 0; 839 else if (C.isCLibraryFunction(FDecl, "memccpy")) 840 ArgNum = 3; 841 else if (C.isCLibraryFunction(FDecl, "realloc")) 842 ArgNum = 1; 843 else if (C.isCLibraryFunction(FDecl, "bcopy")) 844 ArgNum = 2; 845 } 846 847 return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum && 848 generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C); 849 } 850 851 bool GenericTaintChecker::checkCustomSinks(const CallExpr *CE, StringRef Name, 852 CheckerContext &C) const { 853 auto It = CustomSinks.find(Name); 854 if (It == CustomSinks.end()) 855 return false; 856 857 const GenericTaintChecker::ArgVector &Args = It->getValue(); 858 for (unsigned ArgNum : Args) { 859 if (ArgNum >= CE->getNumArgs()) 860 continue; 861 862 if (generateReportIfTainted(CE->getArg(ArgNum), MsgCustomSink, C)) 863 return true; 864 } 865 866 return false; 867 } 868 869 void ento::registerGenericTaintChecker(CheckerManager &Mgr) { 870 auto *Checker = Mgr.registerChecker<GenericTaintChecker>(); 871 std::string Option{"Config"}; 872 StringRef ConfigFile = 873 Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option); 874 llvm::Optional<TaintConfig> Config = 875 getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile); 876 if (Config) 877 Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue())); 878 } 879 880 bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) { 881 return true; 882 } 883