1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This checker defines the attack surface for generic taint propagation. 10 // 11 // The taint information produced by it might be useful to other checkers. For 12 // example, checkers should report errors which involve tainted data more 13 // aggressively, even if the involved symbols are under constrained. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "Taint.h" 18 #include "Yaml.h" 19 #include "clang/AST/Attr.h" 20 #include "clang/Basic/Builtins.h" 21 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" 22 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 23 #include "clang/StaticAnalyzer/Core/Checker.h" 24 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 25 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" 26 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 27 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 28 #include "llvm/Support/YAMLTraits.h" 29 30 #include <algorithm> 31 #include <limits> 32 #include <memory> 33 #include <unordered_map> 34 #include <utility> 35 36 using namespace clang; 37 using namespace ento; 38 using namespace taint; 39 40 namespace { 41 class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> { 42 public: 43 static void *getTag() { 44 static int Tag; 45 return &Tag; 46 } 47 48 void checkPreCall(const CallEvent &Call, CheckerContext &C) const; 49 void checkPostCall(const CallEvent &Call, CheckerContext &C) const; 50 51 void printState(raw_ostream &Out, ProgramStateRef State, const char *NL, 52 const char *Sep) const override; 53 54 using ArgVector = SmallVector<unsigned, 2>; 55 using SignedArgVector = SmallVector<int, 2>; 56 57 enum class VariadicType { None, Src, Dst }; 58 59 /// Used to parse the configuration file. 60 struct TaintConfiguration { 61 using NameScopeArgs = std::tuple<std::string, std::string, ArgVector>; 62 63 struct Propagation { 64 std::string Name; 65 std::string Scope; 66 ArgVector SrcArgs; 67 SignedArgVector DstArgs; 68 VariadicType VarType; 69 unsigned VarIndex; 70 }; 71 72 std::vector<Propagation> Propagations; 73 std::vector<NameScopeArgs> Filters; 74 std::vector<NameScopeArgs> Sinks; 75 76 TaintConfiguration() = default; 77 TaintConfiguration(const TaintConfiguration &) = default; 78 TaintConfiguration(TaintConfiguration &&) = default; 79 TaintConfiguration &operator=(const TaintConfiguration &) = default; 80 TaintConfiguration &operator=(TaintConfiguration &&) = default; 81 }; 82 83 /// Convert SignedArgVector to ArgVector. 84 ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option, 85 const SignedArgVector &Args); 86 87 /// Parse the config. 88 void parseConfiguration(CheckerManager &Mgr, const std::string &Option, 89 TaintConfiguration &&Config); 90 91 static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()}; 92 /// Denotes the return vale. 93 static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() - 94 1}; 95 96 private: 97 mutable std::unique_ptr<BugType> BT; 98 void initBugType() const { 99 if (!BT) 100 BT = std::make_unique<BugType>(this, "Use of Untrusted Data", 101 "Untrusted Data"); 102 } 103 104 struct FunctionData { 105 FunctionData() = delete; 106 FunctionData(const FunctionData &) = default; 107 FunctionData(FunctionData &&) = default; 108 FunctionData &operator=(const FunctionData &) = delete; 109 FunctionData &operator=(FunctionData &&) = delete; 110 111 static Optional<FunctionData> create(const CallEvent &Call, 112 const CheckerContext &C) { 113 assert(Call.getDecl()); 114 const FunctionDecl *FDecl = Call.getDecl()->getAsFunction(); 115 if (!FDecl || (FDecl->getKind() != Decl::Function && 116 FDecl->getKind() != Decl::CXXMethod)) 117 return None; 118 119 StringRef Name = C.getCalleeName(FDecl); 120 std::string FullName = FDecl->getQualifiedNameAsString(); 121 if (Name.empty() || FullName.empty()) 122 return None; 123 124 return FunctionData{FDecl, Name, FullName}; 125 } 126 127 bool isInScope(StringRef Scope) const { 128 return StringRef(FullName).startswith(Scope); 129 } 130 131 const FunctionDecl *const FDecl; 132 const StringRef Name; 133 const std::string FullName; 134 }; 135 136 /// Catch taint related bugs. Check if tainted data is passed to a 137 /// system call etc. Returns true on matching. 138 bool checkPre(const CallEvent &Call, const FunctionData &FData, 139 CheckerContext &C) const; 140 141 /// Add taint sources on a pre-visit. Returns true on matching. 142 bool addSourcesPre(const CallEvent &Call, const FunctionData &FData, 143 CheckerContext &C) const; 144 145 /// Mark filter's arguments not tainted on a pre-visit. Returns true on 146 /// matching. 147 bool addFiltersPre(const CallEvent &Call, const FunctionData &FData, 148 CheckerContext &C) const; 149 150 /// Propagate taint generated at pre-visit. Returns true on matching. 151 static bool propagateFromPre(const CallEvent &Call, CheckerContext &C); 152 153 /// Check if the region the expression evaluates to is the standard input, 154 /// and thus, is tainted. 155 static bool isStdin(const Expr *E, CheckerContext &C); 156 157 /// Given a pointer argument, return the value it points to. 158 static Optional<SVal> getPointeeOf(CheckerContext &C, const Expr *Arg); 159 160 /// Check for CWE-134: Uncontrolled Format String. 161 static constexpr llvm::StringLiteral MsgUncontrolledFormatString = 162 "Untrusted data is used as a format string " 163 "(CWE-134: Uncontrolled Format String)"; 164 bool checkUncontrolledFormatString(const CallEvent &Call, 165 CheckerContext &C) const; 166 167 /// Check for: 168 /// CERT/STR02-C. "Sanitize data passed to complex subsystems" 169 /// CWE-78, "Failure to Sanitize Data into an OS Command" 170 static constexpr llvm::StringLiteral MsgSanitizeSystemArgs = 171 "Untrusted data is passed to a system call " 172 "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; 173 bool checkSystemCall(const CallEvent &Call, StringRef Name, 174 CheckerContext &C) const; 175 176 /// Check if tainted data is used as a buffer size ins strn.. functions, 177 /// and allocators. 178 static constexpr llvm::StringLiteral MsgTaintedBufferSize = 179 "Untrusted data is used to specify the buffer size " 180 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space " 181 "for character data and the null terminator)"; 182 bool checkTaintedBufferSize(const CallEvent &Call, CheckerContext &C) const; 183 184 /// Check if tainted data is used as a custom sink's parameter. 185 static constexpr llvm::StringLiteral MsgCustomSink = 186 "Untrusted data is passed to a user-defined sink"; 187 bool checkCustomSinks(const CallEvent &Call, const FunctionData &FData, 188 CheckerContext &C) const; 189 190 /// Generate a report if the expression is tainted or points to tainted data. 191 bool generateReportIfTainted(const Expr *E, StringRef Msg, 192 CheckerContext &C) const; 193 194 struct TaintPropagationRule; 195 template <typename T> 196 using ConfigDataMap = 197 std::unordered_multimap<std::string, std::pair<std::string, T>>; 198 using NameRuleMap = ConfigDataMap<TaintPropagationRule>; 199 using NameArgMap = ConfigDataMap<ArgVector>; 200 201 /// Find a function with the given name and scope. Returns the first match 202 /// or the end of the map. 203 template <typename T> 204 static auto findFunctionInConfig(const ConfigDataMap<T> &Map, 205 const FunctionData &FData); 206 207 /// A struct used to specify taint propagation rules for a function. 208 /// 209 /// If any of the possible taint source arguments is tainted, all of the 210 /// destination arguments should also be tainted. Use InvalidArgIndex in the 211 /// src list to specify that all of the arguments can introduce taint. Use 212 /// InvalidArgIndex in the dst arguments to signify that all the non-const 213 /// pointer and reference arguments might be tainted on return. If 214 /// ReturnValueIndex is added to the dst list, the return value will be 215 /// tainted. 216 struct TaintPropagationRule { 217 using PropagationFuncType = bool (*)(bool IsTainted, const CallEvent &Call, 218 CheckerContext &C); 219 220 /// List of arguments which can be taint sources and should be checked. 221 ArgVector SrcArgs; 222 /// List of arguments which should be tainted on function return. 223 ArgVector DstArgs; 224 /// Index for the first variadic parameter if exist. 225 unsigned VariadicIndex; 226 /// Show when a function has variadic parameters. If it has, it marks all 227 /// of them as source or destination. 228 VariadicType VarType; 229 /// Special function for tainted source determination. If defined, it can 230 /// override the default behavior. 231 PropagationFuncType PropagationFunc; 232 233 TaintPropagationRule() 234 : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None), 235 PropagationFunc(nullptr) {} 236 237 TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst, 238 VariadicType Var = VariadicType::None, 239 unsigned VarIndex = InvalidArgIndex, 240 PropagationFuncType Func = nullptr) 241 : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)), 242 VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {} 243 244 /// Get the propagation rule for a given function. 245 static TaintPropagationRule 246 getTaintPropagationRule(const NameRuleMap &CustomPropagations, 247 const FunctionData &FData, CheckerContext &C); 248 249 void addSrcArg(unsigned A) { SrcArgs.push_back(A); } 250 void addDstArg(unsigned A) { DstArgs.push_back(A); } 251 252 bool isNull() const { 253 return SrcArgs.empty() && DstArgs.empty() && 254 VariadicType::None == VarType; 255 } 256 257 bool isDestinationArgument(unsigned ArgNum) const { 258 return (llvm::find(DstArgs, ArgNum) != DstArgs.end()); 259 } 260 261 static bool isTaintedOrPointsToTainted(const Expr *E, 262 const ProgramStateRef &State, 263 CheckerContext &C) { 264 if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C)) 265 return true; 266 267 if (!E->getType().getTypePtr()->isPointerType()) 268 return false; 269 270 Optional<SVal> V = getPointeeOf(C, E); 271 return (V && isTainted(State, *V)); 272 } 273 274 /// Pre-process a function which propagates taint according to the 275 /// taint rule. 276 ProgramStateRef process(const CallEvent &Call, CheckerContext &C) const; 277 278 // Functions for custom taintedness propagation. 279 static bool postSocket(bool IsTainted, const CallEvent &Call, 280 CheckerContext &C); 281 }; 282 283 /// Defines a map between the propagation function's name, scope 284 /// and TaintPropagationRule. 285 NameRuleMap CustomPropagations; 286 287 /// Defines a map between the filter function's name, scope and filtering 288 /// args. 289 NameArgMap CustomFilters; 290 291 /// Defines a map between the sink function's name, scope and sinking args. 292 NameArgMap CustomSinks; 293 }; 294 295 const unsigned GenericTaintChecker::ReturnValueIndex; 296 const unsigned GenericTaintChecker::InvalidArgIndex; 297 298 // FIXME: these lines can be removed in C++17 299 constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString; 300 constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs; 301 constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize; 302 constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink; 303 } // end of anonymous namespace 304 305 using TaintConfig = GenericTaintChecker::TaintConfiguration; 306 307 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation) 308 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameScopeArgs) 309 310 namespace llvm { 311 namespace yaml { 312 template <> struct MappingTraits<TaintConfig> { 313 static void mapping(IO &IO, TaintConfig &Config) { 314 IO.mapOptional("Propagations", Config.Propagations); 315 IO.mapOptional("Filters", Config.Filters); 316 IO.mapOptional("Sinks", Config.Sinks); 317 } 318 }; 319 320 template <> struct MappingTraits<TaintConfig::Propagation> { 321 static void mapping(IO &IO, TaintConfig::Propagation &Propagation) { 322 IO.mapRequired("Name", Propagation.Name); 323 IO.mapOptional("Scope", Propagation.Scope); 324 IO.mapOptional("SrcArgs", Propagation.SrcArgs); 325 IO.mapOptional("DstArgs", Propagation.DstArgs); 326 IO.mapOptional("VariadicType", Propagation.VarType, 327 GenericTaintChecker::VariadicType::None); 328 IO.mapOptional("VariadicIndex", Propagation.VarIndex, 329 GenericTaintChecker::InvalidArgIndex); 330 } 331 }; 332 333 template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> { 334 static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) { 335 IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None); 336 IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src); 337 IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst); 338 } 339 }; 340 341 template <> struct MappingTraits<TaintConfig::NameScopeArgs> { 342 static void mapping(IO &IO, TaintConfig::NameScopeArgs &NSA) { 343 IO.mapRequired("Name", std::get<0>(NSA)); 344 IO.mapOptional("Scope", std::get<1>(NSA)); 345 IO.mapRequired("Args", std::get<2>(NSA)); 346 } 347 }; 348 } // namespace yaml 349 } // namespace llvm 350 351 /// A set which is used to pass information from call pre-visit instruction 352 /// to the call post-visit. The values are unsigned integers, which are either 353 /// ReturnValueIndex, or indexes of the pointer/reference argument, which 354 /// points to data, which should be tainted on return. 355 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned) 356 357 GenericTaintChecker::ArgVector 358 GenericTaintChecker::convertToArgVector(CheckerManager &Mgr, 359 const std::string &Option, 360 const SignedArgVector &Args) { 361 ArgVector Result; 362 for (int Arg : Args) { 363 if (Arg == -1) 364 Result.push_back(ReturnValueIndex); 365 else if (Arg < -1) { 366 Result.push_back(InvalidArgIndex); 367 Mgr.reportInvalidCheckerOptionValue( 368 this, Option, 369 "an argument number for propagation rules greater or equal to -1"); 370 } else 371 Result.push_back(static_cast<unsigned>(Arg)); 372 } 373 return Result; 374 } 375 376 void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr, 377 const std::string &Option, 378 TaintConfiguration &&Config) { 379 for (auto &P : Config.Propagations) { 380 GenericTaintChecker::CustomPropagations.emplace( 381 P.Name, 382 std::make_pair(P.Scope, TaintPropagationRule{ 383 std::move(P.SrcArgs), 384 convertToArgVector(Mgr, Option, P.DstArgs), 385 P.VarType, P.VarIndex})); 386 } 387 388 for (auto &F : Config.Filters) { 389 GenericTaintChecker::CustomFilters.emplace( 390 std::get<0>(F), 391 std::make_pair(std::move(std::get<1>(F)), std::move(std::get<2>(F)))); 392 } 393 394 for (auto &S : Config.Sinks) { 395 GenericTaintChecker::CustomSinks.emplace( 396 std::get<0>(S), 397 std::make_pair(std::move(std::get<1>(S)), std::move(std::get<2>(S)))); 398 } 399 } 400 401 template <typename T> 402 auto GenericTaintChecker::findFunctionInConfig(const ConfigDataMap<T> &Map, 403 const FunctionData &FData) { 404 auto Range = Map.equal_range(std::string(FData.Name)); 405 auto It = 406 std::find_if(Range.first, Range.second, [&FData](const auto &Entry) { 407 const auto &Value = Entry.second; 408 StringRef Scope = Value.first; 409 return Scope.empty() || FData.isInScope(Scope); 410 }); 411 return It != Range.second ? It : Map.end(); 412 } 413 414 GenericTaintChecker::TaintPropagationRule 415 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( 416 const NameRuleMap &CustomPropagations, const FunctionData &FData, 417 CheckerContext &C) { 418 // TODO: Currently, we might lose precision here: we always mark a return 419 // value as tainted even if it's just a pointer, pointing to tainted data. 420 421 // Check for exact name match for functions without builtin substitutes. 422 // Use qualified name, because these are C functions without namespace. 423 TaintPropagationRule Rule = 424 llvm::StringSwitch<TaintPropagationRule>(FData.FullName) 425 // Source functions 426 // TODO: Add support for vfscanf & family. 427 .Case("fdopen", {{}, {ReturnValueIndex}}) 428 .Case("fopen", {{}, {ReturnValueIndex}}) 429 .Case("freopen", {{}, {ReturnValueIndex}}) 430 .Case("getch", {{}, {ReturnValueIndex}}) 431 .Case("getchar", {{}, {ReturnValueIndex}}) 432 .Case("getchar_unlocked", {{}, {ReturnValueIndex}}) 433 .Case("getenv", {{}, {ReturnValueIndex}}) 434 .Case("gets", {{}, {0, ReturnValueIndex}}) 435 .Case("scanf", {{}, {}, VariadicType::Dst, 1}) 436 .Case("socket", {{}, 437 {ReturnValueIndex}, 438 VariadicType::None, 439 InvalidArgIndex, 440 &TaintPropagationRule::postSocket}) 441 .Case("wgetch", {{}, {ReturnValueIndex}}) 442 // Propagating functions 443 .Case("atoi", {{0}, {ReturnValueIndex}}) 444 .Case("atol", {{0}, {ReturnValueIndex}}) 445 .Case("atoll", {{0}, {ReturnValueIndex}}) 446 .Case("fgetc", {{0}, {ReturnValueIndex}}) 447 .Case("fgetln", {{0}, {ReturnValueIndex}}) 448 .Case("fgets", {{2}, {0, ReturnValueIndex}}) 449 .Case("fscanf", {{0}, {}, VariadicType::Dst, 2}) 450 .Case("sscanf", {{0}, {}, VariadicType::Dst, 2}) 451 .Case("getc", {{0}, {ReturnValueIndex}}) 452 .Case("getc_unlocked", {{0}, {ReturnValueIndex}}) 453 .Case("getdelim", {{3}, {0}}) 454 .Case("getline", {{2}, {0}}) 455 .Case("getw", {{0}, {ReturnValueIndex}}) 456 .Case("pread", {{0, 1, 2, 3}, {1, ReturnValueIndex}}) 457 .Case("read", {{0, 2}, {1, ReturnValueIndex}}) 458 .Case("strchr", {{0}, {ReturnValueIndex}}) 459 .Case("strrchr", {{0}, {ReturnValueIndex}}) 460 .Case("tolower", {{0}, {ReturnValueIndex}}) 461 .Case("toupper", {{0}, {ReturnValueIndex}}) 462 .Default({}); 463 464 if (!Rule.isNull()) 465 return Rule; 466 assert(FData.FDecl); 467 468 // Check if it's one of the memory setting/copying functions. 469 // This check is specialized but faster then calling isCLibraryFunction. 470 const FunctionDecl *FDecl = FData.FDecl; 471 unsigned BId = 0; 472 if ((BId = FDecl->getMemoryFunctionKind())) { 473 switch (BId) { 474 case Builtin::BImemcpy: 475 case Builtin::BImemmove: 476 case Builtin::BIstrncpy: 477 case Builtin::BIstrncat: 478 return {{1, 2}, {0, ReturnValueIndex}}; 479 case Builtin::BIstrlcpy: 480 case Builtin::BIstrlcat: 481 return {{1, 2}, {0}}; 482 case Builtin::BIstrndup: 483 return {{0, 1}, {ReturnValueIndex}}; 484 485 default: 486 break; 487 } 488 } 489 490 // Process all other functions which could be defined as builtins. 491 if (Rule.isNull()) { 492 const auto OneOf = [FDecl](const auto &... Name) { 493 // FIXME: use fold expression in C++17 494 using unused = int[]; 495 bool ret = false; 496 static_cast<void>(unused{ 497 0, (ret |= CheckerContext::isCLibraryFunction(FDecl, Name), 0)...}); 498 return ret; 499 }; 500 if (OneOf("snprintf")) 501 return {{1}, {0, ReturnValueIndex}, VariadicType::Src, 3}; 502 if (OneOf("sprintf")) 503 return {{}, {0, ReturnValueIndex}, VariadicType::Src, 2}; 504 if (OneOf("strcpy", "stpcpy", "strcat")) 505 return {{1}, {0, ReturnValueIndex}}; 506 if (OneOf("bcopy")) 507 return {{0, 2}, {1}}; 508 if (OneOf("strdup", "strdupa", "wcsdup")) 509 return {{0}, {ReturnValueIndex}}; 510 } 511 512 // Skipping the following functions, since they might be used for cleansing or 513 // smart memory copy: 514 // - memccpy - copying until hitting a special character. 515 516 auto It = findFunctionInConfig(CustomPropagations, FData); 517 if (It != CustomPropagations.end()) 518 return It->second.second; 519 return {}; 520 } 521 522 void GenericTaintChecker::checkPreCall(const CallEvent &Call, 523 CheckerContext &C) const { 524 Optional<FunctionData> FData = FunctionData::create(Call, C); 525 if (!FData) 526 return; 527 528 // Check for taintedness related errors first: system call, uncontrolled 529 // format string, tainted buffer size. 530 if (checkPre(Call, *FData, C)) 531 return; 532 533 // Marks the function's arguments and/or return value tainted if it present in 534 // the list. 535 if (addSourcesPre(Call, *FData, C)) 536 return; 537 538 addFiltersPre(Call, *FData, C); 539 } 540 541 void GenericTaintChecker::checkPostCall(const CallEvent &Call, 542 CheckerContext &C) const { 543 // Set the marked values as tainted. The return value only accessible from 544 // checkPostStmt. 545 propagateFromPre(Call, C); 546 } 547 548 void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State, 549 const char *NL, const char *Sep) const { 550 printTaint(State, Out, NL, Sep); 551 } 552 553 bool GenericTaintChecker::addSourcesPre(const CallEvent &Call, 554 const FunctionData &FData, 555 CheckerContext &C) const { 556 // First, try generating a propagation rule for this function. 557 TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule( 558 this->CustomPropagations, FData, C); 559 if (!Rule.isNull()) { 560 ProgramStateRef State = Rule.process(Call, C); 561 if (State) { 562 C.addTransition(State); 563 return true; 564 } 565 } 566 return false; 567 } 568 569 bool GenericTaintChecker::addFiltersPre(const CallEvent &Call, 570 const FunctionData &FData, 571 CheckerContext &C) const { 572 auto It = findFunctionInConfig(CustomFilters, FData); 573 if (It == CustomFilters.end()) 574 return false; 575 576 ProgramStateRef State = C.getState(); 577 const auto &Value = It->second; 578 const ArgVector &Args = Value.second; 579 for (unsigned ArgNum : Args) { 580 if (ArgNum >= Call.getNumArgs()) 581 continue; 582 583 const Expr *Arg = Call.getArgExpr(ArgNum); 584 Optional<SVal> V = getPointeeOf(C, Arg); 585 if (V) 586 State = removeTaint(State, *V); 587 } 588 589 if (State != C.getState()) { 590 C.addTransition(State); 591 return true; 592 } 593 return false; 594 } 595 596 bool GenericTaintChecker::propagateFromPre(const CallEvent &Call, 597 CheckerContext &C) { 598 ProgramStateRef State = C.getState(); 599 600 // Depending on what was tainted at pre-visit, we determined a set of 601 // arguments which should be tainted after the function returns. These are 602 // stored in the state as TaintArgsOnPostVisit set. 603 TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>(); 604 if (TaintArgs.isEmpty()) 605 return false; 606 607 for (unsigned ArgNum : TaintArgs) { 608 // Special handling for the tainted return value. 609 if (ArgNum == ReturnValueIndex) { 610 State = addTaint(State, Call.getReturnValue()); 611 continue; 612 } 613 614 // The arguments are pointer arguments. The data they are pointing at is 615 // tainted after the call. 616 if (Call.getNumArgs() < (ArgNum + 1)) 617 return false; 618 const Expr *Arg = Call.getArgExpr(ArgNum); 619 Optional<SVal> V = getPointeeOf(C, Arg); 620 if (V) 621 State = addTaint(State, *V); 622 } 623 624 // Clear up the taint info from the state. 625 State = State->remove<TaintArgsOnPostVisit>(); 626 627 if (State != C.getState()) { 628 C.addTransition(State); 629 return true; 630 } 631 return false; 632 } 633 634 bool GenericTaintChecker::checkPre(const CallEvent &Call, 635 const FunctionData &FData, 636 CheckerContext &C) const { 637 if (checkUncontrolledFormatString(Call, C)) 638 return true; 639 640 if (checkSystemCall(Call, FData.Name, C)) 641 return true; 642 643 if (checkTaintedBufferSize(Call, C)) 644 return true; 645 646 return checkCustomSinks(Call, FData, C); 647 } 648 649 Optional<SVal> GenericTaintChecker::getPointeeOf(CheckerContext &C, 650 const Expr *Arg) { 651 ProgramStateRef State = C.getState(); 652 SVal AddrVal = C.getSVal(Arg->IgnoreParens()); 653 if (AddrVal.isUnknownOrUndef()) 654 return None; 655 656 Optional<Loc> AddrLoc = AddrVal.getAs<Loc>(); 657 if (!AddrLoc) 658 return None; 659 660 QualType ArgTy = Arg->getType().getCanonicalType(); 661 if (!ArgTy->isPointerType()) 662 return State->getSVal(*AddrLoc); 663 664 QualType ValTy = ArgTy->getPointeeType(); 665 666 // Do not dereference void pointers. Treat them as byte pointers instead. 667 // FIXME: we might want to consider more than just the first byte. 668 if (ValTy->isVoidType()) 669 ValTy = C.getASTContext().CharTy; 670 671 return State->getSVal(*AddrLoc, ValTy); 672 } 673 674 ProgramStateRef 675 GenericTaintChecker::TaintPropagationRule::process(const CallEvent &Call, 676 CheckerContext &C) const { 677 ProgramStateRef State = C.getState(); 678 679 // Check for taint in arguments. 680 bool IsTainted = true; 681 for (unsigned ArgNum : SrcArgs) { 682 if (ArgNum >= Call.getNumArgs()) 683 continue; 684 685 if ((IsTainted = 686 isTaintedOrPointsToTainted(Call.getArgExpr(ArgNum), State, C))) 687 break; 688 } 689 690 // Check for taint in variadic arguments. 691 if (!IsTainted && VariadicType::Src == VarType) { 692 // Check if any of the arguments is tainted 693 for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) { 694 if ((IsTainted = 695 isTaintedOrPointsToTainted(Call.getArgExpr(i), State, C))) 696 break; 697 } 698 } 699 700 if (PropagationFunc) 701 IsTainted = PropagationFunc(IsTainted, Call, C); 702 703 if (!IsTainted) 704 return State; 705 706 // Mark the arguments which should be tainted after the function returns. 707 for (unsigned ArgNum : DstArgs) { 708 // Should mark the return value? 709 if (ArgNum == ReturnValueIndex) { 710 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex); 711 continue; 712 } 713 714 if (ArgNum >= Call.getNumArgs()) 715 continue; 716 717 // Mark the given argument. 718 State = State->add<TaintArgsOnPostVisit>(ArgNum); 719 } 720 721 // Mark all variadic arguments tainted if present. 722 if (VariadicType::Dst == VarType) { 723 // For all pointer and references that were passed in: 724 // If they are not pointing to const data, mark data as tainted. 725 // TODO: So far we are just going one level down; ideally we'd need to 726 // recurse here. 727 for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) { 728 const Expr *Arg = Call.getArgExpr(i); 729 // Process pointer argument. 730 const Type *ArgTy = Arg->getType().getTypePtr(); 731 QualType PType = ArgTy->getPointeeType(); 732 if ((!PType.isNull() && !PType.isConstQualified()) || 733 (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) { 734 State = State->add<TaintArgsOnPostVisit>(i); 735 } 736 } 737 } 738 739 return State; 740 } 741 742 // If argument 0(protocol domain) is network, the return value should get taint. 743 bool GenericTaintChecker::TaintPropagationRule::postSocket( 744 bool /*IsTainted*/, const CallEvent &Call, CheckerContext &C) { 745 SourceLocation DomLoc = Call.getArgExpr(0)->getExprLoc(); 746 StringRef DomName = C.getMacroNameOrSpelling(DomLoc); 747 // White list the internal communication protocols. 748 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") || 749 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36")) 750 return false; 751 return true; 752 } 753 754 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) { 755 ProgramStateRef State = C.getState(); 756 SVal Val = C.getSVal(E); 757 758 // stdin is a pointer, so it would be a region. 759 const MemRegion *MemReg = Val.getAsRegion(); 760 761 // The region should be symbolic, we do not know it's value. 762 const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg); 763 if (!SymReg) 764 return false; 765 766 // Get it's symbol and find the declaration region it's pointing to. 767 const auto *Sm = dyn_cast<SymbolRegionValue>(SymReg->getSymbol()); 768 if (!Sm) 769 return false; 770 const auto *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion()); 771 if (!DeclReg) 772 return false; 773 774 // This region corresponds to a declaration, find out if it's a global/extern 775 // variable named stdin with the proper type. 776 if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { 777 D = D->getCanonicalDecl(); 778 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) { 779 const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr()); 780 if (PtrTy && PtrTy->getPointeeType().getCanonicalType() == 781 C.getASTContext().getFILEType().getCanonicalType()) 782 return true; 783 } 784 } 785 return false; 786 } 787 788 static bool getPrintfFormatArgumentNum(const CallEvent &Call, 789 const CheckerContext &C, 790 unsigned &ArgNum) { 791 // Find if the function contains a format string argument. 792 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, 793 // vsnprintf, syslog, custom annotated functions. 794 const FunctionDecl *FDecl = Call.getDecl()->getAsFunction(); 795 if (!FDecl) 796 return false; 797 for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) { 798 ArgNum = Format->getFormatIdx() - 1; 799 if ((Format->getType()->getName() == "printf") && 800 Call.getNumArgs() > ArgNum) 801 return true; 802 } 803 804 // Or if a function is named setproctitle (this is a heuristic). 805 if (C.getCalleeName(FDecl).find("setproctitle") != StringRef::npos) { 806 ArgNum = 0; 807 return true; 808 } 809 810 return false; 811 } 812 813 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg, 814 CheckerContext &C) const { 815 assert(E); 816 817 // Check for taint. 818 ProgramStateRef State = C.getState(); 819 Optional<SVal> PointedToSVal = getPointeeOf(C, E); 820 SVal TaintedSVal; 821 if (PointedToSVal && isTainted(State, *PointedToSVal)) 822 TaintedSVal = *PointedToSVal; 823 else if (isTainted(State, E, C.getLocationContext())) 824 TaintedSVal = C.getSVal(E); 825 else 826 return false; 827 828 // Generate diagnostic. 829 if (ExplodedNode *N = C.generateNonFatalErrorNode()) { 830 initBugType(); 831 auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N); 832 report->addRange(E->getSourceRange()); 833 report->addVisitor(std::make_unique<TaintBugVisitor>(TaintedSVal)); 834 C.emitReport(std::move(report)); 835 return true; 836 } 837 return false; 838 } 839 840 bool GenericTaintChecker::checkUncontrolledFormatString( 841 const CallEvent &Call, CheckerContext &C) const { 842 // Check if the function contains a format string argument. 843 unsigned ArgNum = 0; 844 if (!getPrintfFormatArgumentNum(Call, C, ArgNum)) 845 return false; 846 847 // If either the format string content or the pointer itself are tainted, 848 // warn. 849 return generateReportIfTainted(Call.getArgExpr(ArgNum), 850 MsgUncontrolledFormatString, C); 851 } 852 853 bool GenericTaintChecker::checkSystemCall(const CallEvent &Call, StringRef Name, 854 CheckerContext &C) const { 855 // TODO: It might make sense to run this check on demand. In some cases, 856 // we should check if the environment has been cleansed here. We also might 857 // need to know if the user was reset before these calls(seteuid). 858 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name) 859 .Case("system", 0) 860 .Case("popen", 0) 861 .Case("execl", 0) 862 .Case("execle", 0) 863 .Case("execlp", 0) 864 .Case("execv", 0) 865 .Case("execvp", 0) 866 .Case("execvP", 0) 867 .Case("execve", 0) 868 .Case("dlopen", 0) 869 .Default(InvalidArgIndex); 870 871 if (ArgNum == InvalidArgIndex || Call.getNumArgs() < (ArgNum + 1)) 872 return false; 873 874 return generateReportIfTainted(Call.getArgExpr(ArgNum), MsgSanitizeSystemArgs, 875 C); 876 } 877 878 // TODO: Should this check be a part of the CString checker? 879 // If yes, should taint be a global setting? 880 bool GenericTaintChecker::checkTaintedBufferSize(const CallEvent &Call, 881 CheckerContext &C) const { 882 const auto *FDecl = Call.getDecl()->getAsFunction(); 883 // If the function has a buffer size argument, set ArgNum. 884 unsigned ArgNum = InvalidArgIndex; 885 unsigned BId = 0; 886 if ((BId = FDecl->getMemoryFunctionKind())) { 887 switch (BId) { 888 case Builtin::BImemcpy: 889 case Builtin::BImemmove: 890 case Builtin::BIstrncpy: 891 ArgNum = 2; 892 break; 893 case Builtin::BIstrndup: 894 ArgNum = 1; 895 break; 896 default: 897 break; 898 } 899 } 900 901 if (ArgNum == InvalidArgIndex) { 902 using CCtx = CheckerContext; 903 if (CCtx::isCLibraryFunction(FDecl, "malloc") || 904 CCtx::isCLibraryFunction(FDecl, "calloc") || 905 CCtx::isCLibraryFunction(FDecl, "alloca")) 906 ArgNum = 0; 907 else if (CCtx::isCLibraryFunction(FDecl, "memccpy")) 908 ArgNum = 3; 909 else if (CCtx::isCLibraryFunction(FDecl, "realloc")) 910 ArgNum = 1; 911 else if (CCtx::isCLibraryFunction(FDecl, "bcopy")) 912 ArgNum = 2; 913 } 914 915 return ArgNum != InvalidArgIndex && Call.getNumArgs() > ArgNum && 916 generateReportIfTainted(Call.getArgExpr(ArgNum), MsgTaintedBufferSize, 917 C); 918 } 919 920 bool GenericTaintChecker::checkCustomSinks(const CallEvent &Call, 921 const FunctionData &FData, 922 CheckerContext &C) const { 923 auto It = findFunctionInConfig(CustomSinks, FData); 924 if (It == CustomSinks.end()) 925 return false; 926 927 const auto &Value = It->second; 928 const GenericTaintChecker::ArgVector &Args = Value.second; 929 for (unsigned ArgNum : Args) { 930 if (ArgNum >= Call.getNumArgs()) 931 continue; 932 933 if (generateReportIfTainted(Call.getArgExpr(ArgNum), MsgCustomSink, C)) 934 return true; 935 } 936 937 return false; 938 } 939 940 void ento::registerGenericTaintChecker(CheckerManager &Mgr) { 941 auto *Checker = Mgr.registerChecker<GenericTaintChecker>(); 942 std::string Option{"Config"}; 943 StringRef ConfigFile = 944 Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option); 945 llvm::Optional<TaintConfig> Config = 946 getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile); 947 if (Config) 948 Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue())); 949 } 950 951 bool ento::shouldRegisterGenericTaintChecker(const CheckerManager &mgr) { 952 return true; 953 } 954