1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This checker defines the attack surface for generic taint propagation. 10 // 11 // The taint information produced by it might be useful to other checkers. For 12 // example, checkers should report errors which involve tainted data more 13 // aggressively, even if the involved symbols are under constrained. 14 // 15 //===----------------------------------------------------------------------===// 16 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" 17 #include "clang/AST/Attr.h" 18 #include "clang/Basic/Builtins.h" 19 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 20 #include "clang/StaticAnalyzer/Core/Checker.h" 21 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 22 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 23 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 24 #include <climits> 25 #include <initializer_list> 26 #include <utility> 27 28 using namespace clang; 29 using namespace ento; 30 31 namespace { 32 class GenericTaintChecker 33 : public Checker<check::PostStmt<CallExpr>, check::PreStmt<CallExpr>> { 34 public: 35 static void *getTag() { 36 static int Tag; 37 return &Tag; 38 } 39 40 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; 41 42 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; 43 44 private: 45 static const unsigned InvalidArgIndex = UINT_MAX; 46 /// Denotes the return vale. 47 static const unsigned ReturnValueIndex = UINT_MAX - 1; 48 49 mutable std::unique_ptr<BugType> BT; 50 void initBugType() const { 51 if (!BT) 52 BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data")); 53 } 54 55 /// Catch taint related bugs. Check if tainted data is passed to a 56 /// system call etc. 57 bool checkPre(const CallExpr *CE, CheckerContext &C) const; 58 59 /// Add taint sources on a pre-visit. 60 void addSourcesPre(const CallExpr *CE, CheckerContext &C) const; 61 62 /// Propagate taint generated at pre-visit. 63 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const; 64 65 /// Add taint sources on a post visit. 66 void addSourcesPost(const CallExpr *CE, CheckerContext &C) const; 67 68 /// Check if the region the expression evaluates to is the standard input, 69 /// and thus, is tainted. 70 static bool isStdin(const Expr *E, CheckerContext &C); 71 72 /// Given a pointer argument, return the value it points to. 73 static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg); 74 75 /// Functions defining the attack surface. 76 using FnCheck = ProgramStateRef (GenericTaintChecker::*)( 77 const CallExpr *, CheckerContext &C) const; 78 ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const; 79 ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const; 80 ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const; 81 82 /// Taint the scanned input if the file is tainted. 83 ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const; 84 85 /// Check for CWE-134: Uncontrolled Format String. 86 static const char MsgUncontrolledFormatString[]; 87 bool checkUncontrolledFormatString(const CallExpr *CE, 88 CheckerContext &C) const; 89 90 /// Check for: 91 /// CERT/STR02-C. "Sanitize data passed to complex subsystems" 92 /// CWE-78, "Failure to Sanitize Data into an OS Command" 93 static const char MsgSanitizeSystemArgs[]; 94 bool checkSystemCall(const CallExpr *CE, StringRef Name, 95 CheckerContext &C) const; 96 97 /// Check if tainted data is used as a buffer size ins strn.. functions, 98 /// and allocators. 99 static const char MsgTaintedBufferSize[]; 100 bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl, 101 CheckerContext &C) const; 102 103 /// Generate a report if the expression is tainted or points to tainted data. 104 bool generateReportIfTainted(const Expr *E, const char Msg[], 105 CheckerContext &C) const; 106 107 using ArgVector = SmallVector<unsigned, 2>; 108 109 /// A struct used to specify taint propagation rules for a function. 110 /// 111 /// If any of the possible taint source arguments is tainted, all of the 112 /// destination arguments should also be tainted. Use InvalidArgIndex in the 113 /// src list to specify that all of the arguments can introduce taint. Use 114 /// InvalidArgIndex in the dst arguments to signify that all the non-const 115 /// pointer and reference arguments might be tainted on return. If 116 /// ReturnValueIndex is added to the dst list, the return value will be 117 /// tainted. 118 struct TaintPropagationRule { 119 enum class VariadicType { None, Src, Dst }; 120 121 /// List of arguments which can be taint sources and should be checked. 122 ArgVector SrcArgs; 123 /// List of arguments which should be tainted on function return. 124 ArgVector DstArgs; 125 /// Index for the first variadic parameter if exist. 126 unsigned VariadicIndex; 127 /// Show when a function has variadic parameters. If it has, it marks all 128 /// of them as source or destination. 129 VariadicType VarType; 130 131 TaintPropagationRule() 132 : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None) {} 133 134 TaintPropagationRule(std::initializer_list<unsigned> &&Src, 135 std::initializer_list<unsigned> &&Dst, 136 VariadicType Var = VariadicType::None, 137 unsigned VarIndex = InvalidArgIndex) 138 : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)), 139 VariadicIndex(VarIndex), VarType(Var) {} 140 141 /// Get the propagation rule for a given function. 142 static TaintPropagationRule 143 getTaintPropagationRule(const FunctionDecl *FDecl, StringRef Name, 144 CheckerContext &C); 145 146 void addSrcArg(unsigned A) { SrcArgs.push_back(A); } 147 void addDstArg(unsigned A) { DstArgs.push_back(A); } 148 149 bool isNull() const { 150 return SrcArgs.empty() && DstArgs.empty() && 151 VariadicType::None == VarType; 152 } 153 154 bool isDestinationArgument(unsigned ArgNum) const { 155 return (llvm::find(DstArgs, ArgNum) != DstArgs.end()); 156 } 157 158 static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State, 159 CheckerContext &C) { 160 if (State->isTainted(E, C.getLocationContext()) || isStdin(E, C)) 161 return true; 162 163 if (!E->getType().getTypePtr()->isPointerType()) 164 return false; 165 166 Optional<SVal> V = getPointedToSVal(C, E); 167 return (V && State->isTainted(*V)); 168 } 169 170 /// Pre-process a function which propagates taint according to the 171 /// taint rule. 172 ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const; 173 }; 174 }; 175 176 const unsigned GenericTaintChecker::ReturnValueIndex; 177 const unsigned GenericTaintChecker::InvalidArgIndex; 178 179 const char GenericTaintChecker::MsgUncontrolledFormatString[] = 180 "Untrusted data is used as a format string " 181 "(CWE-134: Uncontrolled Format String)"; 182 183 const char GenericTaintChecker::MsgSanitizeSystemArgs[] = 184 "Untrusted data is passed to a system call " 185 "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; 186 187 const char GenericTaintChecker::MsgTaintedBufferSize[] = 188 "Untrusted data is used to specify the buffer size " 189 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space " 190 "for character data and the null terminator)"; 191 192 } // end of anonymous namespace 193 194 /// A set which is used to pass information from call pre-visit instruction 195 /// to the call post-visit. The values are unsigned integers, which are either 196 /// ReturnValueIndex, or indexes of the pointer/reference argument, which 197 /// points to data, which should be tainted on return. 198 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned) 199 200 GenericTaintChecker::TaintPropagationRule 201 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( 202 const FunctionDecl *FDecl, StringRef Name, CheckerContext &C) { 203 // TODO: Currently, we might lose precision here: we always mark a return 204 // value as tainted even if it's just a pointer, pointing to tainted data. 205 206 // Check for exact name match for functions without builtin substitutes. 207 TaintPropagationRule Rule = 208 llvm::StringSwitch<TaintPropagationRule>(Name) 209 .Case("atoi", TaintPropagationRule({0}, {ReturnValueIndex})) 210 .Case("atol", TaintPropagationRule({0}, {ReturnValueIndex})) 211 .Case("atoll", TaintPropagationRule({0}, {ReturnValueIndex})) 212 .Case("getc", TaintPropagationRule({0}, {ReturnValueIndex})) 213 .Case("fgetc", TaintPropagationRule({0}, {ReturnValueIndex})) 214 .Case("getc_unlocked", TaintPropagationRule({0}, {ReturnValueIndex})) 215 .Case("getw", TaintPropagationRule({0}, {ReturnValueIndex})) 216 .Case("toupper", TaintPropagationRule({0}, {ReturnValueIndex})) 217 .Case("tolower", TaintPropagationRule({0}, {ReturnValueIndex})) 218 .Case("strchr", TaintPropagationRule({0}, {ReturnValueIndex})) 219 .Case("strrchr", TaintPropagationRule({0}, {ReturnValueIndex})) 220 .Case("read", TaintPropagationRule({0, 2}, {1, ReturnValueIndex})) 221 .Case("pread", 222 TaintPropagationRule({0, 1, 2, 3}, {1, ReturnValueIndex})) 223 .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex})) 224 .Case("fgets", TaintPropagationRule({2}, {0, ReturnValueIndex})) 225 .Case("getline", TaintPropagationRule({2}, {0})) 226 .Case("getdelim", TaintPropagationRule({3}, {0})) 227 .Case("fgetln", TaintPropagationRule({0}, {ReturnValueIndex})) 228 .Default(TaintPropagationRule()); 229 230 if (!Rule.isNull()) 231 return Rule; 232 233 // Check if it's one of the memory setting/copying functions. 234 // This check is specialized but faster then calling isCLibraryFunction. 235 unsigned BId = 0; 236 if ((BId = FDecl->getMemoryFunctionKind())) 237 switch (BId) { 238 case Builtin::BImemcpy: 239 case Builtin::BImemmove: 240 case Builtin::BIstrncpy: 241 case Builtin::BIstrncat: 242 return TaintPropagationRule({1, 2}, {0, ReturnValueIndex}); 243 case Builtin::BIstrlcpy: 244 case Builtin::BIstrlcat: 245 return TaintPropagationRule({1, 2}, {0}); 246 case Builtin::BIstrndup: 247 return TaintPropagationRule({0, 1}, {ReturnValueIndex}); 248 249 default: 250 break; 251 }; 252 253 // Process all other functions which could be defined as builtins. 254 if (Rule.isNull()) { 255 if (C.isCLibraryFunction(FDecl, "snprintf")) 256 return TaintPropagationRule({1}, {0, ReturnValueIndex}, VariadicType::Src, 257 3); 258 else if (C.isCLibraryFunction(FDecl, "sprintf")) 259 return TaintPropagationRule({}, {0, ReturnValueIndex}, VariadicType::Src, 260 2); 261 else if (C.isCLibraryFunction(FDecl, "strcpy") || 262 C.isCLibraryFunction(FDecl, "stpcpy") || 263 C.isCLibraryFunction(FDecl, "strcat")) 264 return TaintPropagationRule({1}, {0, ReturnValueIndex}); 265 else if (C.isCLibraryFunction(FDecl, "bcopy")) 266 return TaintPropagationRule({0, 2}, {1}); 267 else if (C.isCLibraryFunction(FDecl, "strdup") || 268 C.isCLibraryFunction(FDecl, "strdupa")) 269 return TaintPropagationRule({0}, {ReturnValueIndex}); 270 else if (C.isCLibraryFunction(FDecl, "wcsdup")) 271 return TaintPropagationRule({0}, {ReturnValueIndex}); 272 } 273 274 // Skipping the following functions, since they might be used for cleansing 275 // or smart memory copy: 276 // - memccpy - copying until hitting a special character. 277 278 return TaintPropagationRule(); 279 } 280 281 void GenericTaintChecker::checkPreStmt(const CallExpr *CE, 282 CheckerContext &C) const { 283 // Check for errors first. 284 if (checkPre(CE, C)) 285 return; 286 287 // Add taint second. 288 addSourcesPre(CE, C); 289 } 290 291 void GenericTaintChecker::checkPostStmt(const CallExpr *CE, 292 CheckerContext &C) const { 293 if (propagateFromPre(CE, C)) 294 return; 295 addSourcesPost(CE, C); 296 } 297 298 void GenericTaintChecker::addSourcesPre(const CallExpr *CE, 299 CheckerContext &C) const { 300 ProgramStateRef State = nullptr; 301 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 302 if (!FDecl || FDecl->getKind() != Decl::Function) 303 return; 304 305 StringRef Name = C.getCalleeName(FDecl); 306 if (Name.empty()) 307 return; 308 309 // First, try generating a propagation rule for this function. 310 TaintPropagationRule Rule = 311 TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C); 312 if (!Rule.isNull()) { 313 State = Rule.process(CE, C); 314 if (!State) 315 return; 316 C.addTransition(State); 317 return; 318 } 319 320 // Otherwise, check if we have custom pre-processing implemented. 321 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 322 .Case("fscanf", &GenericTaintChecker::preFscanf) 323 .Default(nullptr); 324 // Check and evaluate the call. 325 if (evalFunction) 326 State = (this->*evalFunction)(CE, C); 327 if (!State) 328 return; 329 C.addTransition(State); 330 } 331 332 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, 333 CheckerContext &C) const { 334 ProgramStateRef State = C.getState(); 335 336 // Depending on what was tainted at pre-visit, we determined a set of 337 // arguments which should be tainted after the function returns. These are 338 // stored in the state as TaintArgsOnPostVisit set. 339 TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>(); 340 if (TaintArgs.isEmpty()) 341 return false; 342 343 for (unsigned ArgNum : TaintArgs) { 344 // Special handling for the tainted return value. 345 if (ArgNum == ReturnValueIndex) { 346 State = State->addTaint(CE, C.getLocationContext()); 347 continue; 348 } 349 350 // The arguments are pointer arguments. The data they are pointing at is 351 // tainted after the call. 352 if (CE->getNumArgs() < (ArgNum + 1)) 353 return false; 354 const Expr *Arg = CE->getArg(ArgNum); 355 Optional<SVal> V = getPointedToSVal(C, Arg); 356 if (V) 357 State = State->addTaint(*V); 358 } 359 360 // Clear up the taint info from the state. 361 State = State->remove<TaintArgsOnPostVisit>(); 362 363 if (State != C.getState()) { 364 C.addTransition(State); 365 return true; 366 } 367 return false; 368 } 369 370 void GenericTaintChecker::addSourcesPost(const CallExpr *CE, 371 CheckerContext &C) const { 372 // Define the attack surface. 373 // Set the evaluation function by switching on the callee name. 374 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 375 if (!FDecl || FDecl->getKind() != Decl::Function) 376 return; 377 378 StringRef Name = C.getCalleeName(FDecl); 379 if (Name.empty()) 380 return; 381 FnCheck evalFunction = 382 llvm::StringSwitch<FnCheck>(Name) 383 .Case("scanf", &GenericTaintChecker::postScanf) 384 // TODO: Add support for vfscanf & family. 385 .Case("getchar", &GenericTaintChecker::postRetTaint) 386 .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint) 387 .Case("getenv", &GenericTaintChecker::postRetTaint) 388 .Case("fopen", &GenericTaintChecker::postRetTaint) 389 .Case("fdopen", &GenericTaintChecker::postRetTaint) 390 .Case("freopen", &GenericTaintChecker::postRetTaint) 391 .Case("getch", &GenericTaintChecker::postRetTaint) 392 .Case("wgetch", &GenericTaintChecker::postRetTaint) 393 .Case("socket", &GenericTaintChecker::postSocket) 394 .Default(nullptr); 395 396 // If the callee isn't defined, it is not of security concern. 397 // Check and evaluate the call. 398 ProgramStateRef State = nullptr; 399 if (evalFunction) 400 State = (this->*evalFunction)(CE, C); 401 if (!State) 402 return; 403 404 C.addTransition(State); 405 } 406 407 bool GenericTaintChecker::checkPre(const CallExpr *CE, 408 CheckerContext &C) const { 409 410 if (checkUncontrolledFormatString(CE, C)) 411 return true; 412 413 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 414 if (!FDecl || FDecl->getKind() != Decl::Function) 415 return false; 416 417 StringRef Name = C.getCalleeName(FDecl); 418 if (Name.empty()) 419 return false; 420 421 if (checkSystemCall(CE, Name, C)) 422 return true; 423 424 if (checkTaintedBufferSize(CE, FDecl, C)) 425 return true; 426 427 return false; 428 } 429 430 Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C, 431 const Expr *Arg) { 432 ProgramStateRef State = C.getState(); 433 SVal AddrVal = C.getSVal(Arg->IgnoreParens()); 434 if (AddrVal.isUnknownOrUndef()) 435 return None; 436 437 Optional<Loc> AddrLoc = AddrVal.getAs<Loc>(); 438 if (!AddrLoc) 439 return None; 440 441 QualType ArgTy = Arg->getType().getCanonicalType(); 442 if (!ArgTy->isPointerType()) 443 return None; 444 445 QualType ValTy = ArgTy->getPointeeType(); 446 447 // Do not dereference void pointers. Treat them as byte pointers instead. 448 // FIXME: we might want to consider more than just the first byte. 449 if (ValTy->isVoidType()) 450 ValTy = C.getASTContext().CharTy; 451 452 return State->getSVal(*AddrLoc, ValTy); 453 } 454 455 ProgramStateRef 456 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE, 457 CheckerContext &C) const { 458 ProgramStateRef State = C.getState(); 459 460 // Check for taint in arguments. 461 bool IsTainted = true; 462 for (unsigned ArgNum : SrcArgs) { 463 if (ArgNum >= CE->getNumArgs()) 464 return State; 465 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C))) 466 break; 467 } 468 469 // Check for taint in variadic arguments. 470 if (!IsTainted && VariadicType::Src == VarType) { 471 // Check if any of the arguments is tainted 472 for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) { 473 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C))) 474 break; 475 } 476 } 477 478 if (!IsTainted) 479 return State; 480 481 // Mark the arguments which should be tainted after the function returns. 482 for (unsigned ArgNum : DstArgs) { 483 // Should mark the return value? 484 if (ArgNum == ReturnValueIndex) { 485 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex); 486 continue; 487 } 488 489 // Mark the given argument. 490 assert(ArgNum < CE->getNumArgs()); 491 State = State->add<TaintArgsOnPostVisit>(ArgNum); 492 } 493 494 // Mark all variadic arguments tainted if present. 495 if (VariadicType::Dst == VarType) { 496 // For all pointer and references that were passed in: 497 // If they are not pointing to const data, mark data as tainted. 498 // TODO: So far we are just going one level down; ideally we'd need to 499 // recurse here. 500 for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) { 501 const Expr *Arg = CE->getArg(i); 502 // Process pointer argument. 503 const Type *ArgTy = Arg->getType().getTypePtr(); 504 QualType PType = ArgTy->getPointeeType(); 505 if ((!PType.isNull() && !PType.isConstQualified()) || 506 (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) 507 State = State->add<TaintArgsOnPostVisit>(i); 508 } 509 } 510 511 return State; 512 } 513 514 // If argument 0 (file descriptor) is tainted, all arguments except for arg 0 515 // and arg 1 should get taint. 516 ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE, 517 CheckerContext &C) const { 518 assert(CE->getNumArgs() >= 2); 519 ProgramStateRef State = C.getState(); 520 521 // Check is the file descriptor is tainted. 522 if (State->isTainted(CE->getArg(0), C.getLocationContext()) || 523 isStdin(CE->getArg(0), C)) { 524 // All arguments except for the first two should get taint. 525 for (unsigned int i = 2; i < CE->getNumArgs(); ++i) 526 State = State->add<TaintArgsOnPostVisit>(i); 527 return State; 528 } 529 530 return nullptr; 531 } 532 533 // If argument 0(protocol domain) is network, the return value should get taint. 534 ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE, 535 CheckerContext &C) const { 536 ProgramStateRef State = C.getState(); 537 if (CE->getNumArgs() < 3) 538 return State; 539 540 SourceLocation DomLoc = CE->getArg(0)->getExprLoc(); 541 StringRef DomName = C.getMacroNameOrSpelling(DomLoc); 542 // White list the internal communication protocols. 543 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") || 544 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36")) 545 return State; 546 State = State->addTaint(CE, C.getLocationContext()); 547 return State; 548 } 549 550 ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE, 551 CheckerContext &C) const { 552 ProgramStateRef State = C.getState(); 553 if (CE->getNumArgs() < 2) 554 return State; 555 556 // All arguments except for the very first one should get taint. 557 for (unsigned int i = 1; i < CE->getNumArgs(); ++i) { 558 // The arguments are pointer arguments. The data they are pointing at is 559 // tainted after the call. 560 const Expr *Arg = CE->getArg(i); 561 Optional<SVal> V = getPointedToSVal(C, Arg); 562 if (V) 563 State = State->addTaint(*V); 564 } 565 return State; 566 } 567 568 ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE, 569 CheckerContext &C) const { 570 return C.getState()->addTaint(CE, C.getLocationContext()); 571 } 572 573 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) { 574 ProgramStateRef State = C.getState(); 575 SVal Val = C.getSVal(E); 576 577 // stdin is a pointer, so it would be a region. 578 const MemRegion *MemReg = Val.getAsRegion(); 579 580 // The region should be symbolic, we do not know it's value. 581 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg); 582 if (!SymReg) 583 return false; 584 585 // Get it's symbol and find the declaration region it's pointing to. 586 const SymbolRegionValue *Sm = 587 dyn_cast<SymbolRegionValue>(SymReg->getSymbol()); 588 if (!Sm) 589 return false; 590 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion()); 591 if (!DeclReg) 592 return false; 593 594 // This region corresponds to a declaration, find out if it's a global/extern 595 // variable named stdin with the proper type. 596 if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { 597 D = D->getCanonicalDecl(); 598 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) { 599 const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr()); 600 if (PtrTy && PtrTy->getPointeeType().getCanonicalType() == 601 C.getASTContext().getFILEType().getCanonicalType()) 602 return true; 603 } 604 } 605 return false; 606 } 607 608 static bool getPrintfFormatArgumentNum(const CallExpr *CE, 609 const CheckerContext &C, 610 unsigned int &ArgNum) { 611 // Find if the function contains a format string argument. 612 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, 613 // vsnprintf, syslog, custom annotated functions. 614 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 615 if (!FDecl) 616 return false; 617 for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) { 618 ArgNum = Format->getFormatIdx() - 1; 619 if ((Format->getType()->getName() == "printf") && CE->getNumArgs() > ArgNum) 620 return true; 621 } 622 623 // Or if a function is named setproctitle (this is a heuristic). 624 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) { 625 ArgNum = 0; 626 return true; 627 } 628 629 return false; 630 } 631 632 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, 633 const char Msg[], 634 CheckerContext &C) const { 635 assert(E); 636 637 // Check for taint. 638 ProgramStateRef State = C.getState(); 639 Optional<SVal> PointedToSVal = getPointedToSVal(C, E); 640 SVal TaintedSVal; 641 if (PointedToSVal && State->isTainted(*PointedToSVal)) 642 TaintedSVal = *PointedToSVal; 643 else if (State->isTainted(E, C.getLocationContext())) 644 TaintedSVal = C.getSVal(E); 645 else 646 return false; 647 648 // Generate diagnostic. 649 if (ExplodedNode *N = C.generateNonFatalErrorNode()) { 650 initBugType(); 651 auto report = llvm::make_unique<BugReport>(*BT, Msg, N); 652 report->addRange(E->getSourceRange()); 653 report->addVisitor(llvm::make_unique<TaintBugVisitor>(TaintedSVal)); 654 C.emitReport(std::move(report)); 655 return true; 656 } 657 return false; 658 } 659 660 bool GenericTaintChecker::checkUncontrolledFormatString( 661 const CallExpr *CE, CheckerContext &C) const { 662 // Check if the function contains a format string argument. 663 unsigned int ArgNum = 0; 664 if (!getPrintfFormatArgumentNum(CE, C, ArgNum)) 665 return false; 666 667 // If either the format string content or the pointer itself are tainted, 668 // warn. 669 return generateReportIfTainted(CE->getArg(ArgNum), 670 MsgUncontrolledFormatString, C); 671 } 672 673 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name, 674 CheckerContext &C) const { 675 // TODO: It might make sense to run this check on demand. In some cases, 676 // we should check if the environment has been cleansed here. We also might 677 // need to know if the user was reset before these calls(seteuid). 678 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name) 679 .Case("system", 0) 680 .Case("popen", 0) 681 .Case("execl", 0) 682 .Case("execle", 0) 683 .Case("execlp", 0) 684 .Case("execv", 0) 685 .Case("execvp", 0) 686 .Case("execvP", 0) 687 .Case("execve", 0) 688 .Case("dlopen", 0) 689 .Default(UINT_MAX); 690 691 if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1)) 692 return false; 693 694 return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C); 695 } 696 697 // TODO: Should this check be a part of the CString checker? 698 // If yes, should taint be a global setting? 699 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE, 700 const FunctionDecl *FDecl, 701 CheckerContext &C) const { 702 // If the function has a buffer size argument, set ArgNum. 703 unsigned ArgNum = InvalidArgIndex; 704 unsigned BId = 0; 705 if ((BId = FDecl->getMemoryFunctionKind())) 706 switch (BId) { 707 case Builtin::BImemcpy: 708 case Builtin::BImemmove: 709 case Builtin::BIstrncpy: 710 ArgNum = 2; 711 break; 712 case Builtin::BIstrndup: 713 ArgNum = 1; 714 break; 715 default: 716 break; 717 }; 718 719 if (ArgNum == InvalidArgIndex) { 720 if (C.isCLibraryFunction(FDecl, "malloc") || 721 C.isCLibraryFunction(FDecl, "calloc") || 722 C.isCLibraryFunction(FDecl, "alloca")) 723 ArgNum = 0; 724 else if (C.isCLibraryFunction(FDecl, "memccpy")) 725 ArgNum = 3; 726 else if (C.isCLibraryFunction(FDecl, "realloc")) 727 ArgNum = 1; 728 else if (C.isCLibraryFunction(FDecl, "bcopy")) 729 ArgNum = 2; 730 } 731 732 return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum && 733 generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C); 734 } 735 736 void ento::registerGenericTaintChecker(CheckerManager &mgr) { 737 mgr.registerChecker<GenericTaintChecker>(); 738 } 739 740 bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) { 741 return true; 742 } 743