1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This checker defines the attack surface for generic taint propagation. 11 // 12 // The taint information produced by it might be useful to other checkers. For 13 // example, checkers should report errors which involve tainted data more 14 // aggressively, even if the involved symbols are under constrained. 15 // 16 //===----------------------------------------------------------------------===// 17 #include "ClangSACheckers.h" 18 #include "clang/AST/Attr.h" 19 #include "clang/Basic/Builtins.h" 20 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 21 #include "clang/StaticAnalyzer/Core/Checker.h" 22 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 23 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 24 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 25 #include <climits> 26 27 using namespace clang; 28 using namespace ento; 29 30 namespace { 31 class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>, 32 check::PreStmt<CallExpr> > { 33 public: 34 static void *getTag() { static int Tag; return &Tag; } 35 36 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; 37 38 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; 39 40 private: 41 static const unsigned InvalidArgIndex = UINT_MAX; 42 /// Denotes the return vale. 43 static const unsigned ReturnValueIndex = UINT_MAX - 1; 44 45 mutable std::unique_ptr<BugType> BT; 46 inline void initBugType() const { 47 if (!BT) 48 BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data")); 49 } 50 51 /// \brief Catch taint related bugs. Check if tainted data is passed to a 52 /// system call etc. 53 bool checkPre(const CallExpr *CE, CheckerContext &C) const; 54 55 /// \brief Add taint sources on a pre-visit. 56 void addSourcesPre(const CallExpr *CE, CheckerContext &C) const; 57 58 /// \brief Propagate taint generated at pre-visit. 59 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const; 60 61 /// \brief Add taint sources on a post visit. 62 void addSourcesPost(const CallExpr *CE, CheckerContext &C) const; 63 64 /// Check if the region the expression evaluates to is the standard input, 65 /// and thus, is tainted. 66 static bool isStdin(const Expr *E, CheckerContext &C); 67 68 /// \brief Given a pointer argument, return the value it points to. 69 static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg); 70 71 /// Functions defining the attack surface. 72 typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *, 73 CheckerContext &C) const; 74 ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const; 75 ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const; 76 ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const; 77 78 /// Taint the scanned input if the file is tainted. 79 ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const; 80 81 /// Check for CWE-134: Uncontrolled Format String. 82 static const char MsgUncontrolledFormatString[]; 83 bool checkUncontrolledFormatString(const CallExpr *CE, 84 CheckerContext &C) const; 85 86 /// Check for: 87 /// CERT/STR02-C. "Sanitize data passed to complex subsystems" 88 /// CWE-78, "Failure to Sanitize Data into an OS Command" 89 static const char MsgSanitizeSystemArgs[]; 90 bool checkSystemCall(const CallExpr *CE, StringRef Name, 91 CheckerContext &C) const; 92 93 /// Check if tainted data is used as a buffer size ins strn.. functions, 94 /// and allocators. 95 static const char MsgTaintedBufferSize[]; 96 bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl, 97 CheckerContext &C) const; 98 99 /// Generate a report if the expression is tainted or points to tainted data. 100 bool generateReportIfTainted(const Expr *E, const char Msg[], 101 CheckerContext &C) const; 102 103 /// The bug visitor prints a diagnostic message at the location where a given 104 /// variable was tainted. 105 class TaintBugVisitor 106 : public BugReporterVisitorImpl<TaintBugVisitor> { 107 private: 108 const SVal V; 109 110 public: 111 TaintBugVisitor(const SVal V) : V(V) {} 112 void Profile(llvm::FoldingSetNodeID &ID) const override { ID.Add(V); } 113 114 std::shared_ptr<PathDiagnosticPiece> VisitNode(const ExplodedNode *N, 115 const ExplodedNode *PrevN, 116 BugReporterContext &BRC, 117 BugReport &BR) override; 118 }; 119 120 typedef SmallVector<unsigned, 2> ArgVector; 121 122 /// \brief A struct used to specify taint propagation rules for a function. 123 /// 124 /// If any of the possible taint source arguments is tainted, all of the 125 /// destination arguments should also be tainted. Use InvalidArgIndex in the 126 /// src list to specify that all of the arguments can introduce taint. Use 127 /// InvalidArgIndex in the dst arguments to signify that all the non-const 128 /// pointer and reference arguments might be tainted on return. If 129 /// ReturnValueIndex is added to the dst list, the return value will be 130 /// tainted. 131 struct TaintPropagationRule { 132 /// List of arguments which can be taint sources and should be checked. 133 ArgVector SrcArgs; 134 /// List of arguments which should be tainted on function return. 135 ArgVector DstArgs; 136 // TODO: Check if using other data structures would be more optimal. 137 138 TaintPropagationRule() {} 139 140 TaintPropagationRule(unsigned SArg, 141 unsigned DArg, bool TaintRet = false) { 142 SrcArgs.push_back(SArg); 143 DstArgs.push_back(DArg); 144 if (TaintRet) 145 DstArgs.push_back(ReturnValueIndex); 146 } 147 148 TaintPropagationRule(unsigned SArg1, unsigned SArg2, 149 unsigned DArg, bool TaintRet = false) { 150 SrcArgs.push_back(SArg1); 151 SrcArgs.push_back(SArg2); 152 DstArgs.push_back(DArg); 153 if (TaintRet) 154 DstArgs.push_back(ReturnValueIndex); 155 } 156 157 /// Get the propagation rule for a given function. 158 static TaintPropagationRule 159 getTaintPropagationRule(const FunctionDecl *FDecl, 160 StringRef Name, 161 CheckerContext &C); 162 163 inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); } 164 inline void addDstArg(unsigned A) { DstArgs.push_back(A); } 165 166 inline bool isNull() const { return SrcArgs.empty(); } 167 168 inline bool isDestinationArgument(unsigned ArgNum) const { 169 return (std::find(DstArgs.begin(), 170 DstArgs.end(), ArgNum) != DstArgs.end()); 171 } 172 173 static inline bool isTaintedOrPointsToTainted(const Expr *E, 174 ProgramStateRef State, 175 CheckerContext &C) { 176 if (State->isTainted(E, C.getLocationContext()) || isStdin(E, C)) 177 return true; 178 179 if (!E->getType().getTypePtr()->isPointerType()) 180 return false; 181 182 Optional<SVal> V = getPointedToSVal(C, E); 183 return (V && State->isTainted(*V)); 184 } 185 186 /// \brief Pre-process a function which propagates taint according to the 187 /// taint rule. 188 ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const; 189 190 }; 191 }; 192 193 const unsigned GenericTaintChecker::ReturnValueIndex; 194 const unsigned GenericTaintChecker::InvalidArgIndex; 195 196 const char GenericTaintChecker::MsgUncontrolledFormatString[] = 197 "Untrusted data is used as a format string " 198 "(CWE-134: Uncontrolled Format String)"; 199 200 const char GenericTaintChecker::MsgSanitizeSystemArgs[] = 201 "Untrusted data is passed to a system call " 202 "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; 203 204 const char GenericTaintChecker::MsgTaintedBufferSize[] = 205 "Untrusted data is used to specify the buffer size " 206 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for " 207 "character data and the null terminator)"; 208 209 } // end of anonymous namespace 210 211 /// A set which is used to pass information from call pre-visit instruction 212 /// to the call post-visit. The values are unsigned integers, which are either 213 /// ReturnValueIndex, or indexes of the pointer/reference argument, which 214 /// points to data, which should be tainted on return. 215 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned) 216 217 std::shared_ptr<PathDiagnosticPiece> 218 GenericTaintChecker::TaintBugVisitor::VisitNode(const ExplodedNode *N, 219 const ExplodedNode *PrevN, BugReporterContext &BRC, BugReport &BR) { 220 221 // Find the ExplodedNode where the taint was first introduced 222 if (!N->getState()->isTainted(V) || PrevN->getState()->isTainted(V)) 223 return nullptr; 224 225 const Stmt *S = PathDiagnosticLocation::getStmt(N); 226 if (!S) 227 return nullptr; 228 229 const LocationContext *NCtx = N->getLocationContext(); 230 PathDiagnosticLocation L = 231 PathDiagnosticLocation::createBegin(S, BRC.getSourceManager(), NCtx); 232 if (!L.isValid() || !L.asLocation().isValid()) 233 return nullptr; 234 235 return std::make_shared<PathDiagnosticEventPiece>( 236 L, "Taint originated here"); 237 } 238 239 GenericTaintChecker::TaintPropagationRule 240 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( 241 const FunctionDecl *FDecl, 242 StringRef Name, 243 CheckerContext &C) { 244 // TODO: Currently, we might lose precision here: we always mark a return 245 // value as tainted even if it's just a pointer, pointing to tainted data. 246 247 // Check for exact name match for functions without builtin substitutes. 248 TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name) 249 .Case("atoi", TaintPropagationRule(0, ReturnValueIndex)) 250 .Case("atol", TaintPropagationRule(0, ReturnValueIndex)) 251 .Case("atoll", TaintPropagationRule(0, ReturnValueIndex)) 252 .Case("getc", TaintPropagationRule(0, ReturnValueIndex)) 253 .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex)) 254 .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex)) 255 .Case("getw", TaintPropagationRule(0, ReturnValueIndex)) 256 .Case("toupper", TaintPropagationRule(0, ReturnValueIndex)) 257 .Case("tolower", TaintPropagationRule(0, ReturnValueIndex)) 258 .Case("strchr", TaintPropagationRule(0, ReturnValueIndex)) 259 .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex)) 260 .Case("read", TaintPropagationRule(0, 2, 1, true)) 261 .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true)) 262 .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true)) 263 .Case("fgets", TaintPropagationRule(2, 0, true)) 264 .Case("getline", TaintPropagationRule(2, 0)) 265 .Case("getdelim", TaintPropagationRule(3, 0)) 266 .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex)) 267 .Default(TaintPropagationRule()); 268 269 if (!Rule.isNull()) 270 return Rule; 271 272 // Check if it's one of the memory setting/copying functions. 273 // This check is specialized but faster then calling isCLibraryFunction. 274 unsigned BId = 0; 275 if ( (BId = FDecl->getMemoryFunctionKind()) ) 276 switch(BId) { 277 case Builtin::BImemcpy: 278 case Builtin::BImemmove: 279 case Builtin::BIstrncpy: 280 case Builtin::BIstrncat: 281 return TaintPropagationRule(1, 2, 0, true); 282 case Builtin::BIstrlcpy: 283 case Builtin::BIstrlcat: 284 return TaintPropagationRule(1, 2, 0, false); 285 case Builtin::BIstrndup: 286 return TaintPropagationRule(0, 1, ReturnValueIndex); 287 288 default: 289 break; 290 }; 291 292 // Process all other functions which could be defined as builtins. 293 if (Rule.isNull()) { 294 if (C.isCLibraryFunction(FDecl, "snprintf") || 295 C.isCLibraryFunction(FDecl, "sprintf")) 296 return TaintPropagationRule(InvalidArgIndex, 0, true); 297 else if (C.isCLibraryFunction(FDecl, "strcpy") || 298 C.isCLibraryFunction(FDecl, "stpcpy") || 299 C.isCLibraryFunction(FDecl, "strcat")) 300 return TaintPropagationRule(1, 0, true); 301 else if (C.isCLibraryFunction(FDecl, "bcopy")) 302 return TaintPropagationRule(0, 2, 1, false); 303 else if (C.isCLibraryFunction(FDecl, "strdup") || 304 C.isCLibraryFunction(FDecl, "strdupa")) 305 return TaintPropagationRule(0, ReturnValueIndex); 306 else if (C.isCLibraryFunction(FDecl, "wcsdup")) 307 return TaintPropagationRule(0, ReturnValueIndex); 308 } 309 310 // Skipping the following functions, since they might be used for cleansing 311 // or smart memory copy: 312 // - memccpy - copying until hitting a special character. 313 314 return TaintPropagationRule(); 315 } 316 317 void GenericTaintChecker::checkPreStmt(const CallExpr *CE, 318 CheckerContext &C) const { 319 // Check for errors first. 320 if (checkPre(CE, C)) 321 return; 322 323 // Add taint second. 324 addSourcesPre(CE, C); 325 } 326 327 void GenericTaintChecker::checkPostStmt(const CallExpr *CE, 328 CheckerContext &C) const { 329 if (propagateFromPre(CE, C)) 330 return; 331 addSourcesPost(CE, C); 332 } 333 334 void GenericTaintChecker::addSourcesPre(const CallExpr *CE, 335 CheckerContext &C) const { 336 ProgramStateRef State = nullptr; 337 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 338 if (!FDecl || FDecl->getKind() != Decl::Function) 339 return; 340 341 StringRef Name = C.getCalleeName(FDecl); 342 if (Name.empty()) 343 return; 344 345 // First, try generating a propagation rule for this function. 346 TaintPropagationRule Rule = 347 TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C); 348 if (!Rule.isNull()) { 349 State = Rule.process(CE, C); 350 if (!State) 351 return; 352 C.addTransition(State); 353 return; 354 } 355 356 // Otherwise, check if we have custom pre-processing implemented. 357 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 358 .Case("fscanf", &GenericTaintChecker::preFscanf) 359 .Default(nullptr); 360 // Check and evaluate the call. 361 if (evalFunction) 362 State = (this->*evalFunction)(CE, C); 363 if (!State) 364 return; 365 C.addTransition(State); 366 367 } 368 369 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, 370 CheckerContext &C) const { 371 ProgramStateRef State = C.getState(); 372 373 // Depending on what was tainted at pre-visit, we determined a set of 374 // arguments which should be tainted after the function returns. These are 375 // stored in the state as TaintArgsOnPostVisit set. 376 TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>(); 377 if (TaintArgs.isEmpty()) 378 return false; 379 380 for (llvm::ImmutableSet<unsigned>::iterator 381 I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) { 382 unsigned ArgNum = *I; 383 384 // Special handling for the tainted return value. 385 if (ArgNum == ReturnValueIndex) { 386 State = State->addTaint(CE, C.getLocationContext()); 387 continue; 388 } 389 390 // The arguments are pointer arguments. The data they are pointing at is 391 // tainted after the call. 392 if (CE->getNumArgs() < (ArgNum + 1)) 393 return false; 394 const Expr* Arg = CE->getArg(ArgNum); 395 Optional<SVal> V = getPointedToSVal(C, Arg); 396 if (V) 397 State = State->addTaint(*V); 398 } 399 400 // Clear up the taint info from the state. 401 State = State->remove<TaintArgsOnPostVisit>(); 402 403 if (State != C.getState()) { 404 C.addTransition(State); 405 return true; 406 } 407 return false; 408 } 409 410 void GenericTaintChecker::addSourcesPost(const CallExpr *CE, 411 CheckerContext &C) const { 412 // Define the attack surface. 413 // Set the evaluation function by switching on the callee name. 414 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 415 if (!FDecl || FDecl->getKind() != Decl::Function) 416 return; 417 418 StringRef Name = C.getCalleeName(FDecl); 419 if (Name.empty()) 420 return; 421 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 422 .Case("scanf", &GenericTaintChecker::postScanf) 423 // TODO: Add support for vfscanf & family. 424 .Case("getchar", &GenericTaintChecker::postRetTaint) 425 .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint) 426 .Case("getenv", &GenericTaintChecker::postRetTaint) 427 .Case("fopen", &GenericTaintChecker::postRetTaint) 428 .Case("fdopen", &GenericTaintChecker::postRetTaint) 429 .Case("freopen", &GenericTaintChecker::postRetTaint) 430 .Case("getch", &GenericTaintChecker::postRetTaint) 431 .Case("wgetch", &GenericTaintChecker::postRetTaint) 432 .Case("socket", &GenericTaintChecker::postSocket) 433 .Default(nullptr); 434 435 // If the callee isn't defined, it is not of security concern. 436 // Check and evaluate the call. 437 ProgramStateRef State = nullptr; 438 if (evalFunction) 439 State = (this->*evalFunction)(CE, C); 440 if (!State) 441 return; 442 443 C.addTransition(State); 444 } 445 446 bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{ 447 448 if (checkUncontrolledFormatString(CE, C)) 449 return true; 450 451 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 452 if (!FDecl || FDecl->getKind() != Decl::Function) 453 return false; 454 455 StringRef Name = C.getCalleeName(FDecl); 456 if (Name.empty()) 457 return false; 458 459 if (checkSystemCall(CE, Name, C)) 460 return true; 461 462 if (checkTaintedBufferSize(CE, FDecl, C)) 463 return true; 464 465 return false; 466 } 467 468 Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C, 469 const Expr *Arg) { 470 ProgramStateRef State = C.getState(); 471 SVal AddrVal = C.getSVal(Arg->IgnoreParens()); 472 if (AddrVal.isUnknownOrUndef()) 473 return None; 474 475 Optional<Loc> AddrLoc = AddrVal.getAs<Loc>(); 476 if (!AddrLoc) 477 return None; 478 479 QualType ArgTy = Arg->getType().getCanonicalType(); 480 if (!ArgTy->isPointerType()) 481 return None; 482 483 QualType ValTy = ArgTy->getPointeeType(); 484 485 // Do not dereference void pointers. Treat them as byte pointers instead. 486 // FIXME: we might want to consider more than just the first byte. 487 if (ValTy->isVoidType()) 488 ValTy = C.getASTContext().CharTy; 489 490 return State->getSVal(*AddrLoc, ValTy); 491 } 492 493 ProgramStateRef 494 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE, 495 CheckerContext &C) const { 496 ProgramStateRef State = C.getState(); 497 498 // Check for taint in arguments. 499 bool IsTainted = false; 500 for (ArgVector::const_iterator I = SrcArgs.begin(), 501 E = SrcArgs.end(); I != E; ++I) { 502 unsigned ArgNum = *I; 503 504 if (ArgNum == InvalidArgIndex) { 505 // Check if any of the arguments is tainted, but skip the 506 // destination arguments. 507 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { 508 if (isDestinationArgument(i)) 509 continue; 510 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C))) 511 break; 512 } 513 break; 514 } 515 516 if (CE->getNumArgs() < (ArgNum + 1)) 517 return State; 518 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C))) 519 break; 520 } 521 if (!IsTainted) 522 return State; 523 524 // Mark the arguments which should be tainted after the function returns. 525 for (ArgVector::const_iterator I = DstArgs.begin(), 526 E = DstArgs.end(); I != E; ++I) { 527 unsigned ArgNum = *I; 528 529 // Should we mark all arguments as tainted? 530 if (ArgNum == InvalidArgIndex) { 531 // For all pointer and references that were passed in: 532 // If they are not pointing to const data, mark data as tainted. 533 // TODO: So far we are just going one level down; ideally we'd need to 534 // recurse here. 535 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { 536 const Expr *Arg = CE->getArg(i); 537 // Process pointer argument. 538 const Type *ArgTy = Arg->getType().getTypePtr(); 539 QualType PType = ArgTy->getPointeeType(); 540 if ((!PType.isNull() && !PType.isConstQualified()) 541 || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) 542 State = State->add<TaintArgsOnPostVisit>(i); 543 } 544 continue; 545 } 546 547 // Should mark the return value? 548 if (ArgNum == ReturnValueIndex) { 549 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex); 550 continue; 551 } 552 553 // Mark the given argument. 554 assert(ArgNum < CE->getNumArgs()); 555 State = State->add<TaintArgsOnPostVisit>(ArgNum); 556 } 557 558 return State; 559 } 560 561 562 // If argument 0 (file descriptor) is tainted, all arguments except for arg 0 563 // and arg 1 should get taint. 564 ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE, 565 CheckerContext &C) const { 566 assert(CE->getNumArgs() >= 2); 567 ProgramStateRef State = C.getState(); 568 569 // Check is the file descriptor is tainted. 570 if (State->isTainted(CE->getArg(0), C.getLocationContext()) || 571 isStdin(CE->getArg(0), C)) { 572 // All arguments except for the first two should get taint. 573 for (unsigned int i = 2; i < CE->getNumArgs(); ++i) 574 State = State->add<TaintArgsOnPostVisit>(i); 575 return State; 576 } 577 578 return nullptr; 579 } 580 581 582 // If argument 0(protocol domain) is network, the return value should get taint. 583 ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE, 584 CheckerContext &C) const { 585 ProgramStateRef State = C.getState(); 586 if (CE->getNumArgs() < 3) 587 return State; 588 589 SourceLocation DomLoc = CE->getArg(0)->getExprLoc(); 590 StringRef DomName = C.getMacroNameOrSpelling(DomLoc); 591 // White list the internal communication protocols. 592 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") || 593 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36")) 594 return State; 595 State = State->addTaint(CE, C.getLocationContext()); 596 return State; 597 } 598 599 ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE, 600 CheckerContext &C) const { 601 ProgramStateRef State = C.getState(); 602 if (CE->getNumArgs() < 2) 603 return State; 604 605 // All arguments except for the very first one should get taint. 606 for (unsigned int i = 1; i < CE->getNumArgs(); ++i) { 607 // The arguments are pointer arguments. The data they are pointing at is 608 // tainted after the call. 609 const Expr* Arg = CE->getArg(i); 610 Optional<SVal> V = getPointedToSVal(C, Arg); 611 if (V) 612 State = State->addTaint(*V); 613 } 614 return State; 615 } 616 617 ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE, 618 CheckerContext &C) const { 619 return C.getState()->addTaint(CE, C.getLocationContext()); 620 } 621 622 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) { 623 ProgramStateRef State = C.getState(); 624 SVal Val = C.getSVal(E); 625 626 // stdin is a pointer, so it would be a region. 627 const MemRegion *MemReg = Val.getAsRegion(); 628 629 // The region should be symbolic, we do not know it's value. 630 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg); 631 if (!SymReg) 632 return false; 633 634 // Get it's symbol and find the declaration region it's pointing to. 635 const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol()); 636 if (!Sm) 637 return false; 638 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion()); 639 if (!DeclReg) 640 return false; 641 642 // This region corresponds to a declaration, find out if it's a global/extern 643 // variable named stdin with the proper type. 644 if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { 645 D = D->getCanonicalDecl(); 646 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) 647 if (const PointerType * PtrTy = 648 dyn_cast<PointerType>(D->getType().getTypePtr())) 649 if (PtrTy->getPointeeType().getCanonicalType() == 650 C.getASTContext().getFILEType().getCanonicalType()) 651 return true; 652 } 653 return false; 654 } 655 656 static bool getPrintfFormatArgumentNum(const CallExpr *CE, 657 const CheckerContext &C, 658 unsigned int &ArgNum) { 659 // Find if the function contains a format string argument. 660 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, 661 // vsnprintf, syslog, custom annotated functions. 662 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 663 if (!FDecl) 664 return false; 665 for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) { 666 ArgNum = Format->getFormatIdx() - 1; 667 if ((Format->getType()->getName() == "printf") && 668 CE->getNumArgs() > ArgNum) 669 return true; 670 } 671 672 // Or if a function is named setproctitle (this is a heuristic). 673 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) { 674 ArgNum = 0; 675 return true; 676 } 677 678 return false; 679 } 680 681 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, 682 const char Msg[], 683 CheckerContext &C) const { 684 assert(E); 685 686 // Check for taint. 687 ProgramStateRef State = C.getState(); 688 Optional<SVal> PointedToSVal = getPointedToSVal(C, E); 689 SVal TaintedSVal; 690 if (PointedToSVal && State->isTainted(*PointedToSVal)) 691 TaintedSVal = *PointedToSVal; 692 else if (State->isTainted(E, C.getLocationContext())) 693 TaintedSVal = C.getSVal(E); 694 else 695 return false; 696 697 // Generate diagnostic. 698 if (ExplodedNode *N = C.generateNonFatalErrorNode()) { 699 initBugType(); 700 auto report = llvm::make_unique<BugReport>(*BT, Msg, N); 701 report->addRange(E->getSourceRange()); 702 report->addVisitor(llvm::make_unique<TaintBugVisitor>(TaintedSVal)); 703 C.emitReport(std::move(report)); 704 return true; 705 } 706 return false; 707 } 708 709 bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE, 710 CheckerContext &C) const{ 711 // Check if the function contains a format string argument. 712 unsigned int ArgNum = 0; 713 if (!getPrintfFormatArgumentNum(CE, C, ArgNum)) 714 return false; 715 716 // If either the format string content or the pointer itself are tainted, warn. 717 return generateReportIfTainted(CE->getArg(ArgNum), 718 MsgUncontrolledFormatString, C); 719 } 720 721 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, 722 StringRef Name, 723 CheckerContext &C) const { 724 // TODO: It might make sense to run this check on demand. In some cases, 725 // we should check if the environment has been cleansed here. We also might 726 // need to know if the user was reset before these calls(seteuid). 727 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name) 728 .Case("system", 0) 729 .Case("popen", 0) 730 .Case("execl", 0) 731 .Case("execle", 0) 732 .Case("execlp", 0) 733 .Case("execv", 0) 734 .Case("execvp", 0) 735 .Case("execvP", 0) 736 .Case("execve", 0) 737 .Case("dlopen", 0) 738 .Default(UINT_MAX); 739 740 if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1)) 741 return false; 742 743 return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C); 744 } 745 746 // TODO: Should this check be a part of the CString checker? 747 // If yes, should taint be a global setting? 748 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE, 749 const FunctionDecl *FDecl, 750 CheckerContext &C) const { 751 // If the function has a buffer size argument, set ArgNum. 752 unsigned ArgNum = InvalidArgIndex; 753 unsigned BId = 0; 754 if ( (BId = FDecl->getMemoryFunctionKind()) ) 755 switch(BId) { 756 case Builtin::BImemcpy: 757 case Builtin::BImemmove: 758 case Builtin::BIstrncpy: 759 ArgNum = 2; 760 break; 761 case Builtin::BIstrndup: 762 ArgNum = 1; 763 break; 764 default: 765 break; 766 }; 767 768 if (ArgNum == InvalidArgIndex) { 769 if (C.isCLibraryFunction(FDecl, "malloc") || 770 C.isCLibraryFunction(FDecl, "calloc") || 771 C.isCLibraryFunction(FDecl, "alloca")) 772 ArgNum = 0; 773 else if (C.isCLibraryFunction(FDecl, "memccpy")) 774 ArgNum = 3; 775 else if (C.isCLibraryFunction(FDecl, "realloc")) 776 ArgNum = 1; 777 else if (C.isCLibraryFunction(FDecl, "bcopy")) 778 ArgNum = 2; 779 } 780 781 return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum && 782 generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C); 783 } 784 785 void ento::registerGenericTaintChecker(CheckerManager &mgr) { 786 mgr.registerChecker<GenericTaintChecker>(); 787 } 788