1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This checker defines the attack surface for generic taint propagation. 11 // 12 // The taint information produced by it might be useful to other checkers. For 13 // example, checkers should report errors which involve tainted data more 14 // aggressively, even if the involved symbols are under constrained. 15 // 16 //===----------------------------------------------------------------------===// 17 #include "ClangSACheckers.h" 18 #include "clang/AST/Attr.h" 19 #include "clang/Basic/Builtins.h" 20 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 21 #include "clang/StaticAnalyzer/Core/Checker.h" 22 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 23 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 24 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 25 #include <climits> 26 27 using namespace clang; 28 using namespace ento; 29 30 namespace { 31 class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>, 32 check::PreStmt<CallExpr> > { 33 public: 34 static void *getTag() { static int Tag; return &Tag; } 35 36 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; 37 38 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; 39 40 private: 41 static const unsigned InvalidArgIndex = UINT_MAX; 42 /// Denotes the return vale. 43 static const unsigned ReturnValueIndex = UINT_MAX - 1; 44 45 mutable std::unique_ptr<BugType> BT; 46 inline void initBugType() const { 47 if (!BT) 48 BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data")); 49 } 50 51 /// \brief Catch taint related bugs. Check if tainted data is passed to a 52 /// system call etc. 53 bool checkPre(const CallExpr *CE, CheckerContext &C) const; 54 55 /// \brief Add taint sources on a pre-visit. 56 void addSourcesPre(const CallExpr *CE, CheckerContext &C) const; 57 58 /// \brief Propagate taint generated at pre-visit. 59 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const; 60 61 /// \brief Add taint sources on a post visit. 62 void addSourcesPost(const CallExpr *CE, CheckerContext &C) const; 63 64 /// Check if the region the expression evaluates to is the standard input, 65 /// and thus, is tainted. 66 static bool isStdin(const Expr *E, CheckerContext &C); 67 68 /// This is called from getPointedToSymbol() to resolve symbol references for 69 /// the region underlying a LazyCompoundVal. This is the default binding 70 /// for the LCV, which could be a conjured symbol from a function call that 71 /// initialized the region. It only returns the conjured symbol if the LCV 72 /// covers the entire region, e.g. we avoid false positives by not returning 73 /// a default bindingc for an entire struct if the symbol for only a single 74 /// field or element within it is requested. 75 // TODO: Return an appropriate symbol for sub-fields/elements of an LCV so 76 // that they are also appropriately tainted. 77 static SymbolRef getLCVSymbol(CheckerContext &C, 78 nonloc::LazyCompoundVal &LCV); 79 80 /// \brief Given a pointer argument, get the symbol of the value it contains 81 /// (points to). 82 static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg); 83 84 /// Functions defining the attack surface. 85 typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *, 86 CheckerContext &C) const; 87 ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const; 88 ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const; 89 ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const; 90 91 /// Taint the scanned input if the file is tainted. 92 ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const; 93 94 /// Check for CWE-134: Uncontrolled Format String. 95 static const char MsgUncontrolledFormatString[]; 96 bool checkUncontrolledFormatString(const CallExpr *CE, 97 CheckerContext &C) const; 98 99 /// Check for: 100 /// CERT/STR02-C. "Sanitize data passed to complex subsystems" 101 /// CWE-78, "Failure to Sanitize Data into an OS Command" 102 static const char MsgSanitizeSystemArgs[]; 103 bool checkSystemCall(const CallExpr *CE, StringRef Name, 104 CheckerContext &C) const; 105 106 /// Check if tainted data is used as a buffer size ins strn.. functions, 107 /// and allocators. 108 static const char MsgTaintedBufferSize[]; 109 bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl, 110 CheckerContext &C) const; 111 112 /// Generate a report if the expression is tainted or points to tainted data. 113 bool generateReportIfTainted(const Expr *E, const char Msg[], 114 CheckerContext &C) const; 115 116 /// The bug visitor prints a diagnostic message at the location where a given 117 /// variable was tainted. 118 class TaintBugVisitor 119 : public BugReporterVisitorImpl<TaintBugVisitor> { 120 private: 121 const SVal V; 122 123 public: 124 TaintBugVisitor(const SVal V) : V(V) {} 125 void Profile(llvm::FoldingSetNodeID &ID) const override { ID.Add(V); } 126 127 std::shared_ptr<PathDiagnosticPiece> VisitNode(const ExplodedNode *N, 128 const ExplodedNode *PrevN, 129 BugReporterContext &BRC, 130 BugReport &BR) override; 131 }; 132 133 typedef SmallVector<unsigned, 2> ArgVector; 134 135 /// \brief A struct used to specify taint propagation rules for a function. 136 /// 137 /// If any of the possible taint source arguments is tainted, all of the 138 /// destination arguments should also be tainted. Use InvalidArgIndex in the 139 /// src list to specify that all of the arguments can introduce taint. Use 140 /// InvalidArgIndex in the dst arguments to signify that all the non-const 141 /// pointer and reference arguments might be tainted on return. If 142 /// ReturnValueIndex is added to the dst list, the return value will be 143 /// tainted. 144 struct TaintPropagationRule { 145 /// List of arguments which can be taint sources and should be checked. 146 ArgVector SrcArgs; 147 /// List of arguments which should be tainted on function return. 148 ArgVector DstArgs; 149 // TODO: Check if using other data structures would be more optimal. 150 151 TaintPropagationRule() {} 152 153 TaintPropagationRule(unsigned SArg, 154 unsigned DArg, bool TaintRet = false) { 155 SrcArgs.push_back(SArg); 156 DstArgs.push_back(DArg); 157 if (TaintRet) 158 DstArgs.push_back(ReturnValueIndex); 159 } 160 161 TaintPropagationRule(unsigned SArg1, unsigned SArg2, 162 unsigned DArg, bool TaintRet = false) { 163 SrcArgs.push_back(SArg1); 164 SrcArgs.push_back(SArg2); 165 DstArgs.push_back(DArg); 166 if (TaintRet) 167 DstArgs.push_back(ReturnValueIndex); 168 } 169 170 /// Get the propagation rule for a given function. 171 static TaintPropagationRule 172 getTaintPropagationRule(const FunctionDecl *FDecl, 173 StringRef Name, 174 CheckerContext &C); 175 176 inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); } 177 inline void addDstArg(unsigned A) { DstArgs.push_back(A); } 178 179 inline bool isNull() const { return SrcArgs.empty(); } 180 181 inline bool isDestinationArgument(unsigned ArgNum) const { 182 return (std::find(DstArgs.begin(), 183 DstArgs.end(), ArgNum) != DstArgs.end()); 184 } 185 186 static inline bool isTaintedOrPointsToTainted(const Expr *E, 187 ProgramStateRef State, 188 CheckerContext &C) { 189 return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) || 190 (E->getType().getTypePtr()->isPointerType() && 191 State->isTainted(getPointedToSymbol(C, E)))); 192 } 193 194 /// \brief Pre-process a function which propagates taint according to the 195 /// taint rule. 196 ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const; 197 198 }; 199 }; 200 201 const unsigned GenericTaintChecker::ReturnValueIndex; 202 const unsigned GenericTaintChecker::InvalidArgIndex; 203 204 const char GenericTaintChecker::MsgUncontrolledFormatString[] = 205 "Untrusted data is used as a format string " 206 "(CWE-134: Uncontrolled Format String)"; 207 208 const char GenericTaintChecker::MsgSanitizeSystemArgs[] = 209 "Untrusted data is passed to a system call " 210 "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; 211 212 const char GenericTaintChecker::MsgTaintedBufferSize[] = 213 "Untrusted data is used to specify the buffer size " 214 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for " 215 "character data and the null terminator)"; 216 217 } // end of anonymous namespace 218 219 /// A set which is used to pass information from call pre-visit instruction 220 /// to the call post-visit. The values are unsigned integers, which are either 221 /// ReturnValueIndex, or indexes of the pointer/reference argument, which 222 /// points to data, which should be tainted on return. 223 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned) 224 225 std::shared_ptr<PathDiagnosticPiece> 226 GenericTaintChecker::TaintBugVisitor::VisitNode(const ExplodedNode *N, 227 const ExplodedNode *PrevN, BugReporterContext &BRC, BugReport &BR) { 228 229 // Find the ExplodedNode where the taint was first introduced 230 if (!N->getState()->isTainted(V) || PrevN->getState()->isTainted(V)) 231 return nullptr; 232 233 const Stmt *S = PathDiagnosticLocation::getStmt(N); 234 if (!S) 235 return nullptr; 236 237 const LocationContext *NCtx = N->getLocationContext(); 238 PathDiagnosticLocation L = 239 PathDiagnosticLocation::createBegin(S, BRC.getSourceManager(), NCtx); 240 if (!L.isValid() || !L.asLocation().isValid()) 241 return nullptr; 242 243 return std::make_shared<PathDiagnosticEventPiece>( 244 L, "Taint originated here"); 245 } 246 247 GenericTaintChecker::TaintPropagationRule 248 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( 249 const FunctionDecl *FDecl, 250 StringRef Name, 251 CheckerContext &C) { 252 // TODO: Currently, we might lose precision here: we always mark a return 253 // value as tainted even if it's just a pointer, pointing to tainted data. 254 255 // Check for exact name match for functions without builtin substitutes. 256 TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name) 257 .Case("atoi", TaintPropagationRule(0, ReturnValueIndex)) 258 .Case("atol", TaintPropagationRule(0, ReturnValueIndex)) 259 .Case("atoll", TaintPropagationRule(0, ReturnValueIndex)) 260 .Case("getc", TaintPropagationRule(0, ReturnValueIndex)) 261 .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex)) 262 .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex)) 263 .Case("getw", TaintPropagationRule(0, ReturnValueIndex)) 264 .Case("toupper", TaintPropagationRule(0, ReturnValueIndex)) 265 .Case("tolower", TaintPropagationRule(0, ReturnValueIndex)) 266 .Case("strchr", TaintPropagationRule(0, ReturnValueIndex)) 267 .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex)) 268 .Case("read", TaintPropagationRule(0, 2, 1, true)) 269 .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true)) 270 .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true)) 271 .Case("fgets", TaintPropagationRule(2, 0, true)) 272 .Case("getline", TaintPropagationRule(2, 0)) 273 .Case("getdelim", TaintPropagationRule(3, 0)) 274 .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex)) 275 .Default(TaintPropagationRule()); 276 277 if (!Rule.isNull()) 278 return Rule; 279 280 // Check if it's one of the memory setting/copying functions. 281 // This check is specialized but faster then calling isCLibraryFunction. 282 unsigned BId = 0; 283 if ( (BId = FDecl->getMemoryFunctionKind()) ) 284 switch(BId) { 285 case Builtin::BImemcpy: 286 case Builtin::BImemmove: 287 case Builtin::BIstrncpy: 288 case Builtin::BIstrncat: 289 return TaintPropagationRule(1, 2, 0, true); 290 case Builtin::BIstrlcpy: 291 case Builtin::BIstrlcat: 292 return TaintPropagationRule(1, 2, 0, false); 293 case Builtin::BIstrndup: 294 return TaintPropagationRule(0, 1, ReturnValueIndex); 295 296 default: 297 break; 298 }; 299 300 // Process all other functions which could be defined as builtins. 301 if (Rule.isNull()) { 302 if (C.isCLibraryFunction(FDecl, "snprintf") || 303 C.isCLibraryFunction(FDecl, "sprintf")) 304 return TaintPropagationRule(InvalidArgIndex, 0, true); 305 else if (C.isCLibraryFunction(FDecl, "strcpy") || 306 C.isCLibraryFunction(FDecl, "stpcpy") || 307 C.isCLibraryFunction(FDecl, "strcat")) 308 return TaintPropagationRule(1, 0, true); 309 else if (C.isCLibraryFunction(FDecl, "bcopy")) 310 return TaintPropagationRule(0, 2, 1, false); 311 else if (C.isCLibraryFunction(FDecl, "strdup") || 312 C.isCLibraryFunction(FDecl, "strdupa")) 313 return TaintPropagationRule(0, ReturnValueIndex); 314 else if (C.isCLibraryFunction(FDecl, "wcsdup")) 315 return TaintPropagationRule(0, ReturnValueIndex); 316 } 317 318 // Skipping the following functions, since they might be used for cleansing 319 // or smart memory copy: 320 // - memccpy - copying until hitting a special character. 321 322 return TaintPropagationRule(); 323 } 324 325 void GenericTaintChecker::checkPreStmt(const CallExpr *CE, 326 CheckerContext &C) const { 327 // Check for errors first. 328 if (checkPre(CE, C)) 329 return; 330 331 // Add taint second. 332 addSourcesPre(CE, C); 333 } 334 335 void GenericTaintChecker::checkPostStmt(const CallExpr *CE, 336 CheckerContext &C) const { 337 if (propagateFromPre(CE, C)) 338 return; 339 addSourcesPost(CE, C); 340 } 341 342 void GenericTaintChecker::addSourcesPre(const CallExpr *CE, 343 CheckerContext &C) const { 344 ProgramStateRef State = nullptr; 345 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 346 if (!FDecl || FDecl->getKind() != Decl::Function) 347 return; 348 349 StringRef Name = C.getCalleeName(FDecl); 350 if (Name.empty()) 351 return; 352 353 // First, try generating a propagation rule for this function. 354 TaintPropagationRule Rule = 355 TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C); 356 if (!Rule.isNull()) { 357 State = Rule.process(CE, C); 358 if (!State) 359 return; 360 C.addTransition(State); 361 return; 362 } 363 364 // Otherwise, check if we have custom pre-processing implemented. 365 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 366 .Case("fscanf", &GenericTaintChecker::preFscanf) 367 .Default(nullptr); 368 // Check and evaluate the call. 369 if (evalFunction) 370 State = (this->*evalFunction)(CE, C); 371 if (!State) 372 return; 373 C.addTransition(State); 374 375 } 376 377 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, 378 CheckerContext &C) const { 379 ProgramStateRef State = C.getState(); 380 381 // Depending on what was tainted at pre-visit, we determined a set of 382 // arguments which should be tainted after the function returns. These are 383 // stored in the state as TaintArgsOnPostVisit set. 384 TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>(); 385 if (TaintArgs.isEmpty()) 386 return false; 387 388 for (llvm::ImmutableSet<unsigned>::iterator 389 I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) { 390 unsigned ArgNum = *I; 391 392 // Special handling for the tainted return value. 393 if (ArgNum == ReturnValueIndex) { 394 State = State->addTaint(CE, C.getLocationContext()); 395 continue; 396 } 397 398 // The arguments are pointer arguments. The data they are pointing at is 399 // tainted after the call. 400 if (CE->getNumArgs() < (ArgNum + 1)) 401 return false; 402 const Expr* Arg = CE->getArg(ArgNum); 403 SymbolRef Sym = getPointedToSymbol(C, Arg); 404 if (Sym) 405 State = State->addTaint(Sym); 406 } 407 408 // Clear up the taint info from the state. 409 State = State->remove<TaintArgsOnPostVisit>(); 410 411 if (State != C.getState()) { 412 C.addTransition(State); 413 return true; 414 } 415 return false; 416 } 417 418 void GenericTaintChecker::addSourcesPost(const CallExpr *CE, 419 CheckerContext &C) const { 420 // Define the attack surface. 421 // Set the evaluation function by switching on the callee name. 422 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 423 if (!FDecl || FDecl->getKind() != Decl::Function) 424 return; 425 426 StringRef Name = C.getCalleeName(FDecl); 427 if (Name.empty()) 428 return; 429 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 430 .Case("scanf", &GenericTaintChecker::postScanf) 431 // TODO: Add support for vfscanf & family. 432 .Case("getchar", &GenericTaintChecker::postRetTaint) 433 .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint) 434 .Case("getenv", &GenericTaintChecker::postRetTaint) 435 .Case("fopen", &GenericTaintChecker::postRetTaint) 436 .Case("fdopen", &GenericTaintChecker::postRetTaint) 437 .Case("freopen", &GenericTaintChecker::postRetTaint) 438 .Case("getch", &GenericTaintChecker::postRetTaint) 439 .Case("wgetch", &GenericTaintChecker::postRetTaint) 440 .Case("socket", &GenericTaintChecker::postSocket) 441 .Default(nullptr); 442 443 // If the callee isn't defined, it is not of security concern. 444 // Check and evaluate the call. 445 ProgramStateRef State = nullptr; 446 if (evalFunction) 447 State = (this->*evalFunction)(CE, C); 448 if (!State) 449 return; 450 451 C.addTransition(State); 452 } 453 454 bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{ 455 456 if (checkUncontrolledFormatString(CE, C)) 457 return true; 458 459 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 460 if (!FDecl || FDecl->getKind() != Decl::Function) 461 return false; 462 463 StringRef Name = C.getCalleeName(FDecl); 464 if (Name.empty()) 465 return false; 466 467 if (checkSystemCall(CE, Name, C)) 468 return true; 469 470 if (checkTaintedBufferSize(CE, FDecl, C)) 471 return true; 472 473 return false; 474 } 475 476 SymbolRef GenericTaintChecker::getLCVSymbol(CheckerContext &C, 477 nonloc::LazyCompoundVal &LCV) { 478 StoreManager &StoreMgr = C.getStoreManager(); 479 480 // getLCVSymbol() is reached in a PostStmt so we can always expect a default 481 // binding to exist if one is present. 482 if (Optional<SVal> binding = StoreMgr.getDefaultBinding(LCV)) { 483 SymbolRef Sym = binding->getAsSymbol(); 484 if (!Sym) 485 return nullptr; 486 487 // If the LCV covers an entire base region return the default conjured symbol. 488 if (LCV.getRegion() == LCV.getRegion()->getBaseRegion()) 489 return Sym; 490 } 491 492 // Otherwise, return a nullptr as there's not yet a functional way to taint 493 // sub-regions of LCVs. 494 return nullptr; 495 } 496 497 SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C, 498 const Expr* Arg) { 499 ProgramStateRef State = C.getState(); 500 SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext()); 501 if (AddrVal.isUnknownOrUndef()) 502 return nullptr; 503 504 Optional<Loc> AddrLoc = AddrVal.getAs<Loc>(); 505 if (!AddrLoc) 506 return nullptr; 507 508 const PointerType *ArgTy = 509 dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr()); 510 SVal Val = State->getSVal(*AddrLoc, 511 ArgTy ? ArgTy->getPointeeType(): QualType()); 512 513 if (auto LCV = Val.getAs<nonloc::LazyCompoundVal>()) 514 return getLCVSymbol(C, *LCV); 515 516 return Val.getAsSymbol(); 517 } 518 519 ProgramStateRef 520 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE, 521 CheckerContext &C) const { 522 ProgramStateRef State = C.getState(); 523 524 // Check for taint in arguments. 525 bool IsTainted = false; 526 for (ArgVector::const_iterator I = SrcArgs.begin(), 527 E = SrcArgs.end(); I != E; ++I) { 528 unsigned ArgNum = *I; 529 530 if (ArgNum == InvalidArgIndex) { 531 // Check if any of the arguments is tainted, but skip the 532 // destination arguments. 533 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { 534 if (isDestinationArgument(i)) 535 continue; 536 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C))) 537 break; 538 } 539 break; 540 } 541 542 if (CE->getNumArgs() < (ArgNum + 1)) 543 return State; 544 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C))) 545 break; 546 } 547 if (!IsTainted) 548 return State; 549 550 // Mark the arguments which should be tainted after the function returns. 551 for (ArgVector::const_iterator I = DstArgs.begin(), 552 E = DstArgs.end(); I != E; ++I) { 553 unsigned ArgNum = *I; 554 555 // Should we mark all arguments as tainted? 556 if (ArgNum == InvalidArgIndex) { 557 // For all pointer and references that were passed in: 558 // If they are not pointing to const data, mark data as tainted. 559 // TODO: So far we are just going one level down; ideally we'd need to 560 // recurse here. 561 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { 562 const Expr *Arg = CE->getArg(i); 563 // Process pointer argument. 564 const Type *ArgTy = Arg->getType().getTypePtr(); 565 QualType PType = ArgTy->getPointeeType(); 566 if ((!PType.isNull() && !PType.isConstQualified()) 567 || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) 568 State = State->add<TaintArgsOnPostVisit>(i); 569 } 570 continue; 571 } 572 573 // Should mark the return value? 574 if (ArgNum == ReturnValueIndex) { 575 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex); 576 continue; 577 } 578 579 // Mark the given argument. 580 assert(ArgNum < CE->getNumArgs()); 581 State = State->add<TaintArgsOnPostVisit>(ArgNum); 582 } 583 584 return State; 585 } 586 587 588 // If argument 0 (file descriptor) is tainted, all arguments except for arg 0 589 // and arg 1 should get taint. 590 ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE, 591 CheckerContext &C) const { 592 assert(CE->getNumArgs() >= 2); 593 ProgramStateRef State = C.getState(); 594 595 // Check is the file descriptor is tainted. 596 if (State->isTainted(CE->getArg(0), C.getLocationContext()) || 597 isStdin(CE->getArg(0), C)) { 598 // All arguments except for the first two should get taint. 599 for (unsigned int i = 2; i < CE->getNumArgs(); ++i) 600 State = State->add<TaintArgsOnPostVisit>(i); 601 return State; 602 } 603 604 return nullptr; 605 } 606 607 608 // If argument 0(protocol domain) is network, the return value should get taint. 609 ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE, 610 CheckerContext &C) const { 611 ProgramStateRef State = C.getState(); 612 if (CE->getNumArgs() < 3) 613 return State; 614 615 SourceLocation DomLoc = CE->getArg(0)->getExprLoc(); 616 StringRef DomName = C.getMacroNameOrSpelling(DomLoc); 617 // White list the internal communication protocols. 618 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") || 619 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36")) 620 return State; 621 State = State->addTaint(CE, C.getLocationContext()); 622 return State; 623 } 624 625 ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE, 626 CheckerContext &C) const { 627 ProgramStateRef State = C.getState(); 628 if (CE->getNumArgs() < 2) 629 return State; 630 631 // All arguments except for the very first one should get taint. 632 for (unsigned int i = 1; i < CE->getNumArgs(); ++i) { 633 // The arguments are pointer arguments. The data they are pointing at is 634 // tainted after the call. 635 const Expr* Arg = CE->getArg(i); 636 SymbolRef Sym = getPointedToSymbol(C, Arg); 637 if (Sym) 638 State = State->addTaint(Sym); 639 } 640 return State; 641 } 642 643 ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE, 644 CheckerContext &C) const { 645 return C.getState()->addTaint(CE, C.getLocationContext()); 646 } 647 648 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) { 649 ProgramStateRef State = C.getState(); 650 SVal Val = State->getSVal(E, C.getLocationContext()); 651 652 // stdin is a pointer, so it would be a region. 653 const MemRegion *MemReg = Val.getAsRegion(); 654 655 // The region should be symbolic, we do not know it's value. 656 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg); 657 if (!SymReg) 658 return false; 659 660 // Get it's symbol and find the declaration region it's pointing to. 661 const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol()); 662 if (!Sm) 663 return false; 664 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion()); 665 if (!DeclReg) 666 return false; 667 668 // This region corresponds to a declaration, find out if it's a global/extern 669 // variable named stdin with the proper type. 670 if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { 671 D = D->getCanonicalDecl(); 672 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) 673 if (const PointerType * PtrTy = 674 dyn_cast<PointerType>(D->getType().getTypePtr())) 675 if (PtrTy->getPointeeType() == C.getASTContext().getFILEType()) 676 return true; 677 } 678 return false; 679 } 680 681 static bool getPrintfFormatArgumentNum(const CallExpr *CE, 682 const CheckerContext &C, 683 unsigned int &ArgNum) { 684 // Find if the function contains a format string argument. 685 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, 686 // vsnprintf, syslog, custom annotated functions. 687 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 688 if (!FDecl) 689 return false; 690 for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) { 691 ArgNum = Format->getFormatIdx() - 1; 692 if ((Format->getType()->getName() == "printf") && 693 CE->getNumArgs() > ArgNum) 694 return true; 695 } 696 697 // Or if a function is named setproctitle (this is a heuristic). 698 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) { 699 ArgNum = 0; 700 return true; 701 } 702 703 return false; 704 } 705 706 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, 707 const char Msg[], 708 CheckerContext &C) const { 709 assert(E); 710 711 // Check for taint. 712 ProgramStateRef State = C.getState(); 713 const SymbolRef PointedToSym = getPointedToSymbol(C, E); 714 SVal TaintedSVal; 715 if (State->isTainted(PointedToSym)) 716 TaintedSVal = nonloc::SymbolVal(PointedToSym); 717 else if (State->isTainted(E, C.getLocationContext())) 718 TaintedSVal = C.getSVal(E); 719 else 720 return false; 721 722 // Generate diagnostic. 723 if (ExplodedNode *N = C.generateNonFatalErrorNode()) { 724 initBugType(); 725 auto report = llvm::make_unique<BugReport>(*BT, Msg, N); 726 report->addRange(E->getSourceRange()); 727 report->addVisitor(llvm::make_unique<TaintBugVisitor>(TaintedSVal)); 728 C.emitReport(std::move(report)); 729 return true; 730 } 731 return false; 732 } 733 734 bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE, 735 CheckerContext &C) const{ 736 // Check if the function contains a format string argument. 737 unsigned int ArgNum = 0; 738 if (!getPrintfFormatArgumentNum(CE, C, ArgNum)) 739 return false; 740 741 // If either the format string content or the pointer itself are tainted, warn. 742 return generateReportIfTainted(CE->getArg(ArgNum), 743 MsgUncontrolledFormatString, C); 744 } 745 746 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, 747 StringRef Name, 748 CheckerContext &C) const { 749 // TODO: It might make sense to run this check on demand. In some cases, 750 // we should check if the environment has been cleansed here. We also might 751 // need to know if the user was reset before these calls(seteuid). 752 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name) 753 .Case("system", 0) 754 .Case("popen", 0) 755 .Case("execl", 0) 756 .Case("execle", 0) 757 .Case("execlp", 0) 758 .Case("execv", 0) 759 .Case("execvp", 0) 760 .Case("execvP", 0) 761 .Case("execve", 0) 762 .Case("dlopen", 0) 763 .Default(UINT_MAX); 764 765 if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1)) 766 return false; 767 768 return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C); 769 } 770 771 // TODO: Should this check be a part of the CString checker? 772 // If yes, should taint be a global setting? 773 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE, 774 const FunctionDecl *FDecl, 775 CheckerContext &C) const { 776 // If the function has a buffer size argument, set ArgNum. 777 unsigned ArgNum = InvalidArgIndex; 778 unsigned BId = 0; 779 if ( (BId = FDecl->getMemoryFunctionKind()) ) 780 switch(BId) { 781 case Builtin::BImemcpy: 782 case Builtin::BImemmove: 783 case Builtin::BIstrncpy: 784 ArgNum = 2; 785 break; 786 case Builtin::BIstrndup: 787 ArgNum = 1; 788 break; 789 default: 790 break; 791 }; 792 793 if (ArgNum == InvalidArgIndex) { 794 if (C.isCLibraryFunction(FDecl, "malloc") || 795 C.isCLibraryFunction(FDecl, "calloc") || 796 C.isCLibraryFunction(FDecl, "alloca")) 797 ArgNum = 0; 798 else if (C.isCLibraryFunction(FDecl, "memccpy")) 799 ArgNum = 3; 800 else if (C.isCLibraryFunction(FDecl, "realloc")) 801 ArgNum = 1; 802 else if (C.isCLibraryFunction(FDecl, "bcopy")) 803 ArgNum = 2; 804 } 805 806 return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum && 807 generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C); 808 } 809 810 void ento::registerGenericTaintChecker(CheckerManager &mgr) { 811 mgr.registerChecker<GenericTaintChecker>(); 812 } 813