1 //= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This defines CStringChecker, which is an assortment of checks on calls 11 // to functions in <string.h>. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ClangSACheckers.h" 16 #include "InterCheckerAPI.h" 17 #include "clang/StaticAnalyzer/Core/Checker.h" 18 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 19 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 20 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 21 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 22 #include "llvm/ADT/SmallString.h" 23 #include "llvm/ADT/STLExtras.h" 24 #include "llvm/ADT/StringSwitch.h" 25 26 using namespace clang; 27 using namespace ento; 28 29 namespace { 30 class CStringChecker : public Checker< eval::Call, 31 check::PreStmt<DeclStmt>, 32 check::LiveSymbols, 33 check::DeadSymbols, 34 check::RegionChanges 35 > { 36 mutable OwningPtr<BugType> BT_Null, 37 BT_Bounds, 38 BT_Overlap, 39 BT_NotCString, 40 BT_AdditionOverflow; 41 42 mutable const char *CurrentFunctionDescription; 43 44 public: 45 /// The filter is used to filter out the diagnostics which are not enabled by 46 /// the user. 47 struct CStringChecksFilter { 48 DefaultBool CheckCStringNullArg; 49 DefaultBool CheckCStringOutOfBounds; 50 DefaultBool CheckCStringBufferOverlap; 51 DefaultBool CheckCStringNotNullTerm; 52 }; 53 54 CStringChecksFilter Filter; 55 56 static void *getTag() { static int tag; return &tag; } 57 58 bool evalCall(const CallExpr *CE, CheckerContext &C) const; 59 void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const; 60 void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const; 61 void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const; 62 bool wantsRegionChangeUpdate(ProgramStateRef state) const; 63 64 ProgramStateRef 65 checkRegionChanges(ProgramStateRef state, 66 const StoreManager::InvalidatedSymbols *, 67 ArrayRef<const MemRegion *> ExplicitRegions, 68 ArrayRef<const MemRegion *> Regions, 69 const CallEvent *Call) const; 70 71 typedef void (CStringChecker::*FnCheck)(CheckerContext &, 72 const CallExpr *) const; 73 74 void evalMemcpy(CheckerContext &C, const CallExpr *CE) const; 75 void evalMempcpy(CheckerContext &C, const CallExpr *CE) const; 76 void evalMemmove(CheckerContext &C, const CallExpr *CE) const; 77 void evalBcopy(CheckerContext &C, const CallExpr *CE) const; 78 void evalCopyCommon(CheckerContext &C, const CallExpr *CE, 79 ProgramStateRef state, 80 const Expr *Size, 81 const Expr *Source, 82 const Expr *Dest, 83 bool Restricted = false, 84 bool IsMempcpy = false) const; 85 86 void evalMemcmp(CheckerContext &C, const CallExpr *CE) const; 87 88 void evalstrLength(CheckerContext &C, const CallExpr *CE) const; 89 void evalstrnLength(CheckerContext &C, const CallExpr *CE) const; 90 void evalstrLengthCommon(CheckerContext &C, 91 const CallExpr *CE, 92 bool IsStrnlen = false) const; 93 94 void evalStrcpy(CheckerContext &C, const CallExpr *CE) const; 95 void evalStrncpy(CheckerContext &C, const CallExpr *CE) const; 96 void evalStpcpy(CheckerContext &C, const CallExpr *CE) const; 97 void evalStrcpyCommon(CheckerContext &C, 98 const CallExpr *CE, 99 bool returnEnd, 100 bool isBounded, 101 bool isAppending) const; 102 103 void evalStrcat(CheckerContext &C, const CallExpr *CE) const; 104 void evalStrncat(CheckerContext &C, const CallExpr *CE) const; 105 106 void evalStrcmp(CheckerContext &C, const CallExpr *CE) const; 107 void evalStrncmp(CheckerContext &C, const CallExpr *CE) const; 108 void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const; 109 void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const; 110 void evalStrcmpCommon(CheckerContext &C, 111 const CallExpr *CE, 112 bool isBounded = false, 113 bool ignoreCase = false) const; 114 115 // Utility methods 116 std::pair<ProgramStateRef , ProgramStateRef > 117 static assumeZero(CheckerContext &C, 118 ProgramStateRef state, SVal V, QualType Ty); 119 120 static ProgramStateRef setCStringLength(ProgramStateRef state, 121 const MemRegion *MR, 122 SVal strLength); 123 static SVal getCStringLengthForRegion(CheckerContext &C, 124 ProgramStateRef &state, 125 const Expr *Ex, 126 const MemRegion *MR, 127 bool hypothetical); 128 SVal getCStringLength(CheckerContext &C, 129 ProgramStateRef &state, 130 const Expr *Ex, 131 SVal Buf, 132 bool hypothetical = false) const; 133 134 const StringLiteral *getCStringLiteral(CheckerContext &C, 135 ProgramStateRef &state, 136 const Expr *expr, 137 SVal val) const; 138 139 static ProgramStateRef InvalidateBuffer(CheckerContext &C, 140 ProgramStateRef state, 141 const Expr *Ex, SVal V); 142 143 static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx, 144 const MemRegion *MR); 145 146 // Re-usable checks 147 ProgramStateRef checkNonNull(CheckerContext &C, 148 ProgramStateRef state, 149 const Expr *S, 150 SVal l) const; 151 ProgramStateRef CheckLocation(CheckerContext &C, 152 ProgramStateRef state, 153 const Expr *S, 154 SVal l, 155 const char *message = NULL) const; 156 ProgramStateRef CheckBufferAccess(CheckerContext &C, 157 ProgramStateRef state, 158 const Expr *Size, 159 const Expr *FirstBuf, 160 const Expr *SecondBuf, 161 const char *firstMessage = NULL, 162 const char *secondMessage = NULL, 163 bool WarnAboutSize = false) const; 164 165 ProgramStateRef CheckBufferAccess(CheckerContext &C, 166 ProgramStateRef state, 167 const Expr *Size, 168 const Expr *Buf, 169 const char *message = NULL, 170 bool WarnAboutSize = false) const { 171 // This is a convenience override. 172 return CheckBufferAccess(C, state, Size, Buf, NULL, message, NULL, 173 WarnAboutSize); 174 } 175 ProgramStateRef CheckOverlap(CheckerContext &C, 176 ProgramStateRef state, 177 const Expr *Size, 178 const Expr *First, 179 const Expr *Second) const; 180 void emitOverlapBug(CheckerContext &C, 181 ProgramStateRef state, 182 const Stmt *First, 183 const Stmt *Second) const; 184 185 ProgramStateRef checkAdditionOverflow(CheckerContext &C, 186 ProgramStateRef state, 187 NonLoc left, 188 NonLoc right) const; 189 }; 190 191 } //end anonymous namespace 192 193 REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal) 194 195 //===----------------------------------------------------------------------===// 196 // Individual checks and utility methods. 197 //===----------------------------------------------------------------------===// 198 199 std::pair<ProgramStateRef , ProgramStateRef > 200 CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V, 201 QualType Ty) { 202 DefinedSVal *val = dyn_cast<DefinedSVal>(&V); 203 if (!val) 204 return std::pair<ProgramStateRef , ProgramStateRef >(state, state); 205 206 SValBuilder &svalBuilder = C.getSValBuilder(); 207 DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty); 208 return state->assume(svalBuilder.evalEQ(state, *val, zero)); 209 } 210 211 ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C, 212 ProgramStateRef state, 213 const Expr *S, SVal l) const { 214 // If a previous check has failed, propagate the failure. 215 if (!state) 216 return NULL; 217 218 ProgramStateRef stateNull, stateNonNull; 219 llvm::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType()); 220 221 if (stateNull && !stateNonNull) { 222 if (!Filter.CheckCStringNullArg) 223 return NULL; 224 225 ExplodedNode *N = C.generateSink(stateNull); 226 if (!N) 227 return NULL; 228 229 if (!BT_Null) 230 BT_Null.reset(new BuiltinBug("Unix API", 231 "Null pointer argument in call to byte string function")); 232 233 SmallString<80> buf; 234 llvm::raw_svector_ostream os(buf); 235 assert(CurrentFunctionDescription); 236 os << "Null pointer argument in call to " << CurrentFunctionDescription; 237 238 // Generate a report for this bug. 239 BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null.get()); 240 BugReport *report = new BugReport(*BT, os.str(), N); 241 242 report->addRange(S->getSourceRange()); 243 bugreporter::trackNullOrUndefValue(N, S, *report); 244 C.emitReport(report); 245 return NULL; 246 } 247 248 // From here on, assume that the value is non-null. 249 assert(stateNonNull); 250 return stateNonNull; 251 } 252 253 // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor? 254 ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C, 255 ProgramStateRef state, 256 const Expr *S, SVal l, 257 const char *warningMsg) const { 258 // If a previous check has failed, propagate the failure. 259 if (!state) 260 return NULL; 261 262 // Check for out of bound array element access. 263 const MemRegion *R = l.getAsRegion(); 264 if (!R) 265 return state; 266 267 const ElementRegion *ER = dyn_cast<ElementRegion>(R); 268 if (!ER) 269 return state; 270 271 assert(ER->getValueType() == C.getASTContext().CharTy && 272 "CheckLocation should only be called with char* ElementRegions"); 273 274 // Get the size of the array. 275 const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion()); 276 SValBuilder &svalBuilder = C.getSValBuilder(); 277 SVal Extent = 278 svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder)); 279 DefinedOrUnknownSVal Size = cast<DefinedOrUnknownSVal>(Extent); 280 281 // Get the index of the accessed element. 282 DefinedOrUnknownSVal Idx = cast<DefinedOrUnknownSVal>(ER->getIndex()); 283 284 ProgramStateRef StInBound = state->assumeInBound(Idx, Size, true); 285 ProgramStateRef StOutBound = state->assumeInBound(Idx, Size, false); 286 if (StOutBound && !StInBound) { 287 ExplodedNode *N = C.generateSink(StOutBound); 288 if (!N) 289 return NULL; 290 291 if (!BT_Bounds) { 292 BT_Bounds.reset(new BuiltinBug("Out-of-bound array access", 293 "Byte string function accesses out-of-bound array element")); 294 } 295 BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Bounds.get()); 296 297 // Generate a report for this bug. 298 BugReport *report; 299 if (warningMsg) { 300 report = new BugReport(*BT, warningMsg, N); 301 } else { 302 assert(CurrentFunctionDescription); 303 assert(CurrentFunctionDescription[0] != '\0'); 304 305 SmallString<80> buf; 306 llvm::raw_svector_ostream os(buf); 307 os << (char)toupper(CurrentFunctionDescription[0]) 308 << &CurrentFunctionDescription[1] 309 << " accesses out-of-bound array element"; 310 report = new BugReport(*BT, os.str(), N); 311 } 312 313 // FIXME: It would be nice to eventually make this diagnostic more clear, 314 // e.g., by referencing the original declaration or by saying *why* this 315 // reference is outside the range. 316 317 report->addRange(S->getSourceRange()); 318 C.emitReport(report); 319 return NULL; 320 } 321 322 // Array bound check succeeded. From this point forward the array bound 323 // should always succeed. 324 return StInBound; 325 } 326 327 ProgramStateRef CStringChecker::CheckBufferAccess(CheckerContext &C, 328 ProgramStateRef state, 329 const Expr *Size, 330 const Expr *FirstBuf, 331 const Expr *SecondBuf, 332 const char *firstMessage, 333 const char *secondMessage, 334 bool WarnAboutSize) const { 335 // If a previous check has failed, propagate the failure. 336 if (!state) 337 return NULL; 338 339 SValBuilder &svalBuilder = C.getSValBuilder(); 340 ASTContext &Ctx = svalBuilder.getContext(); 341 const LocationContext *LCtx = C.getLocationContext(); 342 343 QualType sizeTy = Size->getType(); 344 QualType PtrTy = Ctx.getPointerType(Ctx.CharTy); 345 346 // Check that the first buffer is non-null. 347 SVal BufVal = state->getSVal(FirstBuf, LCtx); 348 state = checkNonNull(C, state, FirstBuf, BufVal); 349 if (!state) 350 return NULL; 351 352 // If out-of-bounds checking is turned off, skip the rest. 353 if (!Filter.CheckCStringOutOfBounds) 354 return state; 355 356 // Get the access length and make sure it is known. 357 // FIXME: This assumes the caller has already checked that the access length 358 // is positive. And that it's unsigned. 359 SVal LengthVal = state->getSVal(Size, LCtx); 360 NonLoc *Length = dyn_cast<NonLoc>(&LengthVal); 361 if (!Length) 362 return state; 363 364 // Compute the offset of the last element to be accessed: size-1. 365 NonLoc One = cast<NonLoc>(svalBuilder.makeIntVal(1, sizeTy)); 366 NonLoc LastOffset = cast<NonLoc>(svalBuilder.evalBinOpNN(state, BO_Sub, 367 *Length, One, sizeTy)); 368 369 // Check that the first buffer is sufficiently long. 370 SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType()); 371 if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) { 372 const Expr *warningExpr = (WarnAboutSize ? Size : FirstBuf); 373 374 SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, 375 LastOffset, PtrTy); 376 state = CheckLocation(C, state, warningExpr, BufEnd, firstMessage); 377 378 // If the buffer isn't large enough, abort. 379 if (!state) 380 return NULL; 381 } 382 383 // If there's a second buffer, check it as well. 384 if (SecondBuf) { 385 BufVal = state->getSVal(SecondBuf, LCtx); 386 state = checkNonNull(C, state, SecondBuf, BufVal); 387 if (!state) 388 return NULL; 389 390 BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType()); 391 if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) { 392 const Expr *warningExpr = (WarnAboutSize ? Size : SecondBuf); 393 394 SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, 395 LastOffset, PtrTy); 396 state = CheckLocation(C, state, warningExpr, BufEnd, secondMessage); 397 } 398 } 399 400 // Large enough or not, return this state! 401 return state; 402 } 403 404 ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C, 405 ProgramStateRef state, 406 const Expr *Size, 407 const Expr *First, 408 const Expr *Second) const { 409 if (!Filter.CheckCStringBufferOverlap) 410 return state; 411 412 // Do a simple check for overlap: if the two arguments are from the same 413 // buffer, see if the end of the first is greater than the start of the second 414 // or vice versa. 415 416 // If a previous check has failed, propagate the failure. 417 if (!state) 418 return NULL; 419 420 ProgramStateRef stateTrue, stateFalse; 421 422 // Get the buffer values and make sure they're known locations. 423 const LocationContext *LCtx = C.getLocationContext(); 424 SVal firstVal = state->getSVal(First, LCtx); 425 SVal secondVal = state->getSVal(Second, LCtx); 426 427 Loc *firstLoc = dyn_cast<Loc>(&firstVal); 428 if (!firstLoc) 429 return state; 430 431 Loc *secondLoc = dyn_cast<Loc>(&secondVal); 432 if (!secondLoc) 433 return state; 434 435 // Are the two values the same? 436 SValBuilder &svalBuilder = C.getSValBuilder(); 437 llvm::tie(stateTrue, stateFalse) = 438 state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc)); 439 440 if (stateTrue && !stateFalse) { 441 // If the values are known to be equal, that's automatically an overlap. 442 emitOverlapBug(C, stateTrue, First, Second); 443 return NULL; 444 } 445 446 // assume the two expressions are not equal. 447 assert(stateFalse); 448 state = stateFalse; 449 450 // Which value comes first? 451 QualType cmpTy = svalBuilder.getConditionType(); 452 SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT, 453 *firstLoc, *secondLoc, cmpTy); 454 DefinedOrUnknownSVal *reverseTest = dyn_cast<DefinedOrUnknownSVal>(&reverse); 455 if (!reverseTest) 456 return state; 457 458 llvm::tie(stateTrue, stateFalse) = state->assume(*reverseTest); 459 if (stateTrue) { 460 if (stateFalse) { 461 // If we don't know which one comes first, we can't perform this test. 462 return state; 463 } else { 464 // Switch the values so that firstVal is before secondVal. 465 Loc *tmpLoc = firstLoc; 466 firstLoc = secondLoc; 467 secondLoc = tmpLoc; 468 469 // Switch the Exprs as well, so that they still correspond. 470 const Expr *tmpExpr = First; 471 First = Second; 472 Second = tmpExpr; 473 } 474 } 475 476 // Get the length, and make sure it too is known. 477 SVal LengthVal = state->getSVal(Size, LCtx); 478 NonLoc *Length = dyn_cast<NonLoc>(&LengthVal); 479 if (!Length) 480 return state; 481 482 // Convert the first buffer's start address to char*. 483 // Bail out if the cast fails. 484 ASTContext &Ctx = svalBuilder.getContext(); 485 QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy); 486 SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy, 487 First->getType()); 488 Loc *FirstStartLoc = dyn_cast<Loc>(&FirstStart); 489 if (!FirstStartLoc) 490 return state; 491 492 // Compute the end of the first buffer. Bail out if THAT fails. 493 SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add, 494 *FirstStartLoc, *Length, CharPtrTy); 495 Loc *FirstEndLoc = dyn_cast<Loc>(&FirstEnd); 496 if (!FirstEndLoc) 497 return state; 498 499 // Is the end of the first buffer past the start of the second buffer? 500 SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT, 501 *FirstEndLoc, *secondLoc, cmpTy); 502 DefinedOrUnknownSVal *OverlapTest = dyn_cast<DefinedOrUnknownSVal>(&Overlap); 503 if (!OverlapTest) 504 return state; 505 506 llvm::tie(stateTrue, stateFalse) = state->assume(*OverlapTest); 507 508 if (stateTrue && !stateFalse) { 509 // Overlap! 510 emitOverlapBug(C, stateTrue, First, Second); 511 return NULL; 512 } 513 514 // assume the two expressions don't overlap. 515 assert(stateFalse); 516 return stateFalse; 517 } 518 519 void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state, 520 const Stmt *First, const Stmt *Second) const { 521 ExplodedNode *N = C.generateSink(state); 522 if (!N) 523 return; 524 525 if (!BT_Overlap) 526 BT_Overlap.reset(new BugType("Unix API", "Improper arguments")); 527 528 // Generate a report for this bug. 529 BugReport *report = 530 new BugReport(*BT_Overlap, 531 "Arguments must not be overlapping buffers", N); 532 report->addRange(First->getSourceRange()); 533 report->addRange(Second->getSourceRange()); 534 535 C.emitReport(report); 536 } 537 538 ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C, 539 ProgramStateRef state, 540 NonLoc left, 541 NonLoc right) const { 542 // If out-of-bounds checking is turned off, skip the rest. 543 if (!Filter.CheckCStringOutOfBounds) 544 return state; 545 546 // If a previous check has failed, propagate the failure. 547 if (!state) 548 return NULL; 549 550 SValBuilder &svalBuilder = C.getSValBuilder(); 551 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory(); 552 553 QualType sizeTy = svalBuilder.getContext().getSizeType(); 554 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy); 555 NonLoc maxVal = svalBuilder.makeIntVal(maxValInt); 556 557 SVal maxMinusRight; 558 if (isa<nonloc::ConcreteInt>(right)) { 559 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right, 560 sizeTy); 561 } else { 562 // Try switching the operands. (The order of these two assignments is 563 // important!) 564 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left, 565 sizeTy); 566 left = right; 567 } 568 569 if (NonLoc *maxMinusRightNL = dyn_cast<NonLoc>(&maxMinusRight)) { 570 QualType cmpTy = svalBuilder.getConditionType(); 571 // If left > max - right, we have an overflow. 572 SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left, 573 *maxMinusRightNL, cmpTy); 574 575 ProgramStateRef stateOverflow, stateOkay; 576 llvm::tie(stateOverflow, stateOkay) = 577 state->assume(cast<DefinedOrUnknownSVal>(willOverflow)); 578 579 if (stateOverflow && !stateOkay) { 580 // We have an overflow. Emit a bug report. 581 ExplodedNode *N = C.generateSink(stateOverflow); 582 if (!N) 583 return NULL; 584 585 if (!BT_AdditionOverflow) 586 BT_AdditionOverflow.reset(new BuiltinBug("API", 587 "Sum of expressions causes overflow")); 588 589 // This isn't a great error message, but this should never occur in real 590 // code anyway -- you'd have to create a buffer longer than a size_t can 591 // represent, which is sort of a contradiction. 592 const char *warning = 593 "This expression will create a string whose length is too big to " 594 "be represented as a size_t"; 595 596 // Generate a report for this bug. 597 BugReport *report = new BugReport(*BT_AdditionOverflow, warning, N); 598 C.emitReport(report); 599 600 return NULL; 601 } 602 603 // From now on, assume an overflow didn't occur. 604 assert(stateOkay); 605 state = stateOkay; 606 } 607 608 return state; 609 } 610 611 ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state, 612 const MemRegion *MR, 613 SVal strLength) { 614 assert(!strLength.isUndef() && "Attempt to set an undefined string length"); 615 616 MR = MR->StripCasts(); 617 618 switch (MR->getKind()) { 619 case MemRegion::StringRegionKind: 620 // FIXME: This can happen if we strcpy() into a string region. This is 621 // undefined [C99 6.4.5p6], but we should still warn about it. 622 return state; 623 624 case MemRegion::SymbolicRegionKind: 625 case MemRegion::AllocaRegionKind: 626 case MemRegion::VarRegionKind: 627 case MemRegion::FieldRegionKind: 628 case MemRegion::ObjCIvarRegionKind: 629 // These are the types we can currently track string lengths for. 630 break; 631 632 case MemRegion::ElementRegionKind: 633 // FIXME: Handle element regions by upper-bounding the parent region's 634 // string length. 635 return state; 636 637 default: 638 // Other regions (mostly non-data) can't have a reliable C string length. 639 // For now, just ignore the change. 640 // FIXME: These are rare but not impossible. We should output some kind of 641 // warning for things like strcpy((char[]){'a', 0}, "b"); 642 return state; 643 } 644 645 if (strLength.isUnknown()) 646 return state->remove<CStringLength>(MR); 647 648 return state->set<CStringLength>(MR, strLength); 649 } 650 651 SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C, 652 ProgramStateRef &state, 653 const Expr *Ex, 654 const MemRegion *MR, 655 bool hypothetical) { 656 if (!hypothetical) { 657 // If there's a recorded length, go ahead and return it. 658 const SVal *Recorded = state->get<CStringLength>(MR); 659 if (Recorded) 660 return *Recorded; 661 } 662 663 // Otherwise, get a new symbol and update the state. 664 SValBuilder &svalBuilder = C.getSValBuilder(); 665 QualType sizeTy = svalBuilder.getContext().getSizeType(); 666 SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(), 667 MR, Ex, sizeTy, 668 C.blockCount()); 669 670 if (!hypothetical) 671 state = state->set<CStringLength>(MR, strLength); 672 673 return strLength; 674 } 675 676 SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state, 677 const Expr *Ex, SVal Buf, 678 bool hypothetical) const { 679 const MemRegion *MR = Buf.getAsRegion(); 680 if (!MR) { 681 // If we can't get a region, see if it's something we /know/ isn't a 682 // C string. In the context of locations, the only time we can issue such 683 // a warning is for labels. 684 if (loc::GotoLabel *Label = dyn_cast<loc::GotoLabel>(&Buf)) { 685 if (!Filter.CheckCStringNotNullTerm) 686 return UndefinedVal(); 687 688 if (ExplodedNode *N = C.addTransition(state)) { 689 if (!BT_NotCString) 690 BT_NotCString.reset(new BuiltinBug("Unix API", 691 "Argument is not a null-terminated string.")); 692 693 SmallString<120> buf; 694 llvm::raw_svector_ostream os(buf); 695 assert(CurrentFunctionDescription); 696 os << "Argument to " << CurrentFunctionDescription 697 << " is the address of the label '" << Label->getLabel()->getName() 698 << "', which is not a null-terminated string"; 699 700 // Generate a report for this bug. 701 BugReport *report = new BugReport(*BT_NotCString, 702 os.str(), N); 703 704 report->addRange(Ex->getSourceRange()); 705 C.emitReport(report); 706 } 707 return UndefinedVal(); 708 709 } 710 711 // If it's not a region and not a label, give up. 712 return UnknownVal(); 713 } 714 715 // If we have a region, strip casts from it and see if we can figure out 716 // its length. For anything we can't figure out, just return UnknownVal. 717 MR = MR->StripCasts(); 718 719 switch (MR->getKind()) { 720 case MemRegion::StringRegionKind: { 721 // Modifying the contents of string regions is undefined [C99 6.4.5p6], 722 // so we can assume that the byte length is the correct C string length. 723 SValBuilder &svalBuilder = C.getSValBuilder(); 724 QualType sizeTy = svalBuilder.getContext().getSizeType(); 725 const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral(); 726 return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy); 727 } 728 case MemRegion::SymbolicRegionKind: 729 case MemRegion::AllocaRegionKind: 730 case MemRegion::VarRegionKind: 731 case MemRegion::FieldRegionKind: 732 case MemRegion::ObjCIvarRegionKind: 733 return getCStringLengthForRegion(C, state, Ex, MR, hypothetical); 734 case MemRegion::CompoundLiteralRegionKind: 735 // FIXME: Can we track this? Is it necessary? 736 return UnknownVal(); 737 case MemRegion::ElementRegionKind: 738 // FIXME: How can we handle this? It's not good enough to subtract the 739 // offset from the base string length; consider "123\x00567" and &a[5]. 740 return UnknownVal(); 741 default: 742 // Other regions (mostly non-data) can't have a reliable C string length. 743 // In this case, an error is emitted and UndefinedVal is returned. 744 // The caller should always be prepared to handle this case. 745 if (!Filter.CheckCStringNotNullTerm) 746 return UndefinedVal(); 747 748 if (ExplodedNode *N = C.addTransition(state)) { 749 if (!BT_NotCString) 750 BT_NotCString.reset(new BuiltinBug("Unix API", 751 "Argument is not a null-terminated string.")); 752 753 SmallString<120> buf; 754 llvm::raw_svector_ostream os(buf); 755 756 assert(CurrentFunctionDescription); 757 os << "Argument to " << CurrentFunctionDescription << " is "; 758 759 if (SummarizeRegion(os, C.getASTContext(), MR)) 760 os << ", which is not a null-terminated string"; 761 else 762 os << "not a null-terminated string"; 763 764 // Generate a report for this bug. 765 BugReport *report = new BugReport(*BT_NotCString, 766 os.str(), N); 767 768 report->addRange(Ex->getSourceRange()); 769 C.emitReport(report); 770 } 771 772 return UndefinedVal(); 773 } 774 } 775 776 const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C, 777 ProgramStateRef &state, const Expr *expr, SVal val) const { 778 779 // Get the memory region pointed to by the val. 780 const MemRegion *bufRegion = val.getAsRegion(); 781 if (!bufRegion) 782 return NULL; 783 784 // Strip casts off the memory region. 785 bufRegion = bufRegion->StripCasts(); 786 787 // Cast the memory region to a string region. 788 const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion); 789 if (!strRegion) 790 return NULL; 791 792 // Return the actual string in the string region. 793 return strRegion->getStringLiteral(); 794 } 795 796 ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C, 797 ProgramStateRef state, 798 const Expr *E, SVal V) { 799 Loc *L = dyn_cast<Loc>(&V); 800 if (!L) 801 return state; 802 803 // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes 804 // some assumptions about the value that CFRefCount can't. Even so, it should 805 // probably be refactored. 806 if (loc::MemRegionVal* MR = dyn_cast<loc::MemRegionVal>(L)) { 807 const MemRegion *R = MR->getRegion()->StripCasts(); 808 809 // Are we dealing with an ElementRegion? If so, we should be invalidating 810 // the super-region. 811 if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) { 812 R = ER->getSuperRegion(); 813 // FIXME: What about layers of ElementRegions? 814 } 815 816 // Invalidate this region. 817 const LocationContext *LCtx = C.getPredecessor()->getLocationContext(); 818 return state->invalidateRegions(R, E, C.blockCount(), LCtx); 819 } 820 821 // If we have a non-region value by chance, just remove the binding. 822 // FIXME: is this necessary or correct? This handles the non-Region 823 // cases. Is it ever valid to store to these? 824 return state->killBinding(*L); 825 } 826 827 bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx, 828 const MemRegion *MR) { 829 const TypedValueRegion *TVR = dyn_cast<TypedValueRegion>(MR); 830 831 switch (MR->getKind()) { 832 case MemRegion::FunctionTextRegionKind: { 833 const NamedDecl *FD = cast<FunctionTextRegion>(MR)->getDecl(); 834 if (FD) 835 os << "the address of the function '" << *FD << '\''; 836 else 837 os << "the address of a function"; 838 return true; 839 } 840 case MemRegion::BlockTextRegionKind: 841 os << "block text"; 842 return true; 843 case MemRegion::BlockDataRegionKind: 844 os << "a block"; 845 return true; 846 case MemRegion::CXXThisRegionKind: 847 case MemRegion::CXXTempObjectRegionKind: 848 os << "a C++ temp object of type " << TVR->getValueType().getAsString(); 849 return true; 850 case MemRegion::VarRegionKind: 851 os << "a variable of type" << TVR->getValueType().getAsString(); 852 return true; 853 case MemRegion::FieldRegionKind: 854 os << "a field of type " << TVR->getValueType().getAsString(); 855 return true; 856 case MemRegion::ObjCIvarRegionKind: 857 os << "an instance variable of type " << TVR->getValueType().getAsString(); 858 return true; 859 default: 860 return false; 861 } 862 } 863 864 //===----------------------------------------------------------------------===// 865 // evaluation of individual function calls. 866 //===----------------------------------------------------------------------===// 867 868 void CStringChecker::evalCopyCommon(CheckerContext &C, 869 const CallExpr *CE, 870 ProgramStateRef state, 871 const Expr *Size, const Expr *Dest, 872 const Expr *Source, bool Restricted, 873 bool IsMempcpy) const { 874 CurrentFunctionDescription = "memory copy function"; 875 876 // See if the size argument is zero. 877 const LocationContext *LCtx = C.getLocationContext(); 878 SVal sizeVal = state->getSVal(Size, LCtx); 879 QualType sizeTy = Size->getType(); 880 881 ProgramStateRef stateZeroSize, stateNonZeroSize; 882 llvm::tie(stateZeroSize, stateNonZeroSize) = 883 assumeZero(C, state, sizeVal, sizeTy); 884 885 // Get the value of the Dest. 886 SVal destVal = state->getSVal(Dest, LCtx); 887 888 // If the size is zero, there won't be any actual memory access, so 889 // just bind the return value to the destination buffer and return. 890 if (stateZeroSize && !stateNonZeroSize) { 891 stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, destVal); 892 C.addTransition(stateZeroSize); 893 return; 894 } 895 896 // If the size can be nonzero, we have to check the other arguments. 897 if (stateNonZeroSize) { 898 state = stateNonZeroSize; 899 900 // Ensure the destination is not null. If it is NULL there will be a 901 // NULL pointer dereference. 902 state = checkNonNull(C, state, Dest, destVal); 903 if (!state) 904 return; 905 906 // Get the value of the Src. 907 SVal srcVal = state->getSVal(Source, LCtx); 908 909 // Ensure the source is not null. If it is NULL there will be a 910 // NULL pointer dereference. 911 state = checkNonNull(C, state, Source, srcVal); 912 if (!state) 913 return; 914 915 // Ensure the accesses are valid and that the buffers do not overlap. 916 const char * const writeWarning = 917 "Memory copy function overflows destination buffer"; 918 state = CheckBufferAccess(C, state, Size, Dest, Source, 919 writeWarning, /* sourceWarning = */ NULL); 920 if (Restricted) 921 state = CheckOverlap(C, state, Size, Dest, Source); 922 923 if (!state) 924 return; 925 926 // If this is mempcpy, get the byte after the last byte copied and 927 // bind the expr. 928 if (IsMempcpy) { 929 loc::MemRegionVal *destRegVal = dyn_cast<loc::MemRegionVal>(&destVal); 930 assert(destRegVal && "Destination should be a known MemRegionVal here"); 931 932 // Get the length to copy. 933 NonLoc *lenValNonLoc = dyn_cast<NonLoc>(&sizeVal); 934 935 if (lenValNonLoc) { 936 // Get the byte after the last byte copied. 937 SVal lastElement = C.getSValBuilder().evalBinOpLN(state, BO_Add, 938 *destRegVal, 939 *lenValNonLoc, 940 Dest->getType()); 941 942 // The byte after the last byte copied is the return value. 943 state = state->BindExpr(CE, LCtx, lastElement); 944 } else { 945 // If we don't know how much we copied, we can at least 946 // conjure a return value for later. 947 SVal result = C.getSValBuilder().conjureSymbolVal(0, CE, LCtx, 948 C.blockCount()); 949 state = state->BindExpr(CE, LCtx, result); 950 } 951 952 } else { 953 // All other copies return the destination buffer. 954 // (Well, bcopy() has a void return type, but this won't hurt.) 955 state = state->BindExpr(CE, LCtx, destVal); 956 } 957 958 // Invalidate the destination. 959 // FIXME: Even if we can't perfectly model the copy, we should see if we 960 // can use LazyCompoundVals to copy the source values into the destination. 961 // This would probably remove any existing bindings past the end of the 962 // copied region, but that's still an improvement over blank invalidation. 963 state = InvalidateBuffer(C, state, Dest, 964 state->getSVal(Dest, C.getLocationContext())); 965 C.addTransition(state); 966 } 967 } 968 969 970 void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) const { 971 if (CE->getNumArgs() < 3) 972 return; 973 974 // void *memcpy(void *restrict dst, const void *restrict src, size_t n); 975 // The return value is the address of the destination buffer. 976 const Expr *Dest = CE->getArg(0); 977 ProgramStateRef state = C.getState(); 978 979 evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true); 980 } 981 982 void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE) const { 983 if (CE->getNumArgs() < 3) 984 return; 985 986 // void *mempcpy(void *restrict dst, const void *restrict src, size_t n); 987 // The return value is a pointer to the byte following the last written byte. 988 const Expr *Dest = CE->getArg(0); 989 ProgramStateRef state = C.getState(); 990 991 evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true, true); 992 } 993 994 void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) const { 995 if (CE->getNumArgs() < 3) 996 return; 997 998 // void *memmove(void *dst, const void *src, size_t n); 999 // The return value is the address of the destination buffer. 1000 const Expr *Dest = CE->getArg(0); 1001 ProgramStateRef state = C.getState(); 1002 1003 evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1)); 1004 } 1005 1006 void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const { 1007 if (CE->getNumArgs() < 3) 1008 return; 1009 1010 // void bcopy(const void *src, void *dst, size_t n); 1011 evalCopyCommon(C, CE, C.getState(), 1012 CE->getArg(2), CE->getArg(1), CE->getArg(0)); 1013 } 1014 1015 void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) const { 1016 if (CE->getNumArgs() < 3) 1017 return; 1018 1019 // int memcmp(const void *s1, const void *s2, size_t n); 1020 CurrentFunctionDescription = "memory comparison function"; 1021 1022 const Expr *Left = CE->getArg(0); 1023 const Expr *Right = CE->getArg(1); 1024 const Expr *Size = CE->getArg(2); 1025 1026 ProgramStateRef state = C.getState(); 1027 SValBuilder &svalBuilder = C.getSValBuilder(); 1028 1029 // See if the size argument is zero. 1030 const LocationContext *LCtx = C.getLocationContext(); 1031 SVal sizeVal = state->getSVal(Size, LCtx); 1032 QualType sizeTy = Size->getType(); 1033 1034 ProgramStateRef stateZeroSize, stateNonZeroSize; 1035 llvm::tie(stateZeroSize, stateNonZeroSize) = 1036 assumeZero(C, state, sizeVal, sizeTy); 1037 1038 // If the size can be zero, the result will be 0 in that case, and we don't 1039 // have to check either of the buffers. 1040 if (stateZeroSize) { 1041 state = stateZeroSize; 1042 state = state->BindExpr(CE, LCtx, 1043 svalBuilder.makeZeroVal(CE->getType())); 1044 C.addTransition(state); 1045 } 1046 1047 // If the size can be nonzero, we have to check the other arguments. 1048 if (stateNonZeroSize) { 1049 state = stateNonZeroSize; 1050 // If we know the two buffers are the same, we know the result is 0. 1051 // First, get the two buffers' addresses. Another checker will have already 1052 // made sure they're not undefined. 1053 DefinedOrUnknownSVal LV = 1054 cast<DefinedOrUnknownSVal>(state->getSVal(Left, LCtx)); 1055 DefinedOrUnknownSVal RV = 1056 cast<DefinedOrUnknownSVal>(state->getSVal(Right, LCtx)); 1057 1058 // See if they are the same. 1059 DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV); 1060 ProgramStateRef StSameBuf, StNotSameBuf; 1061 llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf); 1062 1063 // If the two arguments might be the same buffer, we know the result is 0, 1064 // and we only need to check one size. 1065 if (StSameBuf) { 1066 state = StSameBuf; 1067 state = CheckBufferAccess(C, state, Size, Left); 1068 if (state) { 1069 state = StSameBuf->BindExpr(CE, LCtx, 1070 svalBuilder.makeZeroVal(CE->getType())); 1071 C.addTransition(state); 1072 } 1073 } 1074 1075 // If the two arguments might be different buffers, we have to check the 1076 // size of both of them. 1077 if (StNotSameBuf) { 1078 state = StNotSameBuf; 1079 state = CheckBufferAccess(C, state, Size, Left, Right); 1080 if (state) { 1081 // The return value is the comparison result, which we don't know. 1082 SVal CmpV = svalBuilder.conjureSymbolVal(0, CE, LCtx, C.blockCount()); 1083 state = state->BindExpr(CE, LCtx, CmpV); 1084 C.addTransition(state); 1085 } 1086 } 1087 } 1088 } 1089 1090 void CStringChecker::evalstrLength(CheckerContext &C, 1091 const CallExpr *CE) const { 1092 if (CE->getNumArgs() < 1) 1093 return; 1094 1095 // size_t strlen(const char *s); 1096 evalstrLengthCommon(C, CE, /* IsStrnlen = */ false); 1097 } 1098 1099 void CStringChecker::evalstrnLength(CheckerContext &C, 1100 const CallExpr *CE) const { 1101 if (CE->getNumArgs() < 2) 1102 return; 1103 1104 // size_t strnlen(const char *s, size_t maxlen); 1105 evalstrLengthCommon(C, CE, /* IsStrnlen = */ true); 1106 } 1107 1108 void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE, 1109 bool IsStrnlen) const { 1110 CurrentFunctionDescription = "string length function"; 1111 ProgramStateRef state = C.getState(); 1112 const LocationContext *LCtx = C.getLocationContext(); 1113 1114 if (IsStrnlen) { 1115 const Expr *maxlenExpr = CE->getArg(1); 1116 SVal maxlenVal = state->getSVal(maxlenExpr, LCtx); 1117 1118 ProgramStateRef stateZeroSize, stateNonZeroSize; 1119 llvm::tie(stateZeroSize, stateNonZeroSize) = 1120 assumeZero(C, state, maxlenVal, maxlenExpr->getType()); 1121 1122 // If the size can be zero, the result will be 0 in that case, and we don't 1123 // have to check the string itself. 1124 if (stateZeroSize) { 1125 SVal zero = C.getSValBuilder().makeZeroVal(CE->getType()); 1126 stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, zero); 1127 C.addTransition(stateZeroSize); 1128 } 1129 1130 // If the size is GUARANTEED to be zero, we're done! 1131 if (!stateNonZeroSize) 1132 return; 1133 1134 // Otherwise, record the assumption that the size is nonzero. 1135 state = stateNonZeroSize; 1136 } 1137 1138 // Check that the string argument is non-null. 1139 const Expr *Arg = CE->getArg(0); 1140 SVal ArgVal = state->getSVal(Arg, LCtx); 1141 1142 state = checkNonNull(C, state, Arg, ArgVal); 1143 1144 if (!state) 1145 return; 1146 1147 SVal strLength = getCStringLength(C, state, Arg, ArgVal); 1148 1149 // If the argument isn't a valid C string, there's no valid state to 1150 // transition to. 1151 if (strLength.isUndef()) 1152 return; 1153 1154 DefinedOrUnknownSVal result = UnknownVal(); 1155 1156 // If the check is for strnlen() then bind the return value to no more than 1157 // the maxlen value. 1158 if (IsStrnlen) { 1159 QualType cmpTy = C.getSValBuilder().getConditionType(); 1160 1161 // It's a little unfortunate to be getting this again, 1162 // but it's not that expensive... 1163 const Expr *maxlenExpr = CE->getArg(1); 1164 SVal maxlenVal = state->getSVal(maxlenExpr, LCtx); 1165 1166 NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength); 1167 NonLoc *maxlenValNL = dyn_cast<NonLoc>(&maxlenVal); 1168 1169 if (strLengthNL && maxlenValNL) { 1170 ProgramStateRef stateStringTooLong, stateStringNotTooLong; 1171 1172 // Check if the strLength is greater than the maxlen. 1173 llvm::tie(stateStringTooLong, stateStringNotTooLong) = 1174 state->assume(cast<DefinedOrUnknownSVal> 1175 (C.getSValBuilder().evalBinOpNN(state, BO_GT, 1176 *strLengthNL, 1177 *maxlenValNL, 1178 cmpTy))); 1179 1180 if (stateStringTooLong && !stateStringNotTooLong) { 1181 // If the string is longer than maxlen, return maxlen. 1182 result = *maxlenValNL; 1183 } else if (stateStringNotTooLong && !stateStringTooLong) { 1184 // If the string is shorter than maxlen, return its length. 1185 result = *strLengthNL; 1186 } 1187 } 1188 1189 if (result.isUnknown()) { 1190 // If we don't have enough information for a comparison, there's 1191 // no guarantee the full string length will actually be returned. 1192 // All we know is the return value is the min of the string length 1193 // and the limit. This is better than nothing. 1194 result = C.getSValBuilder().conjureSymbolVal(0, CE, LCtx, C.blockCount()); 1195 NonLoc *resultNL = cast<NonLoc>(&result); 1196 1197 if (strLengthNL) { 1198 state = state->assume(cast<DefinedOrUnknownSVal> 1199 (C.getSValBuilder().evalBinOpNN(state, BO_LE, 1200 *resultNL, 1201 *strLengthNL, 1202 cmpTy)), true); 1203 } 1204 1205 if (maxlenValNL) { 1206 state = state->assume(cast<DefinedOrUnknownSVal> 1207 (C.getSValBuilder().evalBinOpNN(state, BO_LE, 1208 *resultNL, 1209 *maxlenValNL, 1210 cmpTy)), true); 1211 } 1212 } 1213 1214 } else { 1215 // This is a plain strlen(), not strnlen(). 1216 result = cast<DefinedOrUnknownSVal>(strLength); 1217 1218 // If we don't know the length of the string, conjure a return 1219 // value, so it can be used in constraints, at least. 1220 if (result.isUnknown()) { 1221 result = C.getSValBuilder().conjureSymbolVal(0, CE, LCtx, C.blockCount()); 1222 } 1223 } 1224 1225 // Bind the return value. 1226 assert(!result.isUnknown() && "Should have conjured a value by now"); 1227 state = state->BindExpr(CE, LCtx, result); 1228 C.addTransition(state); 1229 } 1230 1231 void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const { 1232 if (CE->getNumArgs() < 2) 1233 return; 1234 1235 // char *strcpy(char *restrict dst, const char *restrict src); 1236 evalStrcpyCommon(C, CE, 1237 /* returnEnd = */ false, 1238 /* isBounded = */ false, 1239 /* isAppending = */ false); 1240 } 1241 1242 void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const { 1243 if (CE->getNumArgs() < 3) 1244 return; 1245 1246 // char *strncpy(char *restrict dst, const char *restrict src, size_t n); 1247 evalStrcpyCommon(C, CE, 1248 /* returnEnd = */ false, 1249 /* isBounded = */ true, 1250 /* isAppending = */ false); 1251 } 1252 1253 void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const { 1254 if (CE->getNumArgs() < 2) 1255 return; 1256 1257 // char *stpcpy(char *restrict dst, const char *restrict src); 1258 evalStrcpyCommon(C, CE, 1259 /* returnEnd = */ true, 1260 /* isBounded = */ false, 1261 /* isAppending = */ false); 1262 } 1263 1264 void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const { 1265 if (CE->getNumArgs() < 2) 1266 return; 1267 1268 //char *strcat(char *restrict s1, const char *restrict s2); 1269 evalStrcpyCommon(C, CE, 1270 /* returnEnd = */ false, 1271 /* isBounded = */ false, 1272 /* isAppending = */ true); 1273 } 1274 1275 void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const { 1276 if (CE->getNumArgs() < 3) 1277 return; 1278 1279 //char *strncat(char *restrict s1, const char *restrict s2, size_t n); 1280 evalStrcpyCommon(C, CE, 1281 /* returnEnd = */ false, 1282 /* isBounded = */ true, 1283 /* isAppending = */ true); 1284 } 1285 1286 void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, 1287 bool returnEnd, bool isBounded, 1288 bool isAppending) const { 1289 CurrentFunctionDescription = "string copy function"; 1290 ProgramStateRef state = C.getState(); 1291 const LocationContext *LCtx = C.getLocationContext(); 1292 1293 // Check that the destination is non-null. 1294 const Expr *Dst = CE->getArg(0); 1295 SVal DstVal = state->getSVal(Dst, LCtx); 1296 1297 state = checkNonNull(C, state, Dst, DstVal); 1298 if (!state) 1299 return; 1300 1301 // Check that the source is non-null. 1302 const Expr *srcExpr = CE->getArg(1); 1303 SVal srcVal = state->getSVal(srcExpr, LCtx); 1304 state = checkNonNull(C, state, srcExpr, srcVal); 1305 if (!state) 1306 return; 1307 1308 // Get the string length of the source. 1309 SVal strLength = getCStringLength(C, state, srcExpr, srcVal); 1310 1311 // If the source isn't a valid C string, give up. 1312 if (strLength.isUndef()) 1313 return; 1314 1315 SValBuilder &svalBuilder = C.getSValBuilder(); 1316 QualType cmpTy = svalBuilder.getConditionType(); 1317 QualType sizeTy = svalBuilder.getContext().getSizeType(); 1318 1319 // These two values allow checking two kinds of errors: 1320 // - actual overflows caused by a source that doesn't fit in the destination 1321 // - potential overflows caused by a bound that could exceed the destination 1322 SVal amountCopied = UnknownVal(); 1323 SVal maxLastElementIndex = UnknownVal(); 1324 const char *boundWarning = NULL; 1325 1326 // If the function is strncpy, strncat, etc... it is bounded. 1327 if (isBounded) { 1328 // Get the max number of characters to copy. 1329 const Expr *lenExpr = CE->getArg(2); 1330 SVal lenVal = state->getSVal(lenExpr, LCtx); 1331 1332 // Protect against misdeclared strncpy(). 1333 lenVal = svalBuilder.evalCast(lenVal, sizeTy, lenExpr->getType()); 1334 1335 NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength); 1336 NonLoc *lenValNL = dyn_cast<NonLoc>(&lenVal); 1337 1338 // If we know both values, we might be able to figure out how much 1339 // we're copying. 1340 if (strLengthNL && lenValNL) { 1341 ProgramStateRef stateSourceTooLong, stateSourceNotTooLong; 1342 1343 // Check if the max number to copy is less than the length of the src. 1344 // If the bound is equal to the source length, strncpy won't null- 1345 // terminate the result! 1346 llvm::tie(stateSourceTooLong, stateSourceNotTooLong) = 1347 state->assume(cast<DefinedOrUnknownSVal> 1348 (svalBuilder.evalBinOpNN(state, BO_GE, *strLengthNL, 1349 *lenValNL, cmpTy))); 1350 1351 if (stateSourceTooLong && !stateSourceNotTooLong) { 1352 // Max number to copy is less than the length of the src, so the actual 1353 // strLength copied is the max number arg. 1354 state = stateSourceTooLong; 1355 amountCopied = lenVal; 1356 1357 } else if (!stateSourceTooLong && stateSourceNotTooLong) { 1358 // The source buffer entirely fits in the bound. 1359 state = stateSourceNotTooLong; 1360 amountCopied = strLength; 1361 } 1362 } 1363 1364 // We still want to know if the bound is known to be too large. 1365 if (lenValNL) { 1366 if (isAppending) { 1367 // For strncat, the check is strlen(dst) + lenVal < sizeof(dst) 1368 1369 // Get the string length of the destination. If the destination is 1370 // memory that can't have a string length, we shouldn't be copying 1371 // into it anyway. 1372 SVal dstStrLength = getCStringLength(C, state, Dst, DstVal); 1373 if (dstStrLength.isUndef()) 1374 return; 1375 1376 if (NonLoc *dstStrLengthNL = dyn_cast<NonLoc>(&dstStrLength)) { 1377 maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Add, 1378 *lenValNL, 1379 *dstStrLengthNL, 1380 sizeTy); 1381 boundWarning = "Size argument is greater than the free space in the " 1382 "destination buffer"; 1383 } 1384 1385 } else { 1386 // For strncpy, this is just checking that lenVal <= sizeof(dst) 1387 // (Yes, strncpy and strncat differ in how they treat termination. 1388 // strncat ALWAYS terminates, but strncpy doesn't.) 1389 1390 // We need a special case for when the copy size is zero, in which 1391 // case strncpy will do no work at all. Our bounds check uses n-1 1392 // as the last element accessed, so n == 0 is problematic. 1393 ProgramStateRef StateZeroSize, StateNonZeroSize; 1394 llvm::tie(StateZeroSize, StateNonZeroSize) = 1395 assumeZero(C, state, *lenValNL, sizeTy); 1396 1397 // If the size is known to be zero, we're done. 1398 if (StateZeroSize && !StateNonZeroSize) { 1399 StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, DstVal); 1400 C.addTransition(StateZeroSize); 1401 return; 1402 } 1403 1404 // Otherwise, go ahead and figure out the last element we'll touch. 1405 // We don't record the non-zero assumption here because we can't 1406 // be sure. We won't warn on a possible zero. 1407 NonLoc one = cast<NonLoc>(svalBuilder.makeIntVal(1, sizeTy)); 1408 maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL, 1409 one, sizeTy); 1410 boundWarning = "Size argument is greater than the length of the " 1411 "destination buffer"; 1412 } 1413 } 1414 1415 // If we couldn't pin down the copy length, at least bound it. 1416 // FIXME: We should actually run this code path for append as well, but 1417 // right now it creates problems with constraints (since we can end up 1418 // trying to pass constraints from symbol to symbol). 1419 if (amountCopied.isUnknown() && !isAppending) { 1420 // Try to get a "hypothetical" string length symbol, which we can later 1421 // set as a real value if that turns out to be the case. 1422 amountCopied = getCStringLength(C, state, lenExpr, srcVal, true); 1423 assert(!amountCopied.isUndef()); 1424 1425 if (NonLoc *amountCopiedNL = dyn_cast<NonLoc>(&amountCopied)) { 1426 if (lenValNL) { 1427 // amountCopied <= lenVal 1428 SVal copiedLessThanBound = svalBuilder.evalBinOpNN(state, BO_LE, 1429 *amountCopiedNL, 1430 *lenValNL, 1431 cmpTy); 1432 state = state->assume(cast<DefinedOrUnknownSVal>(copiedLessThanBound), 1433 true); 1434 if (!state) 1435 return; 1436 } 1437 1438 if (strLengthNL) { 1439 // amountCopied <= strlen(source) 1440 SVal copiedLessThanSrc = svalBuilder.evalBinOpNN(state, BO_LE, 1441 *amountCopiedNL, 1442 *strLengthNL, 1443 cmpTy); 1444 state = state->assume(cast<DefinedOrUnknownSVal>(copiedLessThanSrc), 1445 true); 1446 if (!state) 1447 return; 1448 } 1449 } 1450 } 1451 1452 } else { 1453 // The function isn't bounded. The amount copied should match the length 1454 // of the source buffer. 1455 amountCopied = strLength; 1456 } 1457 1458 assert(state); 1459 1460 // This represents the number of characters copied into the destination 1461 // buffer. (It may not actually be the strlen if the destination buffer 1462 // is not terminated.) 1463 SVal finalStrLength = UnknownVal(); 1464 1465 // If this is an appending function (strcat, strncat...) then set the 1466 // string length to strlen(src) + strlen(dst) since the buffer will 1467 // ultimately contain both. 1468 if (isAppending) { 1469 // Get the string length of the destination. If the destination is memory 1470 // that can't have a string length, we shouldn't be copying into it anyway. 1471 SVal dstStrLength = getCStringLength(C, state, Dst, DstVal); 1472 if (dstStrLength.isUndef()) 1473 return; 1474 1475 NonLoc *srcStrLengthNL = dyn_cast<NonLoc>(&amountCopied); 1476 NonLoc *dstStrLengthNL = dyn_cast<NonLoc>(&dstStrLength); 1477 1478 // If we know both string lengths, we might know the final string length. 1479 if (srcStrLengthNL && dstStrLengthNL) { 1480 // Make sure the two lengths together don't overflow a size_t. 1481 state = checkAdditionOverflow(C, state, *srcStrLengthNL, *dstStrLengthNL); 1482 if (!state) 1483 return; 1484 1485 finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *srcStrLengthNL, 1486 *dstStrLengthNL, sizeTy); 1487 } 1488 1489 // If we couldn't get a single value for the final string length, 1490 // we can at least bound it by the individual lengths. 1491 if (finalStrLength.isUnknown()) { 1492 // Try to get a "hypothetical" string length symbol, which we can later 1493 // set as a real value if that turns out to be the case. 1494 finalStrLength = getCStringLength(C, state, CE, DstVal, true); 1495 assert(!finalStrLength.isUndef()); 1496 1497 if (NonLoc *finalStrLengthNL = dyn_cast<NonLoc>(&finalStrLength)) { 1498 if (srcStrLengthNL) { 1499 // finalStrLength >= srcStrLength 1500 SVal sourceInResult = svalBuilder.evalBinOpNN(state, BO_GE, 1501 *finalStrLengthNL, 1502 *srcStrLengthNL, 1503 cmpTy); 1504 state = state->assume(cast<DefinedOrUnknownSVal>(sourceInResult), 1505 true); 1506 if (!state) 1507 return; 1508 } 1509 1510 if (dstStrLengthNL) { 1511 // finalStrLength >= dstStrLength 1512 SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE, 1513 *finalStrLengthNL, 1514 *dstStrLengthNL, 1515 cmpTy); 1516 state = state->assume(cast<DefinedOrUnknownSVal>(destInResult), 1517 true); 1518 if (!state) 1519 return; 1520 } 1521 } 1522 } 1523 1524 } else { 1525 // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and 1526 // the final string length will match the input string length. 1527 finalStrLength = amountCopied; 1528 } 1529 1530 // The final result of the function will either be a pointer past the last 1531 // copied element, or a pointer to the start of the destination buffer. 1532 SVal Result = (returnEnd ? UnknownVal() : DstVal); 1533 1534 assert(state); 1535 1536 // If the destination is a MemRegion, try to check for a buffer overflow and 1537 // record the new string length. 1538 if (loc::MemRegionVal *dstRegVal = dyn_cast<loc::MemRegionVal>(&DstVal)) { 1539 QualType ptrTy = Dst->getType(); 1540 1541 // If we have an exact value on a bounded copy, use that to check for 1542 // overflows, rather than our estimate about how much is actually copied. 1543 if (boundWarning) { 1544 if (NonLoc *maxLastNL = dyn_cast<NonLoc>(&maxLastElementIndex)) { 1545 SVal maxLastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal, 1546 *maxLastNL, ptrTy); 1547 state = CheckLocation(C, state, CE->getArg(2), maxLastElement, 1548 boundWarning); 1549 if (!state) 1550 return; 1551 } 1552 } 1553 1554 // Then, if the final length is known... 1555 if (NonLoc *knownStrLength = dyn_cast<NonLoc>(&finalStrLength)) { 1556 SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal, 1557 *knownStrLength, ptrTy); 1558 1559 // ...and we haven't checked the bound, we'll check the actual copy. 1560 if (!boundWarning) { 1561 const char * const warningMsg = 1562 "String copy function overflows destination buffer"; 1563 state = CheckLocation(C, state, Dst, lastElement, warningMsg); 1564 if (!state) 1565 return; 1566 } 1567 1568 // If this is a stpcpy-style copy, the last element is the return value. 1569 if (returnEnd) 1570 Result = lastElement; 1571 } 1572 1573 // Invalidate the destination. This must happen before we set the C string 1574 // length because invalidation will clear the length. 1575 // FIXME: Even if we can't perfectly model the copy, we should see if we 1576 // can use LazyCompoundVals to copy the source values into the destination. 1577 // This would probably remove any existing bindings past the end of the 1578 // string, but that's still an improvement over blank invalidation. 1579 state = InvalidateBuffer(C, state, Dst, *dstRegVal); 1580 1581 // Set the C string length of the destination, if we know it. 1582 if (isBounded && !isAppending) { 1583 // strncpy is annoying in that it doesn't guarantee to null-terminate 1584 // the result string. If the original string didn't fit entirely inside 1585 // the bound (including the null-terminator), we don't know how long the 1586 // result is. 1587 if (amountCopied != strLength) 1588 finalStrLength = UnknownVal(); 1589 } 1590 state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength); 1591 } 1592 1593 assert(state); 1594 1595 // If this is a stpcpy-style copy, but we were unable to check for a buffer 1596 // overflow, we still need a result. Conjure a return value. 1597 if (returnEnd && Result.isUnknown()) { 1598 Result = svalBuilder.conjureSymbolVal(0, CE, LCtx, C.blockCount()); 1599 } 1600 1601 // Set the return value. 1602 state = state->BindExpr(CE, LCtx, Result); 1603 C.addTransition(state); 1604 } 1605 1606 void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const { 1607 if (CE->getNumArgs() < 2) 1608 return; 1609 1610 //int strcmp(const char *s1, const char *s2); 1611 evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false); 1612 } 1613 1614 void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const { 1615 if (CE->getNumArgs() < 3) 1616 return; 1617 1618 //int strncmp(const char *s1, const char *s2, size_t n); 1619 evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false); 1620 } 1621 1622 void CStringChecker::evalStrcasecmp(CheckerContext &C, 1623 const CallExpr *CE) const { 1624 if (CE->getNumArgs() < 2) 1625 return; 1626 1627 //int strcasecmp(const char *s1, const char *s2); 1628 evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true); 1629 } 1630 1631 void CStringChecker::evalStrncasecmp(CheckerContext &C, 1632 const CallExpr *CE) const { 1633 if (CE->getNumArgs() < 3) 1634 return; 1635 1636 //int strncasecmp(const char *s1, const char *s2, size_t n); 1637 evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ true); 1638 } 1639 1640 void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE, 1641 bool isBounded, bool ignoreCase) const { 1642 CurrentFunctionDescription = "string comparison function"; 1643 ProgramStateRef state = C.getState(); 1644 const LocationContext *LCtx = C.getLocationContext(); 1645 1646 // Check that the first string is non-null 1647 const Expr *s1 = CE->getArg(0); 1648 SVal s1Val = state->getSVal(s1, LCtx); 1649 state = checkNonNull(C, state, s1, s1Val); 1650 if (!state) 1651 return; 1652 1653 // Check that the second string is non-null. 1654 const Expr *s2 = CE->getArg(1); 1655 SVal s2Val = state->getSVal(s2, LCtx); 1656 state = checkNonNull(C, state, s2, s2Val); 1657 if (!state) 1658 return; 1659 1660 // Get the string length of the first string or give up. 1661 SVal s1Length = getCStringLength(C, state, s1, s1Val); 1662 if (s1Length.isUndef()) 1663 return; 1664 1665 // Get the string length of the second string or give up. 1666 SVal s2Length = getCStringLength(C, state, s2, s2Val); 1667 if (s2Length.isUndef()) 1668 return; 1669 1670 // If we know the two buffers are the same, we know the result is 0. 1671 // First, get the two buffers' addresses. Another checker will have already 1672 // made sure they're not undefined. 1673 DefinedOrUnknownSVal LV = cast<DefinedOrUnknownSVal>(s1Val); 1674 DefinedOrUnknownSVal RV = cast<DefinedOrUnknownSVal>(s2Val); 1675 1676 // See if they are the same. 1677 SValBuilder &svalBuilder = C.getSValBuilder(); 1678 DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV); 1679 ProgramStateRef StSameBuf, StNotSameBuf; 1680 llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf); 1681 1682 // If the two arguments might be the same buffer, we know the result is 0, 1683 // and we only need to check one size. 1684 if (StSameBuf) { 1685 StSameBuf = StSameBuf->BindExpr(CE, LCtx, 1686 svalBuilder.makeZeroVal(CE->getType())); 1687 C.addTransition(StSameBuf); 1688 1689 // If the two arguments are GUARANTEED to be the same, we're done! 1690 if (!StNotSameBuf) 1691 return; 1692 } 1693 1694 assert(StNotSameBuf); 1695 state = StNotSameBuf; 1696 1697 // At this point we can go about comparing the two buffers. 1698 // For now, we only do this if they're both known string literals. 1699 1700 // Attempt to extract string literals from both expressions. 1701 const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val); 1702 const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val); 1703 bool canComputeResult = false; 1704 1705 if (s1StrLiteral && s2StrLiteral) { 1706 StringRef s1StrRef = s1StrLiteral->getString(); 1707 StringRef s2StrRef = s2StrLiteral->getString(); 1708 1709 if (isBounded) { 1710 // Get the max number of characters to compare. 1711 const Expr *lenExpr = CE->getArg(2); 1712 SVal lenVal = state->getSVal(lenExpr, LCtx); 1713 1714 // If the length is known, we can get the right substrings. 1715 if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) { 1716 // Create substrings of each to compare the prefix. 1717 s1StrRef = s1StrRef.substr(0, (size_t)len->getZExtValue()); 1718 s2StrRef = s2StrRef.substr(0, (size_t)len->getZExtValue()); 1719 canComputeResult = true; 1720 } 1721 } else { 1722 // This is a normal, unbounded strcmp. 1723 canComputeResult = true; 1724 } 1725 1726 if (canComputeResult) { 1727 // Real strcmp stops at null characters. 1728 size_t s1Term = s1StrRef.find('\0'); 1729 if (s1Term != StringRef::npos) 1730 s1StrRef = s1StrRef.substr(0, s1Term); 1731 1732 size_t s2Term = s2StrRef.find('\0'); 1733 if (s2Term != StringRef::npos) 1734 s2StrRef = s2StrRef.substr(0, s2Term); 1735 1736 // Use StringRef's comparison methods to compute the actual result. 1737 int result; 1738 1739 if (ignoreCase) { 1740 // Compare string 1 to string 2 the same way strcasecmp() does. 1741 result = s1StrRef.compare_lower(s2StrRef); 1742 } else { 1743 // Compare string 1 to string 2 the same way strcmp() does. 1744 result = s1StrRef.compare(s2StrRef); 1745 } 1746 1747 // Build the SVal of the comparison and bind the return value. 1748 SVal resultVal = svalBuilder.makeIntVal(result, CE->getType()); 1749 state = state->BindExpr(CE, LCtx, resultVal); 1750 } 1751 } 1752 1753 if (!canComputeResult) { 1754 // Conjure a symbolic value. It's the best we can do. 1755 SVal resultVal = svalBuilder.conjureSymbolVal(0, CE, LCtx, C.blockCount()); 1756 state = state->BindExpr(CE, LCtx, resultVal); 1757 } 1758 1759 // Record this as a possible path. 1760 C.addTransition(state); 1761 } 1762 1763 //===----------------------------------------------------------------------===// 1764 // The driver method, and other Checker callbacks. 1765 //===----------------------------------------------------------------------===// 1766 1767 bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const { 1768 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 1769 1770 if (!FDecl) 1771 return false; 1772 1773 FnCheck evalFunction = 0; 1774 if (C.isCLibraryFunction(FDecl, "memcpy")) 1775 evalFunction = &CStringChecker::evalMemcpy; 1776 else if (C.isCLibraryFunction(FDecl, "mempcpy")) 1777 evalFunction = &CStringChecker::evalMempcpy; 1778 else if (C.isCLibraryFunction(FDecl, "memcmp")) 1779 evalFunction = &CStringChecker::evalMemcmp; 1780 else if (C.isCLibraryFunction(FDecl, "memmove")) 1781 evalFunction = &CStringChecker::evalMemmove; 1782 else if (C.isCLibraryFunction(FDecl, "strcpy")) 1783 evalFunction = &CStringChecker::evalStrcpy; 1784 else if (C.isCLibraryFunction(FDecl, "strncpy")) 1785 evalFunction = &CStringChecker::evalStrncpy; 1786 else if (C.isCLibraryFunction(FDecl, "stpcpy")) 1787 evalFunction = &CStringChecker::evalStpcpy; 1788 else if (C.isCLibraryFunction(FDecl, "strcat")) 1789 evalFunction = &CStringChecker::evalStrcat; 1790 else if (C.isCLibraryFunction(FDecl, "strncat")) 1791 evalFunction = &CStringChecker::evalStrncat; 1792 else if (C.isCLibraryFunction(FDecl, "strlen")) 1793 evalFunction = &CStringChecker::evalstrLength; 1794 else if (C.isCLibraryFunction(FDecl, "strnlen")) 1795 evalFunction = &CStringChecker::evalstrnLength; 1796 else if (C.isCLibraryFunction(FDecl, "strcmp")) 1797 evalFunction = &CStringChecker::evalStrcmp; 1798 else if (C.isCLibraryFunction(FDecl, "strncmp")) 1799 evalFunction = &CStringChecker::evalStrncmp; 1800 else if (C.isCLibraryFunction(FDecl, "strcasecmp")) 1801 evalFunction = &CStringChecker::evalStrcasecmp; 1802 else if (C.isCLibraryFunction(FDecl, "strncasecmp")) 1803 evalFunction = &CStringChecker::evalStrncasecmp; 1804 else if (C.isCLibraryFunction(FDecl, "bcopy")) 1805 evalFunction = &CStringChecker::evalBcopy; 1806 else if (C.isCLibraryFunction(FDecl, "bcmp")) 1807 evalFunction = &CStringChecker::evalMemcmp; 1808 1809 // If the callee isn't a string function, let another checker handle it. 1810 if (!evalFunction) 1811 return false; 1812 1813 // Make sure each function sets its own description. 1814 // (But don't bother in a release build.) 1815 assert(!(CurrentFunctionDescription = NULL)); 1816 1817 // Check and evaluate the call. 1818 (this->*evalFunction)(C, CE); 1819 1820 // If the evaluate call resulted in no change, chain to the next eval call 1821 // handler. 1822 // Note, the custom CString evaluation calls assume that basic safety 1823 // properties are held. However, if the user chooses to turn off some of these 1824 // checks, we ignore the issues and leave the call evaluation to a generic 1825 // handler. 1826 if (!C.isDifferent()) 1827 return false; 1828 1829 return true; 1830 } 1831 1832 void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const { 1833 // Record string length for char a[] = "abc"; 1834 ProgramStateRef state = C.getState(); 1835 1836 for (DeclStmt::const_decl_iterator I = DS->decl_begin(), E = DS->decl_end(); 1837 I != E; ++I) { 1838 const VarDecl *D = dyn_cast<VarDecl>(*I); 1839 if (!D) 1840 continue; 1841 1842 // FIXME: Handle array fields of structs. 1843 if (!D->getType()->isArrayType()) 1844 continue; 1845 1846 const Expr *Init = D->getInit(); 1847 if (!Init) 1848 continue; 1849 if (!isa<StringLiteral>(Init)) 1850 continue; 1851 1852 Loc VarLoc = state->getLValue(D, C.getLocationContext()); 1853 const MemRegion *MR = VarLoc.getAsRegion(); 1854 if (!MR) 1855 continue; 1856 1857 SVal StrVal = state->getSVal(Init, C.getLocationContext()); 1858 assert(StrVal.isValid() && "Initializer string is unknown or undefined"); 1859 DefinedOrUnknownSVal strLength 1860 = cast<DefinedOrUnknownSVal>(getCStringLength(C, state, Init, StrVal)); 1861 1862 state = state->set<CStringLength>(MR, strLength); 1863 } 1864 1865 C.addTransition(state); 1866 } 1867 1868 bool CStringChecker::wantsRegionChangeUpdate(ProgramStateRef state) const { 1869 CStringLengthTy Entries = state->get<CStringLength>(); 1870 return !Entries.isEmpty(); 1871 } 1872 1873 ProgramStateRef 1874 CStringChecker::checkRegionChanges(ProgramStateRef state, 1875 const StoreManager::InvalidatedSymbols *, 1876 ArrayRef<const MemRegion *> ExplicitRegions, 1877 ArrayRef<const MemRegion *> Regions, 1878 const CallEvent *Call) const { 1879 CStringLengthTy Entries = state->get<CStringLength>(); 1880 if (Entries.isEmpty()) 1881 return state; 1882 1883 llvm::SmallPtrSet<const MemRegion *, 8> Invalidated; 1884 llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions; 1885 1886 // First build sets for the changed regions and their super-regions. 1887 for (ArrayRef<const MemRegion *>::iterator 1888 I = Regions.begin(), E = Regions.end(); I != E; ++I) { 1889 const MemRegion *MR = *I; 1890 Invalidated.insert(MR); 1891 1892 SuperRegions.insert(MR); 1893 while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) { 1894 MR = SR->getSuperRegion(); 1895 SuperRegions.insert(MR); 1896 } 1897 } 1898 1899 CStringLengthTy::Factory &F = state->get_context<CStringLength>(); 1900 1901 // Then loop over the entries in the current state. 1902 for (CStringLengthTy::iterator I = Entries.begin(), 1903 E = Entries.end(); I != E; ++I) { 1904 const MemRegion *MR = I.getKey(); 1905 1906 // Is this entry for a super-region of a changed region? 1907 if (SuperRegions.count(MR)) { 1908 Entries = F.remove(Entries, MR); 1909 continue; 1910 } 1911 1912 // Is this entry for a sub-region of a changed region? 1913 const MemRegion *Super = MR; 1914 while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) { 1915 Super = SR->getSuperRegion(); 1916 if (Invalidated.count(Super)) { 1917 Entries = F.remove(Entries, MR); 1918 break; 1919 } 1920 } 1921 } 1922 1923 return state->set<CStringLength>(Entries); 1924 } 1925 1926 void CStringChecker::checkLiveSymbols(ProgramStateRef state, 1927 SymbolReaper &SR) const { 1928 // Mark all symbols in our string length map as valid. 1929 CStringLengthTy Entries = state->get<CStringLength>(); 1930 1931 for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end(); 1932 I != E; ++I) { 1933 SVal Len = I.getData(); 1934 1935 for (SymExpr::symbol_iterator si = Len.symbol_begin(), 1936 se = Len.symbol_end(); si != se; ++si) 1937 SR.markInUse(*si); 1938 } 1939 } 1940 1941 void CStringChecker::checkDeadSymbols(SymbolReaper &SR, 1942 CheckerContext &C) const { 1943 if (!SR.hasDeadSymbols()) 1944 return; 1945 1946 ProgramStateRef state = C.getState(); 1947 CStringLengthTy Entries = state->get<CStringLength>(); 1948 if (Entries.isEmpty()) 1949 return; 1950 1951 CStringLengthTy::Factory &F = state->get_context<CStringLength>(); 1952 for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end(); 1953 I != E; ++I) { 1954 SVal Len = I.getData(); 1955 if (SymbolRef Sym = Len.getAsSymbol()) { 1956 if (SR.isDead(Sym)) 1957 Entries = F.remove(Entries, I.getKey()); 1958 } 1959 } 1960 1961 state = state->set<CStringLength>(Entries); 1962 C.addTransition(state); 1963 } 1964 1965 #define REGISTER_CHECKER(name) \ 1966 void ento::register##name(CheckerManager &mgr) {\ 1967 static CStringChecker *TheChecker = 0; \ 1968 if (TheChecker == 0) \ 1969 TheChecker = mgr.registerChecker<CStringChecker>(); \ 1970 TheChecker->Filter.Check##name = true; \ 1971 } 1972 1973 REGISTER_CHECKER(CStringNullArg) 1974 REGISTER_CHECKER(CStringOutOfBounds) 1975 REGISTER_CHECKER(CStringBufferOverlap) 1976 REGISTER_CHECKER(CStringNotNullTerm) 1977 1978 void ento::registerCStringCheckerBasic(CheckerManager &Mgr) { 1979 registerCStringNullArg(Mgr); 1980 } 1981