1 //= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This defines CStringChecker, which is an assortment of checks on calls 11 // to functions in <string.h>. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ClangSACheckers.h" 16 #include "InterCheckerAPI.h" 17 #include "clang/Basic/CharInfo.h" 18 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 19 #include "clang/StaticAnalyzer/Core/Checker.h" 20 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 21 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 22 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 23 #include "llvm/ADT/STLExtras.h" 24 #include "llvm/ADT/SmallString.h" 25 #include "llvm/Support/raw_ostream.h" 26 27 using namespace clang; 28 using namespace ento; 29 30 namespace { 31 class CStringChecker : public Checker< eval::Call, 32 check::PreStmt<DeclStmt>, 33 check::LiveSymbols, 34 check::DeadSymbols, 35 check::RegionChanges 36 > { 37 mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap, 38 BT_NotCString, BT_AdditionOverflow; 39 40 mutable const char *CurrentFunctionDescription; 41 42 public: 43 /// The filter is used to filter out the diagnostics which are not enabled by 44 /// the user. 
45 struct CStringChecksFilter { 46 DefaultBool CheckCStringNullArg; 47 DefaultBool CheckCStringOutOfBounds; 48 DefaultBool CheckCStringBufferOverlap; 49 DefaultBool CheckCStringNotNullTerm; 50 51 CheckName CheckNameCStringNullArg; 52 CheckName CheckNameCStringOutOfBounds; 53 CheckName CheckNameCStringBufferOverlap; 54 CheckName CheckNameCStringNotNullTerm; 55 }; 56 57 CStringChecksFilter Filter; 58 59 static void *getTag() { static int tag; return &tag; } 60 61 bool evalCall(const CallExpr *CE, CheckerContext &C) const; 62 void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const; 63 void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const; 64 void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const; 65 66 ProgramStateRef 67 checkRegionChanges(ProgramStateRef state, 68 const InvalidatedSymbols *, 69 ArrayRef<const MemRegion *> ExplicitRegions, 70 ArrayRef<const MemRegion *> Regions, 71 const CallEvent *Call) const; 72 73 typedef void (CStringChecker::*FnCheck)(CheckerContext &, 74 const CallExpr *) const; 75 76 void evalMemcpy(CheckerContext &C, const CallExpr *CE) const; 77 void evalMempcpy(CheckerContext &C, const CallExpr *CE) const; 78 void evalMemmove(CheckerContext &C, const CallExpr *CE) const; 79 void evalBcopy(CheckerContext &C, const CallExpr *CE) const; 80 void evalCopyCommon(CheckerContext &C, const CallExpr *CE, 81 ProgramStateRef state, 82 const Expr *Size, 83 const Expr *Source, 84 const Expr *Dest, 85 bool Restricted = false, 86 bool IsMempcpy = false) const; 87 88 void evalMemcmp(CheckerContext &C, const CallExpr *CE) const; 89 90 void evalstrLength(CheckerContext &C, const CallExpr *CE) const; 91 void evalstrnLength(CheckerContext &C, const CallExpr *CE) const; 92 void evalstrLengthCommon(CheckerContext &C, 93 const CallExpr *CE, 94 bool IsStrnlen = false) const; 95 96 void evalStrcpy(CheckerContext &C, const CallExpr *CE) const; 97 void evalStrncpy(CheckerContext &C, const CallExpr *CE) const; 98 void 
evalStpcpy(CheckerContext &C, const CallExpr *CE) const; 99 void evalStrcpyCommon(CheckerContext &C, 100 const CallExpr *CE, 101 bool returnEnd, 102 bool isBounded, 103 bool isAppending) const; 104 105 void evalStrcat(CheckerContext &C, const CallExpr *CE) const; 106 void evalStrncat(CheckerContext &C, const CallExpr *CE) const; 107 108 void evalStrcmp(CheckerContext &C, const CallExpr *CE) const; 109 void evalStrncmp(CheckerContext &C, const CallExpr *CE) const; 110 void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const; 111 void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const; 112 void evalStrcmpCommon(CheckerContext &C, 113 const CallExpr *CE, 114 bool isBounded = false, 115 bool ignoreCase = false) const; 116 117 void evalStrsep(CheckerContext &C, const CallExpr *CE) const; 118 119 void evalStdCopy(CheckerContext &C, const CallExpr *CE) const; 120 void evalStdCopyBackward(CheckerContext &C, const CallExpr *CE) const; 121 void evalStdCopyCommon(CheckerContext &C, const CallExpr *CE) const; 122 123 // Utility methods 124 std::pair<ProgramStateRef , ProgramStateRef > 125 static assumeZero(CheckerContext &C, 126 ProgramStateRef state, SVal V, QualType Ty); 127 128 static ProgramStateRef setCStringLength(ProgramStateRef state, 129 const MemRegion *MR, 130 SVal strLength); 131 static SVal getCStringLengthForRegion(CheckerContext &C, 132 ProgramStateRef &state, 133 const Expr *Ex, 134 const MemRegion *MR, 135 bool hypothetical); 136 SVal getCStringLength(CheckerContext &C, 137 ProgramStateRef &state, 138 const Expr *Ex, 139 SVal Buf, 140 bool hypothetical = false) const; 141 142 const StringLiteral *getCStringLiteral(CheckerContext &C, 143 ProgramStateRef &state, 144 const Expr *expr, 145 SVal val) const; 146 147 static ProgramStateRef InvalidateBuffer(CheckerContext &C, 148 ProgramStateRef state, 149 const Expr *Ex, SVal V, 150 bool IsSourceBuffer, 151 const Expr *Size); 152 153 static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx, 154 
const MemRegion *MR); 155 156 // Re-usable checks 157 ProgramStateRef checkNonNull(CheckerContext &C, 158 ProgramStateRef state, 159 const Expr *S, 160 SVal l) const; 161 ProgramStateRef CheckLocation(CheckerContext &C, 162 ProgramStateRef state, 163 const Expr *S, 164 SVal l, 165 const char *message = nullptr) const; 166 ProgramStateRef CheckBufferAccess(CheckerContext &C, 167 ProgramStateRef state, 168 const Expr *Size, 169 const Expr *FirstBuf, 170 const Expr *SecondBuf, 171 const char *firstMessage = nullptr, 172 const char *secondMessage = nullptr, 173 bool WarnAboutSize = false) const; 174 175 ProgramStateRef CheckBufferAccess(CheckerContext &C, 176 ProgramStateRef state, 177 const Expr *Size, 178 const Expr *Buf, 179 const char *message = nullptr, 180 bool WarnAboutSize = false) const { 181 // This is a convenience override. 182 return CheckBufferAccess(C, state, Size, Buf, nullptr, message, nullptr, 183 WarnAboutSize); 184 } 185 ProgramStateRef CheckOverlap(CheckerContext &C, 186 ProgramStateRef state, 187 const Expr *Size, 188 const Expr *First, 189 const Expr *Second) const; 190 void emitOverlapBug(CheckerContext &C, 191 ProgramStateRef state, 192 const Stmt *First, 193 const Stmt *Second) const; 194 195 ProgramStateRef checkAdditionOverflow(CheckerContext &C, 196 ProgramStateRef state, 197 NonLoc left, 198 NonLoc right) const; 199 200 // Return true if the destination buffer of the copy function may be in bound. 201 // Expects SVal of Size to be positive and unsigned. 202 // Expects SVal of FirstBuf to be a FieldRegion. 203 static bool IsFirstBufInBound(CheckerContext &C, 204 ProgramStateRef state, 205 const Expr *FirstBuf, 206 const Expr *Size); 207 }; 208 209 } //end anonymous namespace 210 211 REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal) 212 213 //===----------------------------------------------------------------------===// 214 // Individual checks and utility methods. 
215 //===----------------------------------------------------------------------===// 216 217 std::pair<ProgramStateRef , ProgramStateRef > 218 CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V, 219 QualType Ty) { 220 Optional<DefinedSVal> val = V.getAs<DefinedSVal>(); 221 if (!val) 222 return std::pair<ProgramStateRef , ProgramStateRef >(state, state); 223 224 SValBuilder &svalBuilder = C.getSValBuilder(); 225 DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty); 226 return state->assume(svalBuilder.evalEQ(state, *val, zero)); 227 } 228 229 ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C, 230 ProgramStateRef state, 231 const Expr *S, SVal l) const { 232 // If a previous check has failed, propagate the failure. 233 if (!state) 234 return nullptr; 235 236 ProgramStateRef stateNull, stateNonNull; 237 std::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType()); 238 239 if (stateNull && !stateNonNull) { 240 if (!Filter.CheckCStringNullArg) 241 return nullptr; 242 243 ExplodedNode *N = C.generateErrorNode(stateNull); 244 if (!N) 245 return nullptr; 246 247 if (!BT_Null) 248 BT_Null.reset(new BuiltinBug( 249 Filter.CheckNameCStringNullArg, categories::UnixAPI, 250 "Null pointer argument in call to byte string function")); 251 252 SmallString<80> buf; 253 llvm::raw_svector_ostream os(buf); 254 assert(CurrentFunctionDescription); 255 os << "Null pointer argument in call to " << CurrentFunctionDescription; 256 257 // Generate a report for this bug. 258 BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null.get()); 259 auto report = llvm::make_unique<BugReport>(*BT, os.str(), N); 260 261 report->addRange(S->getSourceRange()); 262 bugreporter::trackNullOrUndefValue(N, S, *report); 263 C.emitReport(std::move(report)); 264 return nullptr; 265 } 266 267 // From here on, assume that the value is non-null. 
268 assert(stateNonNull); 269 return stateNonNull; 270 } 271 272 // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor? 273 ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C, 274 ProgramStateRef state, 275 const Expr *S, SVal l, 276 const char *warningMsg) const { 277 // If a previous check has failed, propagate the failure. 278 if (!state) 279 return nullptr; 280 281 // Check for out of bound array element access. 282 const MemRegion *R = l.getAsRegion(); 283 if (!R) 284 return state; 285 286 const ElementRegion *ER = dyn_cast<ElementRegion>(R); 287 if (!ER) 288 return state; 289 290 assert(ER->getValueType() == C.getASTContext().CharTy && 291 "CheckLocation should only be called with char* ElementRegions"); 292 293 // Get the size of the array. 294 const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion()); 295 SValBuilder &svalBuilder = C.getSValBuilder(); 296 SVal Extent = 297 svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder)); 298 DefinedOrUnknownSVal Size = Extent.castAs<DefinedOrUnknownSVal>(); 299 300 // Get the index of the accessed element. 301 DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>(); 302 303 ProgramStateRef StInBound = state->assumeInBound(Idx, Size, true); 304 ProgramStateRef StOutBound = state->assumeInBound(Idx, Size, false); 305 if (StOutBound && !StInBound) { 306 ExplodedNode *N = C.generateErrorNode(StOutBound); 307 if (!N) 308 return nullptr; 309 310 if (!BT_Bounds) { 311 BT_Bounds.reset(new BuiltinBug( 312 Filter.CheckNameCStringOutOfBounds, "Out-of-bound array access", 313 "Byte string function accesses out-of-bound array element")); 314 } 315 BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Bounds.get()); 316 317 // Generate a report for this bug. 
318 std::unique_ptr<BugReport> report; 319 if (warningMsg) { 320 report = llvm::make_unique<BugReport>(*BT, warningMsg, N); 321 } else { 322 assert(CurrentFunctionDescription); 323 assert(CurrentFunctionDescription[0] != '\0'); 324 325 SmallString<80> buf; 326 llvm::raw_svector_ostream os(buf); 327 os << toUppercase(CurrentFunctionDescription[0]) 328 << &CurrentFunctionDescription[1] 329 << " accesses out-of-bound array element"; 330 report = llvm::make_unique<BugReport>(*BT, os.str(), N); 331 } 332 333 // FIXME: It would be nice to eventually make this diagnostic more clear, 334 // e.g., by referencing the original declaration or by saying *why* this 335 // reference is outside the range. 336 337 report->addRange(S->getSourceRange()); 338 C.emitReport(std::move(report)); 339 return nullptr; 340 } 341 342 // Array bound check succeeded. From this point forward the array bound 343 // should always succeed. 344 return StInBound; 345 } 346 347 ProgramStateRef CStringChecker::CheckBufferAccess(CheckerContext &C, 348 ProgramStateRef state, 349 const Expr *Size, 350 const Expr *FirstBuf, 351 const Expr *SecondBuf, 352 const char *firstMessage, 353 const char *secondMessage, 354 bool WarnAboutSize) const { 355 // If a previous check has failed, propagate the failure. 356 if (!state) 357 return nullptr; 358 359 SValBuilder &svalBuilder = C.getSValBuilder(); 360 ASTContext &Ctx = svalBuilder.getContext(); 361 const LocationContext *LCtx = C.getLocationContext(); 362 363 QualType sizeTy = Size->getType(); 364 QualType PtrTy = Ctx.getPointerType(Ctx.CharTy); 365 366 // Check that the first buffer is non-null. 367 SVal BufVal = state->getSVal(FirstBuf, LCtx); 368 state = checkNonNull(C, state, FirstBuf, BufVal); 369 if (!state) 370 return nullptr; 371 372 // If out-of-bounds checking is turned off, skip the rest. 373 if (!Filter.CheckCStringOutOfBounds) 374 return state; 375 376 // Get the access length and make sure it is known. 
377 // FIXME: This assumes the caller has already checked that the access length 378 // is positive. And that it's unsigned. 379 SVal LengthVal = state->getSVal(Size, LCtx); 380 Optional<NonLoc> Length = LengthVal.getAs<NonLoc>(); 381 if (!Length) 382 return state; 383 384 // Compute the offset of the last element to be accessed: size-1. 385 NonLoc One = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>(); 386 NonLoc LastOffset = svalBuilder 387 .evalBinOpNN(state, BO_Sub, *Length, One, sizeTy).castAs<NonLoc>(); 388 389 // Check that the first buffer is sufficiently long. 390 SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType()); 391 if (Optional<Loc> BufLoc = BufStart.getAs<Loc>()) { 392 const Expr *warningExpr = (WarnAboutSize ? Size : FirstBuf); 393 394 SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, 395 LastOffset, PtrTy); 396 state = CheckLocation(C, state, warningExpr, BufEnd, firstMessage); 397 398 // If the buffer isn't large enough, abort. 399 if (!state) 400 return nullptr; 401 } 402 403 // If there's a second buffer, check it as well. 404 if (SecondBuf) { 405 BufVal = state->getSVal(SecondBuf, LCtx); 406 state = checkNonNull(C, state, SecondBuf, BufVal); 407 if (!state) 408 return nullptr; 409 410 BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType()); 411 if (Optional<Loc> BufLoc = BufStart.getAs<Loc>()) { 412 const Expr *warningExpr = (WarnAboutSize ? Size : SecondBuf); 413 414 SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, 415 LastOffset, PtrTy); 416 state = CheckLocation(C, state, warningExpr, BufEnd, secondMessage); 417 } 418 } 419 420 // Large enough or not, return this state! 
421 return state; 422 } 423 424 ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C, 425 ProgramStateRef state, 426 const Expr *Size, 427 const Expr *First, 428 const Expr *Second) const { 429 if (!Filter.CheckCStringBufferOverlap) 430 return state; 431 432 // Do a simple check for overlap: if the two arguments are from the same 433 // buffer, see if the end of the first is greater than the start of the second 434 // or vice versa. 435 436 // If a previous check has failed, propagate the failure. 437 if (!state) 438 return nullptr; 439 440 ProgramStateRef stateTrue, stateFalse; 441 442 // Get the buffer values and make sure they're known locations. 443 const LocationContext *LCtx = C.getLocationContext(); 444 SVal firstVal = state->getSVal(First, LCtx); 445 SVal secondVal = state->getSVal(Second, LCtx); 446 447 Optional<Loc> firstLoc = firstVal.getAs<Loc>(); 448 if (!firstLoc) 449 return state; 450 451 Optional<Loc> secondLoc = secondVal.getAs<Loc>(); 452 if (!secondLoc) 453 return state; 454 455 // Are the two values the same? 456 SValBuilder &svalBuilder = C.getSValBuilder(); 457 std::tie(stateTrue, stateFalse) = 458 state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc)); 459 460 if (stateTrue && !stateFalse) { 461 // If the values are known to be equal, that's automatically an overlap. 462 emitOverlapBug(C, stateTrue, First, Second); 463 return nullptr; 464 } 465 466 // assume the two expressions are not equal. 467 assert(stateFalse); 468 state = stateFalse; 469 470 // Which value comes first? 
471 QualType cmpTy = svalBuilder.getConditionType(); 472 SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT, 473 *firstLoc, *secondLoc, cmpTy); 474 Optional<DefinedOrUnknownSVal> reverseTest = 475 reverse.getAs<DefinedOrUnknownSVal>(); 476 if (!reverseTest) 477 return state; 478 479 std::tie(stateTrue, stateFalse) = state->assume(*reverseTest); 480 if (stateTrue) { 481 if (stateFalse) { 482 // If we don't know which one comes first, we can't perform this test. 483 return state; 484 } else { 485 // Switch the values so that firstVal is before secondVal. 486 std::swap(firstLoc, secondLoc); 487 488 // Switch the Exprs as well, so that they still correspond. 489 std::swap(First, Second); 490 } 491 } 492 493 // Get the length, and make sure it too is known. 494 SVal LengthVal = state->getSVal(Size, LCtx); 495 Optional<NonLoc> Length = LengthVal.getAs<NonLoc>(); 496 if (!Length) 497 return state; 498 499 // Convert the first buffer's start address to char*. 500 // Bail out if the cast fails. 501 ASTContext &Ctx = svalBuilder.getContext(); 502 QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy); 503 SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy, 504 First->getType()); 505 Optional<Loc> FirstStartLoc = FirstStart.getAs<Loc>(); 506 if (!FirstStartLoc) 507 return state; 508 509 // Compute the end of the first buffer. Bail out if THAT fails. 510 SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add, 511 *FirstStartLoc, *Length, CharPtrTy); 512 Optional<Loc> FirstEndLoc = FirstEnd.getAs<Loc>(); 513 if (!FirstEndLoc) 514 return state; 515 516 // Is the end of the first buffer past the start of the second buffer? 517 SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT, 518 *FirstEndLoc, *secondLoc, cmpTy); 519 Optional<DefinedOrUnknownSVal> OverlapTest = 520 Overlap.getAs<DefinedOrUnknownSVal>(); 521 if (!OverlapTest) 522 return state; 523 524 std::tie(stateTrue, stateFalse) = state->assume(*OverlapTest); 525 526 if (stateTrue && !stateFalse) { 527 // Overlap! 
528 emitOverlapBug(C, stateTrue, First, Second); 529 return nullptr; 530 } 531 532 // assume the two expressions don't overlap. 533 assert(stateFalse); 534 return stateFalse; 535 } 536 537 void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state, 538 const Stmt *First, const Stmt *Second) const { 539 ExplodedNode *N = C.generateErrorNode(state); 540 if (!N) 541 return; 542 543 if (!BT_Overlap) 544 BT_Overlap.reset(new BugType(Filter.CheckNameCStringBufferOverlap, 545 categories::UnixAPI, "Improper arguments")); 546 547 // Generate a report for this bug. 548 auto report = llvm::make_unique<BugReport>( 549 *BT_Overlap, "Arguments must not be overlapping buffers", N); 550 report->addRange(First->getSourceRange()); 551 report->addRange(Second->getSourceRange()); 552 553 C.emitReport(std::move(report)); 554 } 555 556 ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C, 557 ProgramStateRef state, 558 NonLoc left, 559 NonLoc right) const { 560 // If out-of-bounds checking is turned off, skip the rest. 561 if (!Filter.CheckCStringOutOfBounds) 562 return state; 563 564 // If a previous check has failed, propagate the failure. 565 if (!state) 566 return nullptr; 567 568 SValBuilder &svalBuilder = C.getSValBuilder(); 569 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory(); 570 571 QualType sizeTy = svalBuilder.getContext().getSizeType(); 572 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy); 573 NonLoc maxVal = svalBuilder.makeIntVal(maxValInt); 574 575 SVal maxMinusRight; 576 if (right.getAs<nonloc::ConcreteInt>()) { 577 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right, 578 sizeTy); 579 } else { 580 // Try switching the operands. (The order of these two assignments is 581 // important!) 
582 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left, 583 sizeTy); 584 left = right; 585 } 586 587 if (Optional<NonLoc> maxMinusRightNL = maxMinusRight.getAs<NonLoc>()) { 588 QualType cmpTy = svalBuilder.getConditionType(); 589 // If left > max - right, we have an overflow. 590 SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left, 591 *maxMinusRightNL, cmpTy); 592 593 ProgramStateRef stateOverflow, stateOkay; 594 std::tie(stateOverflow, stateOkay) = 595 state->assume(willOverflow.castAs<DefinedOrUnknownSVal>()); 596 597 if (stateOverflow && !stateOkay) { 598 // We have an overflow. Emit a bug report. 599 ExplodedNode *N = C.generateErrorNode(stateOverflow); 600 if (!N) 601 return nullptr; 602 603 if (!BT_AdditionOverflow) 604 BT_AdditionOverflow.reset( 605 new BuiltinBug(Filter.CheckNameCStringOutOfBounds, "API", 606 "Sum of expressions causes overflow")); 607 608 // This isn't a great error message, but this should never occur in real 609 // code anyway -- you'd have to create a buffer longer than a size_t can 610 // represent, which is sort of a contradiction. 611 const char *warning = 612 "This expression will create a string whose length is too big to " 613 "be represented as a size_t"; 614 615 // Generate a report for this bug. 616 C.emitReport( 617 llvm::make_unique<BugReport>(*BT_AdditionOverflow, warning, N)); 618 619 return nullptr; 620 } 621 622 // From now on, assume an overflow didn't occur. 623 assert(stateOkay); 624 state = stateOkay; 625 } 626 627 return state; 628 } 629 630 ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state, 631 const MemRegion *MR, 632 SVal strLength) { 633 assert(!strLength.isUndef() && "Attempt to set an undefined string length"); 634 635 MR = MR->StripCasts(); 636 637 switch (MR->getKind()) { 638 case MemRegion::StringRegionKind: 639 // FIXME: This can happen if we strcpy() into a string region. This is 640 // undefined [C99 6.4.5p6], but we should still warn about it. 
641 return state; 642 643 case MemRegion::SymbolicRegionKind: 644 case MemRegion::AllocaRegionKind: 645 case MemRegion::VarRegionKind: 646 case MemRegion::FieldRegionKind: 647 case MemRegion::ObjCIvarRegionKind: 648 // These are the types we can currently track string lengths for. 649 break; 650 651 case MemRegion::ElementRegionKind: 652 // FIXME: Handle element regions by upper-bounding the parent region's 653 // string length. 654 return state; 655 656 default: 657 // Other regions (mostly non-data) can't have a reliable C string length. 658 // For now, just ignore the change. 659 // FIXME: These are rare but not impossible. We should output some kind of 660 // warning for things like strcpy((char[]){'a', 0}, "b"); 661 return state; 662 } 663 664 if (strLength.isUnknown()) 665 return state->remove<CStringLength>(MR); 666 667 return state->set<CStringLength>(MR, strLength); 668 } 669 670 SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C, 671 ProgramStateRef &state, 672 const Expr *Ex, 673 const MemRegion *MR, 674 bool hypothetical) { 675 if (!hypothetical) { 676 // If there's a recorded length, go ahead and return it. 677 const SVal *Recorded = state->get<CStringLength>(MR); 678 if (Recorded) 679 return *Recorded; 680 } 681 682 // Otherwise, get a new symbol and update the state. 
683 SValBuilder &svalBuilder = C.getSValBuilder(); 684 QualType sizeTy = svalBuilder.getContext().getSizeType(); 685 SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(), 686 MR, Ex, sizeTy, 687 C.getLocationContext(), 688 C.blockCount()); 689 690 if (!hypothetical) { 691 if (Optional<NonLoc> strLn = strLength.getAs<NonLoc>()) { 692 // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4 693 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory(); 694 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy); 695 llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4); 696 const llvm::APSInt *maxLengthInt = BVF.evalAPSInt(BO_Div, maxValInt, 697 fourInt); 698 NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt); 699 SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn, 700 maxLength, sizeTy); 701 state = state->assume(evalLength.castAs<DefinedOrUnknownSVal>(), true); 702 } 703 state = state->set<CStringLength>(MR, strLength); 704 } 705 706 return strLength; 707 } 708 709 SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state, 710 const Expr *Ex, SVal Buf, 711 bool hypothetical) const { 712 const MemRegion *MR = Buf.getAsRegion(); 713 if (!MR) { 714 // If we can't get a region, see if it's something we /know/ isn't a 715 // C string. In the context of locations, the only time we can issue such 716 // a warning is for labels. 
717 if (Optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) { 718 if (!Filter.CheckCStringNotNullTerm) 719 return UndefinedVal(); 720 721 if (ExplodedNode *N = C.generateNonFatalErrorNode(state)) { 722 if (!BT_NotCString) 723 BT_NotCString.reset(new BuiltinBug( 724 Filter.CheckNameCStringNotNullTerm, categories::UnixAPI, 725 "Argument is not a null-terminated string.")); 726 727 SmallString<120> buf; 728 llvm::raw_svector_ostream os(buf); 729 assert(CurrentFunctionDescription); 730 os << "Argument to " << CurrentFunctionDescription 731 << " is the address of the label '" << Label->getLabel()->getName() 732 << "', which is not a null-terminated string"; 733 734 // Generate a report for this bug. 735 auto report = llvm::make_unique<BugReport>(*BT_NotCString, os.str(), N); 736 737 report->addRange(Ex->getSourceRange()); 738 C.emitReport(std::move(report)); 739 } 740 return UndefinedVal(); 741 742 } 743 744 // If it's not a region and not a label, give up. 745 return UnknownVal(); 746 } 747 748 // If we have a region, strip casts from it and see if we can figure out 749 // its length. For anything we can't figure out, just return UnknownVal. 750 MR = MR->StripCasts(); 751 752 switch (MR->getKind()) { 753 case MemRegion::StringRegionKind: { 754 // Modifying the contents of string regions is undefined [C99 6.4.5p6], 755 // so we can assume that the byte length is the correct C string length. 
756 SValBuilder &svalBuilder = C.getSValBuilder(); 757 QualType sizeTy = svalBuilder.getContext().getSizeType(); 758 const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral(); 759 return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy); 760 } 761 case MemRegion::SymbolicRegionKind: 762 case MemRegion::AllocaRegionKind: 763 case MemRegion::VarRegionKind: 764 case MemRegion::FieldRegionKind: 765 case MemRegion::ObjCIvarRegionKind: 766 return getCStringLengthForRegion(C, state, Ex, MR, hypothetical); 767 case MemRegion::CompoundLiteralRegionKind: 768 // FIXME: Can we track this? Is it necessary? 769 return UnknownVal(); 770 case MemRegion::ElementRegionKind: 771 // FIXME: How can we handle this? It's not good enough to subtract the 772 // offset from the base string length; consider "123\x00567" and &a[5]. 773 return UnknownVal(); 774 default: 775 // Other regions (mostly non-data) can't have a reliable C string length. 776 // In this case, an error is emitted and UndefinedVal is returned. 777 // The caller should always be prepared to handle this case. 778 if (!Filter.CheckCStringNotNullTerm) 779 return UndefinedVal(); 780 781 if (ExplodedNode *N = C.generateNonFatalErrorNode(state)) { 782 if (!BT_NotCString) 783 BT_NotCString.reset(new BuiltinBug( 784 Filter.CheckNameCStringNotNullTerm, categories::UnixAPI, 785 "Argument is not a null-terminated string.")); 786 787 SmallString<120> buf; 788 llvm::raw_svector_ostream os(buf); 789 790 assert(CurrentFunctionDescription); 791 os << "Argument to " << CurrentFunctionDescription << " is "; 792 793 if (SummarizeRegion(os, C.getASTContext(), MR)) 794 os << ", which is not a null-terminated string"; 795 else 796 os << "not a null-terminated string"; 797 798 // Generate a report for this bug. 
799 auto report = llvm::make_unique<BugReport>(*BT_NotCString, os.str(), N); 800 801 report->addRange(Ex->getSourceRange()); 802 C.emitReport(std::move(report)); 803 } 804 805 return UndefinedVal(); 806 } 807 } 808 809 const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C, 810 ProgramStateRef &state, const Expr *expr, SVal val) const { 811 812 // Get the memory region pointed to by the val. 813 const MemRegion *bufRegion = val.getAsRegion(); 814 if (!bufRegion) 815 return nullptr; 816 817 // Strip casts off the memory region. 818 bufRegion = bufRegion->StripCasts(); 819 820 // Cast the memory region to a string region. 821 const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion); 822 if (!strRegion) 823 return nullptr; 824 825 // Return the actual string in the string region. 826 return strRegion->getStringLiteral(); 827 } 828 829 bool CStringChecker::IsFirstBufInBound(CheckerContext &C, 830 ProgramStateRef state, 831 const Expr *FirstBuf, 832 const Expr *Size) { 833 // If we do not know that the buffer is long enough we return 'true'. 834 // Otherwise the parent region of this field region would also get 835 // invalidated, which would lead to warnings based on an unknown state. 836 837 // Originally copied from CheckBufferAccess and CheckLocation. 838 SValBuilder &svalBuilder = C.getSValBuilder(); 839 ASTContext &Ctx = svalBuilder.getContext(); 840 const LocationContext *LCtx = C.getLocationContext(); 841 842 QualType sizeTy = Size->getType(); 843 QualType PtrTy = Ctx.getPointerType(Ctx.CharTy); 844 SVal BufVal = state->getSVal(FirstBuf, LCtx); 845 846 SVal LengthVal = state->getSVal(Size, LCtx); 847 Optional<NonLoc> Length = LengthVal.getAs<NonLoc>(); 848 if (!Length) 849 return true; // cf top comment. 850 851 // Compute the offset of the last element to be accessed: size-1. 
852 NonLoc One = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>(); 853 NonLoc LastOffset = 854 svalBuilder.evalBinOpNN(state, BO_Sub, *Length, One, sizeTy) 855 .castAs<NonLoc>(); 856 857 // Check that the first buffer is sufficiently long. 858 SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType()); 859 Optional<Loc> BufLoc = BufStart.getAs<Loc>(); 860 if (!BufLoc) 861 return true; // cf top comment. 862 863 SVal BufEnd = 864 svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, LastOffset, PtrTy); 865 866 // Check for out of bound array element access. 867 const MemRegion *R = BufEnd.getAsRegion(); 868 if (!R) 869 return true; // cf top comment. 870 871 const ElementRegion *ER = dyn_cast<ElementRegion>(R); 872 if (!ER) 873 return true; // cf top comment. 874 875 assert(ER->getValueType() == C.getASTContext().CharTy && 876 "IsFirstBufInBound should only be called with char* ElementRegions"); 877 878 // Get the size of the array. 879 const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion()); 880 SVal Extent = 881 svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder)); 882 DefinedOrUnknownSVal ExtentSize = Extent.castAs<DefinedOrUnknownSVal>(); 883 884 // Get the index of the accessed element. 885 DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>(); 886 887 ProgramStateRef StInBound = state->assumeInBound(Idx, ExtentSize, true); 888 889 return static_cast<bool>(StInBound); 890 } 891 892 ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C, 893 ProgramStateRef state, 894 const Expr *E, SVal V, 895 bool IsSourceBuffer, 896 const Expr *Size) { 897 Optional<Loc> L = V.getAs<Loc>(); 898 if (!L) 899 return state; 900 901 // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes 902 // some assumptions about the value that CFRefCount can't. Even so, it should 903 // probably be refactored. 
904 if (Optional<loc::MemRegionVal> MR = L->getAs<loc::MemRegionVal>()) { 905 const MemRegion *R = MR->getRegion()->StripCasts(); 906 907 // Are we dealing with an ElementRegion? If so, we should be invalidating 908 // the super-region. 909 if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) { 910 R = ER->getSuperRegion(); 911 // FIXME: What about layers of ElementRegions? 912 } 913 914 // Invalidate this region. 915 const LocationContext *LCtx = C.getPredecessor()->getLocationContext(); 916 917 bool CausesPointerEscape = false; 918 RegionAndSymbolInvalidationTraits ITraits; 919 // Invalidate and escape only indirect regions accessible through the source 920 // buffer. 921 if (IsSourceBuffer) { 922 ITraits.setTrait(R->getBaseRegion(), 923 RegionAndSymbolInvalidationTraits::TK_PreserveContents); 924 ITraits.setTrait(R, RegionAndSymbolInvalidationTraits::TK_SuppressEscape); 925 CausesPointerEscape = true; 926 } else { 927 const MemRegion::Kind& K = R->getKind(); 928 if (K == MemRegion::FieldRegionKind) 929 if (Size && IsFirstBufInBound(C, state, E, Size)) { 930 // If destination buffer is a field region and access is in bound, 931 // do not invalidate its super region. 932 ITraits.setTrait( 933 R, 934 RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion); 935 } 936 } 937 938 return state->invalidateRegions(R, E, C.blockCount(), LCtx, 939 CausesPointerEscape, nullptr, nullptr, 940 &ITraits); 941 } 942 943 // If we have a non-region value by chance, just remove the binding. 944 // FIXME: is this necessary or correct? This handles the non-Region 945 // cases. Is it ever valid to store to these? 
946 return state->killBinding(*L); 947 } 948 949 bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx, 950 const MemRegion *MR) { 951 const TypedValueRegion *TVR = dyn_cast<TypedValueRegion>(MR); 952 953 switch (MR->getKind()) { 954 case MemRegion::FunctionCodeRegionKind: { 955 const NamedDecl *FD = cast<FunctionCodeRegion>(MR)->getDecl(); 956 if (FD) 957 os << "the address of the function '" << *FD << '\''; 958 else 959 os << "the address of a function"; 960 return true; 961 } 962 case MemRegion::BlockCodeRegionKind: 963 os << "block text"; 964 return true; 965 case MemRegion::BlockDataRegionKind: 966 os << "a block"; 967 return true; 968 case MemRegion::CXXThisRegionKind: 969 case MemRegion::CXXTempObjectRegionKind: 970 os << "a C++ temp object of type " << TVR->getValueType().getAsString(); 971 return true; 972 case MemRegion::VarRegionKind: 973 os << "a variable of type" << TVR->getValueType().getAsString(); 974 return true; 975 case MemRegion::FieldRegionKind: 976 os << "a field of type " << TVR->getValueType().getAsString(); 977 return true; 978 case MemRegion::ObjCIvarRegionKind: 979 os << "an instance variable of type " << TVR->getValueType().getAsString(); 980 return true; 981 default: 982 return false; 983 } 984 } 985 986 //===----------------------------------------------------------------------===// 987 // evaluation of individual function calls. 988 //===----------------------------------------------------------------------===// 989 990 void CStringChecker::evalCopyCommon(CheckerContext &C, 991 const CallExpr *CE, 992 ProgramStateRef state, 993 const Expr *Size, const Expr *Dest, 994 const Expr *Source, bool Restricted, 995 bool IsMempcpy) const { 996 CurrentFunctionDescription = "memory copy function"; 997 998 // See if the size argument is zero. 
999 const LocationContext *LCtx = C.getLocationContext(); 1000 SVal sizeVal = state->getSVal(Size, LCtx); 1001 QualType sizeTy = Size->getType(); 1002 1003 ProgramStateRef stateZeroSize, stateNonZeroSize; 1004 std::tie(stateZeroSize, stateNonZeroSize) = 1005 assumeZero(C, state, sizeVal, sizeTy); 1006 1007 // Get the value of the Dest. 1008 SVal destVal = state->getSVal(Dest, LCtx); 1009 1010 // If the size is zero, there won't be any actual memory access, so 1011 // just bind the return value to the destination buffer and return. 1012 if (stateZeroSize && !stateNonZeroSize) { 1013 stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, destVal); 1014 C.addTransition(stateZeroSize); 1015 return; 1016 } 1017 1018 // If the size can be nonzero, we have to check the other arguments. 1019 if (stateNonZeroSize) { 1020 state = stateNonZeroSize; 1021 1022 // Ensure the destination is not null. If it is NULL there will be a 1023 // NULL pointer dereference. 1024 state = checkNonNull(C, state, Dest, destVal); 1025 if (!state) 1026 return; 1027 1028 // Get the value of the Src. 1029 SVal srcVal = state->getSVal(Source, LCtx); 1030 1031 // Ensure the source is not null. If it is NULL there will be a 1032 // NULL pointer dereference. 1033 state = checkNonNull(C, state, Source, srcVal); 1034 if (!state) 1035 return; 1036 1037 // Ensure the accesses are valid and that the buffers do not overlap. 1038 const char * const writeWarning = 1039 "Memory copy function overflows destination buffer"; 1040 state = CheckBufferAccess(C, state, Size, Dest, Source, 1041 writeWarning, /* sourceWarning = */ nullptr); 1042 if (Restricted) 1043 state = CheckOverlap(C, state, Size, Dest, Source); 1044 1045 if (!state) 1046 return; 1047 1048 // If this is mempcpy, get the byte after the last byte copied and 1049 // bind the expr. 1050 if (IsMempcpy) { 1051 loc::MemRegionVal destRegVal = destVal.castAs<loc::MemRegionVal>(); 1052 1053 // Get the length to copy. 
1054 if (Optional<NonLoc> lenValNonLoc = sizeVal.getAs<NonLoc>()) { 1055 // Get the byte after the last byte copied. 1056 SValBuilder &SvalBuilder = C.getSValBuilder(); 1057 ASTContext &Ctx = SvalBuilder.getContext(); 1058 QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy); 1059 loc::MemRegionVal DestRegCharVal = SvalBuilder.evalCast(destRegVal, 1060 CharPtrTy, Dest->getType()).castAs<loc::MemRegionVal>(); 1061 SVal lastElement = C.getSValBuilder().evalBinOpLN(state, BO_Add, 1062 DestRegCharVal, 1063 *lenValNonLoc, 1064 Dest->getType()); 1065 1066 // The byte after the last byte copied is the return value. 1067 state = state->BindExpr(CE, LCtx, lastElement); 1068 } else { 1069 // If we don't know how much we copied, we can at least 1070 // conjure a return value for later. 1071 SVal result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx, 1072 C.blockCount()); 1073 state = state->BindExpr(CE, LCtx, result); 1074 } 1075 1076 } else { 1077 // All other copies return the destination buffer. 1078 // (Well, bcopy() has a void return type, but this won't hurt.) 1079 state = state->BindExpr(CE, LCtx, destVal); 1080 } 1081 1082 // Invalidate the destination (regular invalidation without pointer-escaping 1083 // the address of the top-level region). 1084 // FIXME: Even if we can't perfectly model the copy, we should see if we 1085 // can use LazyCompoundVals to copy the source values into the destination. 1086 // This would probably remove any existing bindings past the end of the 1087 // copied region, but that's still an improvement over blank invalidation. 1088 state = InvalidateBuffer(C, state, Dest, C.getSVal(Dest), 1089 /*IsSourceBuffer*/false, Size); 1090 1091 // Invalidate the source (const-invalidation without const-pointer-escaping 1092 // the address of the top-level region). 
1093 state = InvalidateBuffer(C, state, Source, C.getSVal(Source), 1094 /*IsSourceBuffer*/true, nullptr); 1095 1096 C.addTransition(state); 1097 } 1098 } 1099 1100 1101 void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) const { 1102 if (CE->getNumArgs() < 3) 1103 return; 1104 1105 // void *memcpy(void *restrict dst, const void *restrict src, size_t n); 1106 // The return value is the address of the destination buffer. 1107 const Expr *Dest = CE->getArg(0); 1108 ProgramStateRef state = C.getState(); 1109 1110 evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true); 1111 } 1112 1113 void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE) const { 1114 if (CE->getNumArgs() < 3) 1115 return; 1116 1117 // void *mempcpy(void *restrict dst, const void *restrict src, size_t n); 1118 // The return value is a pointer to the byte following the last written byte. 1119 const Expr *Dest = CE->getArg(0); 1120 ProgramStateRef state = C.getState(); 1121 1122 evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true, true); 1123 } 1124 1125 void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) const { 1126 if (CE->getNumArgs() < 3) 1127 return; 1128 1129 // void *memmove(void *dst, const void *src, size_t n); 1130 // The return value is the address of the destination buffer. 
1131 const Expr *Dest = CE->getArg(0); 1132 ProgramStateRef state = C.getState(); 1133 1134 evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1)); 1135 } 1136 1137 void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const { 1138 if (CE->getNumArgs() < 3) 1139 return; 1140 1141 // void bcopy(const void *src, void *dst, size_t n); 1142 evalCopyCommon(C, CE, C.getState(), 1143 CE->getArg(2), CE->getArg(1), CE->getArg(0)); 1144 } 1145 1146 void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) const { 1147 if (CE->getNumArgs() < 3) 1148 return; 1149 1150 // int memcmp(const void *s1, const void *s2, size_t n); 1151 CurrentFunctionDescription = "memory comparison function"; 1152 1153 const Expr *Left = CE->getArg(0); 1154 const Expr *Right = CE->getArg(1); 1155 const Expr *Size = CE->getArg(2); 1156 1157 ProgramStateRef state = C.getState(); 1158 SValBuilder &svalBuilder = C.getSValBuilder(); 1159 1160 // See if the size argument is zero. 1161 const LocationContext *LCtx = C.getLocationContext(); 1162 SVal sizeVal = state->getSVal(Size, LCtx); 1163 QualType sizeTy = Size->getType(); 1164 1165 ProgramStateRef stateZeroSize, stateNonZeroSize; 1166 std::tie(stateZeroSize, stateNonZeroSize) = 1167 assumeZero(C, state, sizeVal, sizeTy); 1168 1169 // If the size can be zero, the result will be 0 in that case, and we don't 1170 // have to check either of the buffers. 1171 if (stateZeroSize) { 1172 state = stateZeroSize; 1173 state = state->BindExpr(CE, LCtx, 1174 svalBuilder.makeZeroVal(CE->getType())); 1175 C.addTransition(state); 1176 } 1177 1178 // If the size can be nonzero, we have to check the other arguments. 1179 if (stateNonZeroSize) { 1180 state = stateNonZeroSize; 1181 // If we know the two buffers are the same, we know the result is 0. 1182 // First, get the two buffers' addresses. Another checker will have already 1183 // made sure they're not undefined. 
1184 DefinedOrUnknownSVal LV = 1185 state->getSVal(Left, LCtx).castAs<DefinedOrUnknownSVal>(); 1186 DefinedOrUnknownSVal RV = 1187 state->getSVal(Right, LCtx).castAs<DefinedOrUnknownSVal>(); 1188 1189 // See if they are the same. 1190 DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV); 1191 ProgramStateRef StSameBuf, StNotSameBuf; 1192 std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf); 1193 1194 // If the two arguments might be the same buffer, we know the result is 0, 1195 // and we only need to check one size. 1196 if (StSameBuf) { 1197 state = StSameBuf; 1198 state = CheckBufferAccess(C, state, Size, Left); 1199 if (state) { 1200 state = StSameBuf->BindExpr(CE, LCtx, 1201 svalBuilder.makeZeroVal(CE->getType())); 1202 C.addTransition(state); 1203 } 1204 } 1205 1206 // If the two arguments might be different buffers, we have to check the 1207 // size of both of them. 1208 if (StNotSameBuf) { 1209 state = StNotSameBuf; 1210 state = CheckBufferAccess(C, state, Size, Left, Right); 1211 if (state) { 1212 // The return value is the comparison result, which we don't know. 
1213 SVal CmpV = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx, 1214 C.blockCount()); 1215 state = state->BindExpr(CE, LCtx, CmpV); 1216 C.addTransition(state); 1217 } 1218 } 1219 } 1220 } 1221 1222 void CStringChecker::evalstrLength(CheckerContext &C, 1223 const CallExpr *CE) const { 1224 if (CE->getNumArgs() < 1) 1225 return; 1226 1227 // size_t strlen(const char *s); 1228 evalstrLengthCommon(C, CE, /* IsStrnlen = */ false); 1229 } 1230 1231 void CStringChecker::evalstrnLength(CheckerContext &C, 1232 const CallExpr *CE) const { 1233 if (CE->getNumArgs() < 2) 1234 return; 1235 1236 // size_t strnlen(const char *s, size_t maxlen); 1237 evalstrLengthCommon(C, CE, /* IsStrnlen = */ true); 1238 } 1239 1240 void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE, 1241 bool IsStrnlen) const { 1242 CurrentFunctionDescription = "string length function"; 1243 ProgramStateRef state = C.getState(); 1244 const LocationContext *LCtx = C.getLocationContext(); 1245 1246 if (IsStrnlen) { 1247 const Expr *maxlenExpr = CE->getArg(1); 1248 SVal maxlenVal = state->getSVal(maxlenExpr, LCtx); 1249 1250 ProgramStateRef stateZeroSize, stateNonZeroSize; 1251 std::tie(stateZeroSize, stateNonZeroSize) = 1252 assumeZero(C, state, maxlenVal, maxlenExpr->getType()); 1253 1254 // If the size can be zero, the result will be 0 in that case, and we don't 1255 // have to check the string itself. 1256 if (stateZeroSize) { 1257 SVal zero = C.getSValBuilder().makeZeroVal(CE->getType()); 1258 stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, zero); 1259 C.addTransition(stateZeroSize); 1260 } 1261 1262 // If the size is GUARANTEED to be zero, we're done! 1263 if (!stateNonZeroSize) 1264 return; 1265 1266 // Otherwise, record the assumption that the size is nonzero. 1267 state = stateNonZeroSize; 1268 } 1269 1270 // Check that the string argument is non-null. 
1271 const Expr *Arg = CE->getArg(0); 1272 SVal ArgVal = state->getSVal(Arg, LCtx); 1273 1274 state = checkNonNull(C, state, Arg, ArgVal); 1275 1276 if (!state) 1277 return; 1278 1279 SVal strLength = getCStringLength(C, state, Arg, ArgVal); 1280 1281 // If the argument isn't a valid C string, there's no valid state to 1282 // transition to. 1283 if (strLength.isUndef()) 1284 return; 1285 1286 DefinedOrUnknownSVal result = UnknownVal(); 1287 1288 // If the check is for strnlen() then bind the return value to no more than 1289 // the maxlen value. 1290 if (IsStrnlen) { 1291 QualType cmpTy = C.getSValBuilder().getConditionType(); 1292 1293 // It's a little unfortunate to be getting this again, 1294 // but it's not that expensive... 1295 const Expr *maxlenExpr = CE->getArg(1); 1296 SVal maxlenVal = state->getSVal(maxlenExpr, LCtx); 1297 1298 Optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>(); 1299 Optional<NonLoc> maxlenValNL = maxlenVal.getAs<NonLoc>(); 1300 1301 if (strLengthNL && maxlenValNL) { 1302 ProgramStateRef stateStringTooLong, stateStringNotTooLong; 1303 1304 // Check if the strLength is greater than the maxlen. 1305 std::tie(stateStringTooLong, stateStringNotTooLong) = state->assume( 1306 C.getSValBuilder() 1307 .evalBinOpNN(state, BO_GT, *strLengthNL, *maxlenValNL, cmpTy) 1308 .castAs<DefinedOrUnknownSVal>()); 1309 1310 if (stateStringTooLong && !stateStringNotTooLong) { 1311 // If the string is longer than maxlen, return maxlen. 1312 result = *maxlenValNL; 1313 } else if (stateStringNotTooLong && !stateStringTooLong) { 1314 // If the string is shorter than maxlen, return its length. 1315 result = *strLengthNL; 1316 } 1317 } 1318 1319 if (result.isUnknown()) { 1320 // If we don't have enough information for a comparison, there's 1321 // no guarantee the full string length will actually be returned. 1322 // All we know is the return value is the min of the string length 1323 // and the limit. This is better than nothing. 
1324 result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx, 1325 C.blockCount()); 1326 NonLoc resultNL = result.castAs<NonLoc>(); 1327 1328 if (strLengthNL) { 1329 state = state->assume(C.getSValBuilder().evalBinOpNN( 1330 state, BO_LE, resultNL, *strLengthNL, cmpTy) 1331 .castAs<DefinedOrUnknownSVal>(), true); 1332 } 1333 1334 if (maxlenValNL) { 1335 state = state->assume(C.getSValBuilder().evalBinOpNN( 1336 state, BO_LE, resultNL, *maxlenValNL, cmpTy) 1337 .castAs<DefinedOrUnknownSVal>(), true); 1338 } 1339 } 1340 1341 } else { 1342 // This is a plain strlen(), not strnlen(). 1343 result = strLength.castAs<DefinedOrUnknownSVal>(); 1344 1345 // If we don't know the length of the string, conjure a return 1346 // value, so it can be used in constraints, at least. 1347 if (result.isUnknown()) { 1348 result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx, 1349 C.blockCount()); 1350 } 1351 } 1352 1353 // Bind the return value. 1354 assert(!result.isUnknown() && "Should have conjured a value by now"); 1355 state = state->BindExpr(CE, LCtx, result); 1356 C.addTransition(state); 1357 } 1358 1359 void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const { 1360 if (CE->getNumArgs() < 2) 1361 return; 1362 1363 // char *strcpy(char *restrict dst, const char *restrict src); 1364 evalStrcpyCommon(C, CE, 1365 /* returnEnd = */ false, 1366 /* isBounded = */ false, 1367 /* isAppending = */ false); 1368 } 1369 1370 void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const { 1371 if (CE->getNumArgs() < 3) 1372 return; 1373 1374 // char *strncpy(char *restrict dst, const char *restrict src, size_t n); 1375 evalStrcpyCommon(C, CE, 1376 /* returnEnd = */ false, 1377 /* isBounded = */ true, 1378 /* isAppending = */ false); 1379 } 1380 1381 void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const { 1382 if (CE->getNumArgs() < 2) 1383 return; 1384 1385 // char *stpcpy(char *restrict dst, const char *restrict src); 
1386 evalStrcpyCommon(C, CE, 1387 /* returnEnd = */ true, 1388 /* isBounded = */ false, 1389 /* isAppending = */ false); 1390 } 1391 1392 void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const { 1393 if (CE->getNumArgs() < 2) 1394 return; 1395 1396 //char *strcat(char *restrict s1, const char *restrict s2); 1397 evalStrcpyCommon(C, CE, 1398 /* returnEnd = */ false, 1399 /* isBounded = */ false, 1400 /* isAppending = */ true); 1401 } 1402 1403 void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const { 1404 if (CE->getNumArgs() < 3) 1405 return; 1406 1407 //char *strncat(char *restrict s1, const char *restrict s2, size_t n); 1408 evalStrcpyCommon(C, CE, 1409 /* returnEnd = */ false, 1410 /* isBounded = */ true, 1411 /* isAppending = */ true); 1412 } 1413 1414 void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, 1415 bool returnEnd, bool isBounded, 1416 bool isAppending) const { 1417 CurrentFunctionDescription = "string copy function"; 1418 ProgramStateRef state = C.getState(); 1419 const LocationContext *LCtx = C.getLocationContext(); 1420 1421 // Check that the destination is non-null. 1422 const Expr *Dst = CE->getArg(0); 1423 SVal DstVal = state->getSVal(Dst, LCtx); 1424 1425 state = checkNonNull(C, state, Dst, DstVal); 1426 if (!state) 1427 return; 1428 1429 // Check that the source is non-null. 1430 const Expr *srcExpr = CE->getArg(1); 1431 SVal srcVal = state->getSVal(srcExpr, LCtx); 1432 state = checkNonNull(C, state, srcExpr, srcVal); 1433 if (!state) 1434 return; 1435 1436 // Get the string length of the source. 1437 SVal strLength = getCStringLength(C, state, srcExpr, srcVal); 1438 1439 // If the source isn't a valid C string, give up. 
1440 if (strLength.isUndef()) 1441 return; 1442 1443 SValBuilder &svalBuilder = C.getSValBuilder(); 1444 QualType cmpTy = svalBuilder.getConditionType(); 1445 QualType sizeTy = svalBuilder.getContext().getSizeType(); 1446 1447 // These two values allow checking two kinds of errors: 1448 // - actual overflows caused by a source that doesn't fit in the destination 1449 // - potential overflows caused by a bound that could exceed the destination 1450 SVal amountCopied = UnknownVal(); 1451 SVal maxLastElementIndex = UnknownVal(); 1452 const char *boundWarning = nullptr; 1453 1454 // If the function is strncpy, strncat, etc... it is bounded. 1455 if (isBounded) { 1456 // Get the max number of characters to copy. 1457 const Expr *lenExpr = CE->getArg(2); 1458 SVal lenVal = state->getSVal(lenExpr, LCtx); 1459 1460 // Protect against misdeclared strncpy(). 1461 lenVal = svalBuilder.evalCast(lenVal, sizeTy, lenExpr->getType()); 1462 1463 Optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>(); 1464 Optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>(); 1465 1466 // If we know both values, we might be able to figure out how much 1467 // we're copying. 1468 if (strLengthNL && lenValNL) { 1469 ProgramStateRef stateSourceTooLong, stateSourceNotTooLong; 1470 1471 // Check if the max number to copy is less than the length of the src. 1472 // If the bound is equal to the source length, strncpy won't null- 1473 // terminate the result! 1474 std::tie(stateSourceTooLong, stateSourceNotTooLong) = state->assume( 1475 svalBuilder.evalBinOpNN(state, BO_GE, *strLengthNL, *lenValNL, cmpTy) 1476 .castAs<DefinedOrUnknownSVal>()); 1477 1478 if (stateSourceTooLong && !stateSourceNotTooLong) { 1479 // Max number to copy is less than the length of the src, so the actual 1480 // strLength copied is the max number arg. 1481 state = stateSourceTooLong; 1482 amountCopied = lenVal; 1483 1484 } else if (!stateSourceTooLong && stateSourceNotTooLong) { 1485 // The source buffer entirely fits in the bound. 
1486 state = stateSourceNotTooLong; 1487 amountCopied = strLength; 1488 } 1489 } 1490 1491 // We still want to know if the bound is known to be too large. 1492 if (lenValNL) { 1493 if (isAppending) { 1494 // For strncat, the check is strlen(dst) + lenVal < sizeof(dst) 1495 1496 // Get the string length of the destination. If the destination is 1497 // memory that can't have a string length, we shouldn't be copying 1498 // into it anyway. 1499 SVal dstStrLength = getCStringLength(C, state, Dst, DstVal); 1500 if (dstStrLength.isUndef()) 1501 return; 1502 1503 if (Optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>()) { 1504 maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Add, 1505 *lenValNL, 1506 *dstStrLengthNL, 1507 sizeTy); 1508 boundWarning = "Size argument is greater than the free space in the " 1509 "destination buffer"; 1510 } 1511 1512 } else { 1513 // For strncpy, this is just checking that lenVal <= sizeof(dst) 1514 // (Yes, strncpy and strncat differ in how they treat termination. 1515 // strncat ALWAYS terminates, but strncpy doesn't.) 1516 1517 // We need a special case for when the copy size is zero, in which 1518 // case strncpy will do no work at all. Our bounds check uses n-1 1519 // as the last element accessed, so n == 0 is problematic. 1520 ProgramStateRef StateZeroSize, StateNonZeroSize; 1521 std::tie(StateZeroSize, StateNonZeroSize) = 1522 assumeZero(C, state, *lenValNL, sizeTy); 1523 1524 // If the size is known to be zero, we're done. 1525 if (StateZeroSize && !StateNonZeroSize) { 1526 StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, DstVal); 1527 C.addTransition(StateZeroSize); 1528 return; 1529 } 1530 1531 // Otherwise, go ahead and figure out the last element we'll touch. 1532 // We don't record the non-zero assumption here because we can't 1533 // be sure. We won't warn on a possible zero. 
1534 NonLoc one = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>(); 1535 maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL, 1536 one, sizeTy); 1537 boundWarning = "Size argument is greater than the length of the " 1538 "destination buffer"; 1539 } 1540 } 1541 1542 // If we couldn't pin down the copy length, at least bound it. 1543 // FIXME: We should actually run this code path for append as well, but 1544 // right now it creates problems with constraints (since we can end up 1545 // trying to pass constraints from symbol to symbol). 1546 if (amountCopied.isUnknown() && !isAppending) { 1547 // Try to get a "hypothetical" string length symbol, which we can later 1548 // set as a real value if that turns out to be the case. 1549 amountCopied = getCStringLength(C, state, lenExpr, srcVal, true); 1550 assert(!amountCopied.isUndef()); 1551 1552 if (Optional<NonLoc> amountCopiedNL = amountCopied.getAs<NonLoc>()) { 1553 if (lenValNL) { 1554 // amountCopied <= lenVal 1555 SVal copiedLessThanBound = svalBuilder.evalBinOpNN(state, BO_LE, 1556 *amountCopiedNL, 1557 *lenValNL, 1558 cmpTy); 1559 state = state->assume( 1560 copiedLessThanBound.castAs<DefinedOrUnknownSVal>(), true); 1561 if (!state) 1562 return; 1563 } 1564 1565 if (strLengthNL) { 1566 // amountCopied <= strlen(source) 1567 SVal copiedLessThanSrc = svalBuilder.evalBinOpNN(state, BO_LE, 1568 *amountCopiedNL, 1569 *strLengthNL, 1570 cmpTy); 1571 state = state->assume( 1572 copiedLessThanSrc.castAs<DefinedOrUnknownSVal>(), true); 1573 if (!state) 1574 return; 1575 } 1576 } 1577 } 1578 1579 } else { 1580 // The function isn't bounded. The amount copied should match the length 1581 // of the source buffer. 1582 amountCopied = strLength; 1583 } 1584 1585 assert(state); 1586 1587 // This represents the number of characters copied into the destination 1588 // buffer. (It may not actually be the strlen if the destination buffer 1589 // is not terminated.) 
1590 SVal finalStrLength = UnknownVal(); 1591 1592 // If this is an appending function (strcat, strncat...) then set the 1593 // string length to strlen(src) + strlen(dst) since the buffer will 1594 // ultimately contain both. 1595 if (isAppending) { 1596 // Get the string length of the destination. If the destination is memory 1597 // that can't have a string length, we shouldn't be copying into it anyway. 1598 SVal dstStrLength = getCStringLength(C, state, Dst, DstVal); 1599 if (dstStrLength.isUndef()) 1600 return; 1601 1602 Optional<NonLoc> srcStrLengthNL = amountCopied.getAs<NonLoc>(); 1603 Optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>(); 1604 1605 // If we know both string lengths, we might know the final string length. 1606 if (srcStrLengthNL && dstStrLengthNL) { 1607 // Make sure the two lengths together don't overflow a size_t. 1608 state = checkAdditionOverflow(C, state, *srcStrLengthNL, *dstStrLengthNL); 1609 if (!state) 1610 return; 1611 1612 finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *srcStrLengthNL, 1613 *dstStrLengthNL, sizeTy); 1614 } 1615 1616 // If we couldn't get a single value for the final string length, 1617 // we can at least bound it by the individual lengths. 1618 if (finalStrLength.isUnknown()) { 1619 // Try to get a "hypothetical" string length symbol, which we can later 1620 // set as a real value if that turns out to be the case. 
1621 finalStrLength = getCStringLength(C, state, CE, DstVal, true); 1622 assert(!finalStrLength.isUndef()); 1623 1624 if (Optional<NonLoc> finalStrLengthNL = finalStrLength.getAs<NonLoc>()) { 1625 if (srcStrLengthNL) { 1626 // finalStrLength >= srcStrLength 1627 SVal sourceInResult = svalBuilder.evalBinOpNN(state, BO_GE, 1628 *finalStrLengthNL, 1629 *srcStrLengthNL, 1630 cmpTy); 1631 state = state->assume(sourceInResult.castAs<DefinedOrUnknownSVal>(), 1632 true); 1633 if (!state) 1634 return; 1635 } 1636 1637 if (dstStrLengthNL) { 1638 // finalStrLength >= dstStrLength 1639 SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE, 1640 *finalStrLengthNL, 1641 *dstStrLengthNL, 1642 cmpTy); 1643 state = 1644 state->assume(destInResult.castAs<DefinedOrUnknownSVal>(), true); 1645 if (!state) 1646 return; 1647 } 1648 } 1649 } 1650 1651 } else { 1652 // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and 1653 // the final string length will match the input string length. 1654 finalStrLength = amountCopied; 1655 } 1656 1657 // The final result of the function will either be a pointer past the last 1658 // copied element, or a pointer to the start of the destination buffer. 1659 SVal Result = (returnEnd ? UnknownVal() : DstVal); 1660 1661 assert(state); 1662 1663 // If the destination is a MemRegion, try to check for a buffer overflow and 1664 // record the new string length. 1665 if (Optional<loc::MemRegionVal> dstRegVal = 1666 DstVal.getAs<loc::MemRegionVal>()) { 1667 QualType ptrTy = Dst->getType(); 1668 1669 // If we have an exact value on a bounded copy, use that to check for 1670 // overflows, rather than our estimate about how much is actually copied. 
1671 if (boundWarning) { 1672 if (Optional<NonLoc> maxLastNL = maxLastElementIndex.getAs<NonLoc>()) { 1673 SVal maxLastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal, 1674 *maxLastNL, ptrTy); 1675 state = CheckLocation(C, state, CE->getArg(2), maxLastElement, 1676 boundWarning); 1677 if (!state) 1678 return; 1679 } 1680 } 1681 1682 // Then, if the final length is known... 1683 if (Optional<NonLoc> knownStrLength = finalStrLength.getAs<NonLoc>()) { 1684 SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal, 1685 *knownStrLength, ptrTy); 1686 1687 // ...and we haven't checked the bound, we'll check the actual copy. 1688 if (!boundWarning) { 1689 const char * const warningMsg = 1690 "String copy function overflows destination buffer"; 1691 state = CheckLocation(C, state, Dst, lastElement, warningMsg); 1692 if (!state) 1693 return; 1694 } 1695 1696 // If this is a stpcpy-style copy, the last element is the return value. 1697 if (returnEnd) 1698 Result = lastElement; 1699 } 1700 1701 // Invalidate the destination (regular invalidation without pointer-escaping 1702 // the address of the top-level region). This must happen before we set the 1703 // C string length because invalidation will clear the length. 1704 // FIXME: Even if we can't perfectly model the copy, we should see if we 1705 // can use LazyCompoundVals to copy the source values into the destination. 1706 // This would probably remove any existing bindings past the end of the 1707 // string, but that's still an improvement over blank invalidation. 1708 state = InvalidateBuffer(C, state, Dst, *dstRegVal, 1709 /*IsSourceBuffer*/false, nullptr); 1710 1711 // Invalidate the source (const-invalidation without const-pointer-escaping 1712 // the address of the top-level region). 1713 state = InvalidateBuffer(C, state, srcExpr, srcVal, /*IsSourceBuffer*/true, 1714 nullptr); 1715 1716 // Set the C string length of the destination, if we know it. 
1717 if (isBounded && !isAppending) { 1718 // strncpy is annoying in that it doesn't guarantee to null-terminate 1719 // the result string. If the original string didn't fit entirely inside 1720 // the bound (including the null-terminator), we don't know how long the 1721 // result is. 1722 if (amountCopied != strLength) 1723 finalStrLength = UnknownVal(); 1724 } 1725 state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength); 1726 } 1727 1728 assert(state); 1729 1730 // If this is a stpcpy-style copy, but we were unable to check for a buffer 1731 // overflow, we still need a result. Conjure a return value. 1732 if (returnEnd && Result.isUnknown()) { 1733 Result = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount()); 1734 } 1735 1736 // Set the return value. 1737 state = state->BindExpr(CE, LCtx, Result); 1738 C.addTransition(state); 1739 } 1740 1741 void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const { 1742 if (CE->getNumArgs() < 2) 1743 return; 1744 1745 //int strcmp(const char *s1, const char *s2); 1746 evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false); 1747 } 1748 1749 void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const { 1750 if (CE->getNumArgs() < 3) 1751 return; 1752 1753 //int strncmp(const char *s1, const char *s2, size_t n); 1754 evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false); 1755 } 1756 1757 void CStringChecker::evalStrcasecmp(CheckerContext &C, 1758 const CallExpr *CE) const { 1759 if (CE->getNumArgs() < 2) 1760 return; 1761 1762 //int strcasecmp(const char *s1, const char *s2); 1763 evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true); 1764 } 1765 1766 void CStringChecker::evalStrncasecmp(CheckerContext &C, 1767 const CallExpr *CE) const { 1768 if (CE->getNumArgs() < 3) 1769 return; 1770 1771 //int strncasecmp(const char *s1, const char *s2, size_t n); 1772 evalStrcmpCommon(C, CE, /* isBounded = */ true, 
/* ignoreCase = */ true); 1773 } 1774 1775 void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE, 1776 bool isBounded, bool ignoreCase) const { 1777 CurrentFunctionDescription = "string comparison function"; 1778 ProgramStateRef state = C.getState(); 1779 const LocationContext *LCtx = C.getLocationContext(); 1780 1781 // Check that the first string is non-null 1782 const Expr *s1 = CE->getArg(0); 1783 SVal s1Val = state->getSVal(s1, LCtx); 1784 state = checkNonNull(C, state, s1, s1Val); 1785 if (!state) 1786 return; 1787 1788 // Check that the second string is non-null. 1789 const Expr *s2 = CE->getArg(1); 1790 SVal s2Val = state->getSVal(s2, LCtx); 1791 state = checkNonNull(C, state, s2, s2Val); 1792 if (!state) 1793 return; 1794 1795 // Get the string length of the first string or give up. 1796 SVal s1Length = getCStringLength(C, state, s1, s1Val); 1797 if (s1Length.isUndef()) 1798 return; 1799 1800 // Get the string length of the second string or give up. 1801 SVal s2Length = getCStringLength(C, state, s2, s2Val); 1802 if (s2Length.isUndef()) 1803 return; 1804 1805 // If we know the two buffers are the same, we know the result is 0. 1806 // First, get the two buffers' addresses. Another checker will have already 1807 // made sure they're not undefined. 1808 DefinedOrUnknownSVal LV = s1Val.castAs<DefinedOrUnknownSVal>(); 1809 DefinedOrUnknownSVal RV = s2Val.castAs<DefinedOrUnknownSVal>(); 1810 1811 // See if they are the same. 1812 SValBuilder &svalBuilder = C.getSValBuilder(); 1813 DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV); 1814 ProgramStateRef StSameBuf, StNotSameBuf; 1815 std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf); 1816 1817 // If the two arguments might be the same buffer, we know the result is 0, 1818 // and we only need to check one size. 
1819 if (StSameBuf) { 1820 StSameBuf = StSameBuf->BindExpr(CE, LCtx, 1821 svalBuilder.makeZeroVal(CE->getType())); 1822 C.addTransition(StSameBuf); 1823 1824 // If the two arguments are GUARANTEED to be the same, we're done! 1825 if (!StNotSameBuf) 1826 return; 1827 } 1828 1829 assert(StNotSameBuf); 1830 state = StNotSameBuf; 1831 1832 // At this point we can go about comparing the two buffers. 1833 // For now, we only do this if they're both known string literals. 1834 1835 // Attempt to extract string literals from both expressions. 1836 const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val); 1837 const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val); 1838 bool canComputeResult = false; 1839 SVal resultVal = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx, 1840 C.blockCount()); 1841 1842 if (s1StrLiteral && s2StrLiteral) { 1843 StringRef s1StrRef = s1StrLiteral->getString(); 1844 StringRef s2StrRef = s2StrLiteral->getString(); 1845 1846 if (isBounded) { 1847 // Get the max number of characters to compare. 1848 const Expr *lenExpr = CE->getArg(2); 1849 SVal lenVal = state->getSVal(lenExpr, LCtx); 1850 1851 // If the length is known, we can get the right substrings. 1852 if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) { 1853 // Create substrings of each to compare the prefix. 1854 s1StrRef = s1StrRef.substr(0, (size_t)len->getZExtValue()); 1855 s2StrRef = s2StrRef.substr(0, (size_t)len->getZExtValue()); 1856 canComputeResult = true; 1857 } 1858 } else { 1859 // This is a normal, unbounded strcmp. 1860 canComputeResult = true; 1861 } 1862 1863 if (canComputeResult) { 1864 // Real strcmp stops at null characters. 
1865 size_t s1Term = s1StrRef.find('\0'); 1866 if (s1Term != StringRef::npos) 1867 s1StrRef = s1StrRef.substr(0, s1Term); 1868 1869 size_t s2Term = s2StrRef.find('\0'); 1870 if (s2Term != StringRef::npos) 1871 s2StrRef = s2StrRef.substr(0, s2Term); 1872 1873 // Use StringRef's comparison methods to compute the actual result. 1874 int compareRes = ignoreCase ? s1StrRef.compare_lower(s2StrRef) 1875 : s1StrRef.compare(s2StrRef); 1876 1877 // The strcmp function returns an integer greater than, equal to, or less 1878 // than zero, [c11, p7.24.4.2]. 1879 if (compareRes == 0) { 1880 resultVal = svalBuilder.makeIntVal(compareRes, CE->getType()); 1881 } 1882 else { 1883 DefinedSVal zeroVal = svalBuilder.makeIntVal(0, CE->getType()); 1884 // Constrain strcmp's result range based on the result of StringRef's 1885 // comparison methods. 1886 BinaryOperatorKind op = (compareRes == 1) ? BO_GT : BO_LT; 1887 SVal compareWithZero = 1888 svalBuilder.evalBinOp(state, op, resultVal, zeroVal, 1889 svalBuilder.getConditionType()); 1890 DefinedSVal compareWithZeroVal = compareWithZero.castAs<DefinedSVal>(); 1891 state = state->assume(compareWithZeroVal, true); 1892 } 1893 } 1894 } 1895 1896 state = state->BindExpr(CE, LCtx, resultVal); 1897 1898 // Record this as a possible path. 1899 C.addTransition(state); 1900 } 1901 1902 void CStringChecker::evalStrsep(CheckerContext &C, const CallExpr *CE) const { 1903 //char *strsep(char **stringp, const char *delim); 1904 if (CE->getNumArgs() < 2) 1905 return; 1906 1907 // Sanity: does the search string parameter match the return type? 
1908 const Expr *SearchStrPtr = CE->getArg(0); 1909 QualType CharPtrTy = SearchStrPtr->getType()->getPointeeType(); 1910 if (CharPtrTy.isNull() || 1911 CE->getType().getUnqualifiedType() != CharPtrTy.getUnqualifiedType()) 1912 return; 1913 1914 CurrentFunctionDescription = "strsep()"; 1915 ProgramStateRef State = C.getState(); 1916 const LocationContext *LCtx = C.getLocationContext(); 1917 1918 // Check that the search string pointer is non-null (though it may point to 1919 // a null string). 1920 SVal SearchStrVal = State->getSVal(SearchStrPtr, LCtx); 1921 State = checkNonNull(C, State, SearchStrPtr, SearchStrVal); 1922 if (!State) 1923 return; 1924 1925 // Check that the delimiter string is non-null. 1926 const Expr *DelimStr = CE->getArg(1); 1927 SVal DelimStrVal = State->getSVal(DelimStr, LCtx); 1928 State = checkNonNull(C, State, DelimStr, DelimStrVal); 1929 if (!State) 1930 return; 1931 1932 SValBuilder &SVB = C.getSValBuilder(); 1933 SVal Result; 1934 if (Optional<Loc> SearchStrLoc = SearchStrVal.getAs<Loc>()) { 1935 // Get the current value of the search string pointer, as a char*. 1936 Result = State->getSVal(*SearchStrLoc, CharPtrTy); 1937 1938 // Invalidate the search string, representing the change of one delimiter 1939 // character to NUL. 1940 State = InvalidateBuffer(C, State, SearchStrPtr, Result, 1941 /*IsSourceBuffer*/false, nullptr); 1942 1943 // Overwrite the search string pointer. The new value is either an address 1944 // further along in the same string, or NULL if there are no more tokens. 1945 State = State->bindLoc(*SearchStrLoc, 1946 SVB.conjureSymbolVal(getTag(), CE, LCtx, CharPtrTy, 1947 C.blockCount())); 1948 } else { 1949 assert(SearchStrVal.isUnknown()); 1950 // Conjure a symbolic value. It's the best we can do. 1951 Result = SVB.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount()); 1952 } 1953 1954 // Set the return value, and finish. 
1955 State = State->BindExpr(CE, LCtx, Result); 1956 C.addTransition(State); 1957 } 1958 1959 // These should probably be moved into a C++ standard library checker. 1960 void CStringChecker::evalStdCopy(CheckerContext &C, const CallExpr *CE) const { 1961 evalStdCopyCommon(C, CE); 1962 } 1963 1964 void CStringChecker::evalStdCopyBackward(CheckerContext &C, 1965 const CallExpr *CE) const { 1966 evalStdCopyCommon(C, CE); 1967 } 1968 1969 void CStringChecker::evalStdCopyCommon(CheckerContext &C, 1970 const CallExpr *CE) const { 1971 if (CE->getNumArgs() < 3) 1972 return; 1973 1974 ProgramStateRef State = C.getState(); 1975 1976 const LocationContext *LCtx = C.getLocationContext(); 1977 1978 // template <class _InputIterator, class _OutputIterator> 1979 // _OutputIterator 1980 // copy(_InputIterator __first, _InputIterator __last, 1981 // _OutputIterator __result) 1982 1983 // Invalidate the destination buffer 1984 const Expr *Dst = CE->getArg(2); 1985 SVal DstVal = State->getSVal(Dst, LCtx); 1986 State = InvalidateBuffer(C, State, Dst, DstVal, /*IsSource=*/false, 1987 /*Size=*/nullptr); 1988 1989 SValBuilder &SVB = C.getSValBuilder(); 1990 1991 SVal ResultVal = SVB.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount()); 1992 State = State->BindExpr(CE, LCtx, ResultVal); 1993 1994 C.addTransition(State); 1995 } 1996 1997 static bool isCPPStdLibraryFunction(const FunctionDecl *FD, StringRef Name) { 1998 IdentifierInfo *II = FD->getIdentifier(); 1999 if (!II) 2000 return false; 2001 2002 if (!AnalysisDeclContext::isInStdNamespace(FD)) 2003 return false; 2004 2005 if (II->getName().equals(Name)) 2006 return true; 2007 2008 return false; 2009 } 2010 //===----------------------------------------------------------------------===// 2011 // The driver method, and other Checker callbacks. 
2012 //===----------------------------------------------------------------------===// 2013 2014 bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const { 2015 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 2016 2017 if (!FDecl) 2018 return false; 2019 2020 // FIXME: Poorly-factored string switches are slow. 2021 FnCheck evalFunction = nullptr; 2022 if (C.isCLibraryFunction(FDecl, "memcpy")) 2023 evalFunction = &CStringChecker::evalMemcpy; 2024 else if (C.isCLibraryFunction(FDecl, "mempcpy")) 2025 evalFunction = &CStringChecker::evalMempcpy; 2026 else if (C.isCLibraryFunction(FDecl, "memcmp")) 2027 evalFunction = &CStringChecker::evalMemcmp; 2028 else if (C.isCLibraryFunction(FDecl, "memmove")) 2029 evalFunction = &CStringChecker::evalMemmove; 2030 else if (C.isCLibraryFunction(FDecl, "strcpy")) 2031 evalFunction = &CStringChecker::evalStrcpy; 2032 else if (C.isCLibraryFunction(FDecl, "strncpy")) 2033 evalFunction = &CStringChecker::evalStrncpy; 2034 else if (C.isCLibraryFunction(FDecl, "stpcpy")) 2035 evalFunction = &CStringChecker::evalStpcpy; 2036 else if (C.isCLibraryFunction(FDecl, "strcat")) 2037 evalFunction = &CStringChecker::evalStrcat; 2038 else if (C.isCLibraryFunction(FDecl, "strncat")) 2039 evalFunction = &CStringChecker::evalStrncat; 2040 else if (C.isCLibraryFunction(FDecl, "strlen")) 2041 evalFunction = &CStringChecker::evalstrLength; 2042 else if (C.isCLibraryFunction(FDecl, "strnlen")) 2043 evalFunction = &CStringChecker::evalstrnLength; 2044 else if (C.isCLibraryFunction(FDecl, "strcmp")) 2045 evalFunction = &CStringChecker::evalStrcmp; 2046 else if (C.isCLibraryFunction(FDecl, "strncmp")) 2047 evalFunction = &CStringChecker::evalStrncmp; 2048 else if (C.isCLibraryFunction(FDecl, "strcasecmp")) 2049 evalFunction = &CStringChecker::evalStrcasecmp; 2050 else if (C.isCLibraryFunction(FDecl, "strncasecmp")) 2051 evalFunction = &CStringChecker::evalStrncasecmp; 2052 else if (C.isCLibraryFunction(FDecl, "strsep")) 2053 evalFunction 
= &CStringChecker::evalStrsep; 2054 else if (C.isCLibraryFunction(FDecl, "bcopy")) 2055 evalFunction = &CStringChecker::evalBcopy; 2056 else if (C.isCLibraryFunction(FDecl, "bcmp")) 2057 evalFunction = &CStringChecker::evalMemcmp; 2058 else if (isCPPStdLibraryFunction(FDecl, "copy")) 2059 evalFunction = &CStringChecker::evalStdCopy; 2060 else if (isCPPStdLibraryFunction(FDecl, "copy_backward")) 2061 evalFunction = &CStringChecker::evalStdCopyBackward; 2062 2063 // If the callee isn't a string function, let another checker handle it. 2064 if (!evalFunction) 2065 return false; 2066 2067 // Check and evaluate the call. 2068 (this->*evalFunction)(C, CE); 2069 2070 // If the evaluate call resulted in no change, chain to the next eval call 2071 // handler. 2072 // Note, the custom CString evaluation calls assume that basic safety 2073 // properties are held. However, if the user chooses to turn off some of these 2074 // checks, we ignore the issues and leave the call evaluation to a generic 2075 // handler. 2076 return C.isDifferent(); 2077 } 2078 2079 void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const { 2080 // Record string length for char a[] = "abc"; 2081 ProgramStateRef state = C.getState(); 2082 2083 for (const auto *I : DS->decls()) { 2084 const VarDecl *D = dyn_cast<VarDecl>(I); 2085 if (!D) 2086 continue; 2087 2088 // FIXME: Handle array fields of structs. 
2089 if (!D->getType()->isArrayType()) 2090 continue; 2091 2092 const Expr *Init = D->getInit(); 2093 if (!Init) 2094 continue; 2095 if (!isa<StringLiteral>(Init)) 2096 continue; 2097 2098 Loc VarLoc = state->getLValue(D, C.getLocationContext()); 2099 const MemRegion *MR = VarLoc.getAsRegion(); 2100 if (!MR) 2101 continue; 2102 2103 SVal StrVal = state->getSVal(Init, C.getLocationContext()); 2104 assert(StrVal.isValid() && "Initializer string is unknown or undefined"); 2105 DefinedOrUnknownSVal strLength = 2106 getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>(); 2107 2108 state = state->set<CStringLength>(MR, strLength); 2109 } 2110 2111 C.addTransition(state); 2112 } 2113 2114 ProgramStateRef 2115 CStringChecker::checkRegionChanges(ProgramStateRef state, 2116 const InvalidatedSymbols *, 2117 ArrayRef<const MemRegion *> ExplicitRegions, 2118 ArrayRef<const MemRegion *> Regions, 2119 const CallEvent *Call) const { 2120 CStringLengthTy Entries = state->get<CStringLength>(); 2121 if (Entries.isEmpty()) 2122 return state; 2123 2124 llvm::SmallPtrSet<const MemRegion *, 8> Invalidated; 2125 llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions; 2126 2127 // First build sets for the changed regions and their super-regions. 2128 for (ArrayRef<const MemRegion *>::iterator 2129 I = Regions.begin(), E = Regions.end(); I != E; ++I) { 2130 const MemRegion *MR = *I; 2131 Invalidated.insert(MR); 2132 2133 SuperRegions.insert(MR); 2134 while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) { 2135 MR = SR->getSuperRegion(); 2136 SuperRegions.insert(MR); 2137 } 2138 } 2139 2140 CStringLengthTy::Factory &F = state->get_context<CStringLength>(); 2141 2142 // Then loop over the entries in the current state. 2143 for (CStringLengthTy::iterator I = Entries.begin(), 2144 E = Entries.end(); I != E; ++I) { 2145 const MemRegion *MR = I.getKey(); 2146 2147 // Is this entry for a super-region of a changed region? 
2148 if (SuperRegions.count(MR)) { 2149 Entries = F.remove(Entries, MR); 2150 continue; 2151 } 2152 2153 // Is this entry for a sub-region of a changed region? 2154 const MemRegion *Super = MR; 2155 while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) { 2156 Super = SR->getSuperRegion(); 2157 if (Invalidated.count(Super)) { 2158 Entries = F.remove(Entries, MR); 2159 break; 2160 } 2161 } 2162 } 2163 2164 return state->set<CStringLength>(Entries); 2165 } 2166 2167 void CStringChecker::checkLiveSymbols(ProgramStateRef state, 2168 SymbolReaper &SR) const { 2169 // Mark all symbols in our string length map as valid. 2170 CStringLengthTy Entries = state->get<CStringLength>(); 2171 2172 for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end(); 2173 I != E; ++I) { 2174 SVal Len = I.getData(); 2175 2176 for (SymExpr::symbol_iterator si = Len.symbol_begin(), 2177 se = Len.symbol_end(); si != se; ++si) 2178 SR.markInUse(*si); 2179 } 2180 } 2181 2182 void CStringChecker::checkDeadSymbols(SymbolReaper &SR, 2183 CheckerContext &C) const { 2184 if (!SR.hasDeadSymbols()) 2185 return; 2186 2187 ProgramStateRef state = C.getState(); 2188 CStringLengthTy Entries = state->get<CStringLength>(); 2189 if (Entries.isEmpty()) 2190 return; 2191 2192 CStringLengthTy::Factory &F = state->get_context<CStringLength>(); 2193 for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end(); 2194 I != E; ++I) { 2195 SVal Len = I.getData(); 2196 if (SymbolRef Sym = Len.getAsSymbol()) { 2197 if (SR.isDead(Sym)) 2198 Entries = F.remove(Entries, I.getKey()); 2199 } 2200 } 2201 2202 state = state->set<CStringLength>(Entries); 2203 C.addTransition(state); 2204 } 2205 2206 #define REGISTER_CHECKER(name) \ 2207 void ento::register##name(CheckerManager &mgr) { \ 2208 CStringChecker *checker = mgr.registerChecker<CStringChecker>(); \ 2209 checker->Filter.Check##name = true; \ 2210 checker->Filter.CheckName##name = mgr.getCurrentCheckName(); \ 2211 } 2212 2213 
// One registration entry point per check; each expansion of REGISTER_CHECKER
// defines the corresponding ento::register<name> function.
REGISTER_CHECKER(CStringNullArg)
REGISTER_CHECKER(CStringOutOfBounds)
REGISTER_CHECKER(CStringBufferOverlap)
REGISTER_CHECKER(CStringNotNullTerm)

/// Registers the basic C string checker by delegating to the registration
/// function for the CStringNullArg check.
void ento::registerCStringCheckerBasic(CheckerManager &Mgr) {
  ento::registerCStringNullArg(Mgr);
}