1 //===- Store.cpp - Interface for maps from Locations to Values ------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defined the types Store and StoreManager. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "clang/StaticAnalyzer/Core/PathSensitive/Store.h" 15 #include "clang/AST/ASTContext.h" 16 #include "clang/AST/CXXInheritance.h" 17 #include "clang/AST/CharUnits.h" 18 #include "clang/AST/Decl.h" 19 #include "clang/AST/DeclCXX.h" 20 #include "clang/AST/DeclObjC.h" 21 #include "clang/AST/Expr.h" 22 #include "clang/AST/Type.h" 23 #include "clang/Basic/LLVM.h" 24 #include "clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h" 25 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" 26 #include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h" 27 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" 28 #include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h" 29 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h" 30 #include "clang/StaticAnalyzer/Core/PathSensitive/StoreRef.h" 31 #include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h" 32 #include "llvm/ADT/APSInt.h" 33 #include "llvm/ADT/Optional.h" 34 #include "llvm/ADT/SmallVector.h" 35 #include "llvm/Support/Casting.h" 36 #include "llvm/Support/ErrorHandling.h" 37 #include <cassert> 38 #include <cstdint> 39 40 using namespace clang; 41 using namespace ento; 42 43 StoreManager::StoreManager(ProgramStateManager &stateMgr) 44 : svalBuilder(stateMgr.getSValBuilder()), StateMgr(stateMgr), 45 MRMgr(svalBuilder.getRegionManager()), Ctx(stateMgr.getContext()) {} 46 47 StoreRef StoreManager::enterStackFrame(Store OldStore, 48 const CallEvent &Call, 49 const StackFrameContext *LCtx) { 50 StoreRef Store = StoreRef(OldStore, *this); 51 52 SmallVector<CallEvent::FrameBindingTy, 16> InitialBindings; 53 Call.getInitialStackFrameContents(LCtx, InitialBindings); 54 55 for (const auto &I : InitialBindings) 56 Store = Bind(Store.getStore(), I.first, I.second); 57 58 return Store; 59 } 60 61 const ElementRegion *StoreManager::MakeElementRegion(const SubRegion *Base, 62 QualType EleTy, 63 uint64_t index) { 64 NonLoc idx = svalBuilder.makeArrayIndex(index); 65 return MRMgr.getElementRegion(EleTy, idx, Base, svalBuilder.getContext()); 66 } 67 68 const ElementRegion *StoreManager::GetElementZeroRegion(const SubRegion *R, 69 QualType T) { 70 NonLoc idx = svalBuilder.makeZeroArrayIndex(); 71 assert(!T.isNull()); 72 return MRMgr.getElementRegion(T, idx, R, Ctx); 73 } 74 75 const MemRegion *StoreManager::castRegion(const MemRegion *R, QualType CastToTy) { 76 ASTContext &Ctx = StateMgr.getContext(); 77 78 // Handle casts to Objective-C objects. 79 if (CastToTy->isObjCObjectPointerType()) 80 return R->StripCasts(); 81 82 if (CastToTy->isBlockPointerType()) { 83 // FIXME: We may need different solutions, depending on the symbol 84 // involved. Blocks can be casted to/from 'id', as they can be treated 85 // as Objective-C objects. This could possibly be handled by enhancing 86 // our reasoning of downcasts of symbolic objects. 87 if (isa<CodeTextRegion>(R) || isa<SymbolicRegion>(R)) 88 return R; 89 90 // We don't know what to make of it. Return a NULL region, which 91 // will be interpretted as UnknownVal. 92 return nullptr; 93 } 94 95 // Now assume we are casting from pointer to pointer. Other cases should 96 // already be handled. 97 QualType PointeeTy = CastToTy->getPointeeType(); 98 QualType CanonPointeeTy = Ctx.getCanonicalType(PointeeTy); 99 100 // Handle casts to void*. We just pass the region through. 101 if (CanonPointeeTy.getLocalUnqualifiedType() == Ctx.VoidTy) 102 return R; 103 104 // Handle casts from compatible types. 105 if (R->isBoundable()) 106 if (const auto *TR = dyn_cast<TypedValueRegion>(R)) { 107 QualType ObjTy = Ctx.getCanonicalType(TR->getValueType()); 108 if (CanonPointeeTy == ObjTy) 109 return R; 110 } 111 112 // Process region cast according to the kind of the region being cast. 113 switch (R->getKind()) { 114 case MemRegion::CXXThisRegionKind: 115 case MemRegion::CodeSpaceRegionKind: 116 case MemRegion::StackLocalsSpaceRegionKind: 117 case MemRegion::StackArgumentsSpaceRegionKind: 118 case MemRegion::HeapSpaceRegionKind: 119 case MemRegion::UnknownSpaceRegionKind: 120 case MemRegion::StaticGlobalSpaceRegionKind: 121 case MemRegion::GlobalInternalSpaceRegionKind: 122 case MemRegion::GlobalSystemSpaceRegionKind: 123 case MemRegion::GlobalImmutableSpaceRegionKind: { 124 llvm_unreachable("Invalid region cast"); 125 } 126 127 case MemRegion::FunctionCodeRegionKind: 128 case MemRegion::BlockCodeRegionKind: 129 case MemRegion::BlockDataRegionKind: 130 case MemRegion::StringRegionKind: 131 // FIXME: Need to handle arbitrary downcasts. 132 case MemRegion::SymbolicRegionKind: 133 case MemRegion::AllocaRegionKind: 134 case MemRegion::CompoundLiteralRegionKind: 135 case MemRegion::FieldRegionKind: 136 case MemRegion::ObjCIvarRegionKind: 137 case MemRegion::ObjCStringRegionKind: 138 case MemRegion::VarRegionKind: 139 case MemRegion::CXXTempObjectRegionKind: 140 case MemRegion::CXXBaseObjectRegionKind: 141 return MakeElementRegion(cast<SubRegion>(R), PointeeTy); 142 143 case MemRegion::ElementRegionKind: { 144 // If we are casting from an ElementRegion to another type, the 145 // algorithm is as follows: 146 // 147 // (1) Compute the "raw offset" of the ElementRegion from the 148 // base region. This is done by calling 'getAsRawOffset()'. 149 // 150 // (2a) If we get a 'RegionRawOffset' after calling 151 // 'getAsRawOffset()', determine if the absolute offset 152 // can be exactly divided into chunks of the size of the 153 // casted-pointee type. If so, create a new ElementRegion with 154 // the pointee-cast type as the new ElementType and the index 155 // being the offset divded by the chunk size. If not, create 156 // a new ElementRegion at offset 0 off the raw offset region. 157 // 158 // (2b) If we don't a get a 'RegionRawOffset' after calling 159 // 'getAsRawOffset()', it means that we are at offset 0. 160 // 161 // FIXME: Handle symbolic raw offsets. 162 163 const ElementRegion *elementR = cast<ElementRegion>(R); 164 const RegionRawOffset &rawOff = elementR->getAsArrayOffset(); 165 const MemRegion *baseR = rawOff.getRegion(); 166 167 // If we cannot compute a raw offset, throw up our hands and return 168 // a NULL MemRegion*. 169 if (!baseR) 170 return nullptr; 171 172 CharUnits off = rawOff.getOffset(); 173 174 if (off.isZero()) { 175 // Edge case: we are at 0 bytes off the beginning of baseR. We 176 // check to see if type we are casting to is the same as the base 177 // region. If so, just return the base region. 178 if (const auto *TR = dyn_cast<TypedValueRegion>(baseR)) { 179 QualType ObjTy = Ctx.getCanonicalType(TR->getValueType()); 180 QualType CanonPointeeTy = Ctx.getCanonicalType(PointeeTy); 181 if (CanonPointeeTy == ObjTy) 182 return baseR; 183 } 184 185 // Otherwise, create a new ElementRegion at offset 0. 186 return MakeElementRegion(cast<SubRegion>(baseR), PointeeTy); 187 } 188 189 // We have a non-zero offset from the base region. We want to determine 190 // if the offset can be evenly divided by sizeof(PointeeTy). If so, 191 // we create an ElementRegion whose index is that value. Otherwise, we 192 // create two ElementRegions, one that reflects a raw offset and the other 193 // that reflects the cast. 194 195 // Compute the index for the new ElementRegion. 196 int64_t newIndex = 0; 197 const MemRegion *newSuperR = nullptr; 198 199 // We can only compute sizeof(PointeeTy) if it is a complete type. 200 if (!PointeeTy->isIncompleteType()) { 201 // Compute the size in **bytes**. 202 CharUnits pointeeTySize = Ctx.getTypeSizeInChars(PointeeTy); 203 if (!pointeeTySize.isZero()) { 204 // Is the offset a multiple of the size? If so, we can layer the 205 // ElementRegion (with elementType == PointeeTy) directly on top of 206 // the base region. 207 if (off % pointeeTySize == 0) { 208 newIndex = off / pointeeTySize; 209 newSuperR = baseR; 210 } 211 } 212 } 213 214 if (!newSuperR) { 215 // Create an intermediate ElementRegion to represent the raw byte. 216 // This will be the super region of the final ElementRegion. 217 newSuperR = MakeElementRegion(cast<SubRegion>(baseR), Ctx.CharTy, 218 off.getQuantity()); 219 } 220 221 return MakeElementRegion(cast<SubRegion>(newSuperR), PointeeTy, newIndex); 222 } 223 } 224 225 llvm_unreachable("unreachable"); 226 } 227 228 static bool regionMatchesCXXRecordType(SVal V, QualType Ty) { 229 const MemRegion *MR = V.getAsRegion(); 230 if (!MR) 231 return true; 232 233 const auto *TVR = dyn_cast<TypedValueRegion>(MR); 234 if (!TVR) 235 return true; 236 237 const CXXRecordDecl *RD = TVR->getValueType()->getAsCXXRecordDecl(); 238 if (!RD) 239 return true; 240 241 const CXXRecordDecl *Expected = Ty->getPointeeCXXRecordDecl(); 242 if (!Expected) 243 Expected = Ty->getAsCXXRecordDecl(); 244 245 return Expected->getCanonicalDecl() == RD->getCanonicalDecl(); 246 } 247 248 SVal StoreManager::evalDerivedToBase(SVal Derived, const CastExpr *Cast) { 249 // Sanity check to avoid doing the wrong thing in the face of 250 // reinterpret_cast. 251 if (!regionMatchesCXXRecordType(Derived, Cast->getSubExpr()->getType())) 252 return UnknownVal(); 253 254 // Walk through the cast path to create nested CXXBaseRegions. 255 SVal Result = Derived; 256 for (CastExpr::path_const_iterator I = Cast->path_begin(), 257 E = Cast->path_end(); 258 I != E; ++I) { 259 Result = evalDerivedToBase(Result, (*I)->getType(), (*I)->isVirtual()); 260 } 261 return Result; 262 } 263 264 SVal StoreManager::evalDerivedToBase(SVal Derived, const CXXBasePath &Path) { 265 // Walk through the path to create nested CXXBaseRegions. 266 SVal Result = Derived; 267 for (const auto &I : Path) 268 Result = evalDerivedToBase(Result, I.Base->getType(), 269 I.Base->isVirtual()); 270 return Result; 271 } 272 273 SVal StoreManager::evalDerivedToBase(SVal Derived, QualType BaseType, 274 bool IsVirtual) { 275 Optional<loc::MemRegionVal> DerivedRegVal = 276 Derived.getAs<loc::MemRegionVal>(); 277 if (!DerivedRegVal) 278 return Derived; 279 280 const CXXRecordDecl *BaseDecl = BaseType->getPointeeCXXRecordDecl(); 281 if (!BaseDecl) 282 BaseDecl = BaseType->getAsCXXRecordDecl(); 283 assert(BaseDecl && "not a C++ object?"); 284 285 const MemRegion *BaseReg = MRMgr.getCXXBaseObjectRegion( 286 BaseDecl, cast<SubRegion>(DerivedRegVal->getRegion()), IsVirtual); 287 288 return loc::MemRegionVal(BaseReg); 289 } 290 291 /// Returns the static type of the given region, if it represents a C++ class 292 /// object. 293 /// 294 /// This handles both fully-typed regions, where the dynamic type is known, and 295 /// symbolic regions, where the dynamic type is merely bounded (and even then, 296 /// only ostensibly!), but does not take advantage of any dynamic type info. 297 static const CXXRecordDecl *getCXXRecordType(const MemRegion *MR) { 298 if (const auto *TVR = dyn_cast<TypedValueRegion>(MR)) 299 return TVR->getValueType()->getAsCXXRecordDecl(); 300 if (const auto *SR = dyn_cast<SymbolicRegion>(MR)) 301 return SR->getSymbol()->getType()->getPointeeCXXRecordDecl(); 302 return nullptr; 303 } 304 305 SVal StoreManager::attemptDownCast(SVal Base, QualType TargetType, 306 bool &Failed) { 307 Failed = false; 308 309 const MemRegion *MR = Base.getAsRegion(); 310 if (!MR) 311 return UnknownVal(); 312 313 // Assume the derived class is a pointer or a reference to a CXX record. 314 TargetType = TargetType->getPointeeType(); 315 assert(!TargetType.isNull()); 316 const CXXRecordDecl *TargetClass = TargetType->getAsCXXRecordDecl(); 317 if (!TargetClass && !TargetType->isVoidType()) 318 return UnknownVal(); 319 320 // Drill down the CXXBaseObject chains, which represent upcasts (casts from 321 // derived to base). 322 while (const CXXRecordDecl *MRClass = getCXXRecordType(MR)) { 323 // If found the derived class, the cast succeeds. 324 if (MRClass == TargetClass) 325 return loc::MemRegionVal(MR); 326 327 // We skip over incomplete types. They must be the result of an earlier 328 // reinterpret_cast, as one can only dynamic_cast between types in the same 329 // class hierarchy. 330 if (!TargetType->isVoidType() && MRClass->hasDefinition()) { 331 // Static upcasts are marked as DerivedToBase casts by Sema, so this will 332 // only happen when multiple or virtual inheritance is involved. 333 CXXBasePaths Paths(/*FindAmbiguities=*/false, /*RecordPaths=*/true, 334 /*DetectVirtual=*/false); 335 if (MRClass->isDerivedFrom(TargetClass, Paths)) 336 return evalDerivedToBase(loc::MemRegionVal(MR), Paths.front()); 337 } 338 339 if (const auto *BaseR = dyn_cast<CXXBaseObjectRegion>(MR)) { 340 // Drill down the chain to get the derived classes. 341 MR = BaseR->getSuperRegion(); 342 continue; 343 } 344 345 // If this is a cast to void*, return the region. 346 if (TargetType->isVoidType()) 347 return loc::MemRegionVal(MR); 348 349 // Strange use of reinterpret_cast can give us paths we don't reason 350 // about well, by putting in ElementRegions where we'd expect 351 // CXXBaseObjectRegions. If it's a valid reinterpret_cast (i.e. if the 352 // derived class has a zero offset from the base class), then it's safe 353 // to strip the cast; if it's invalid, -Wreinterpret-base-class should 354 // catch it. In the interest of performance, the analyzer will silently 355 // do the wrong thing in the invalid case (because offsets for subregions 356 // will be wrong). 357 const MemRegion *Uncasted = MR->StripCasts(/*IncludeBaseCasts=*/false); 358 if (Uncasted == MR) { 359 // We reached the bottom of the hierarchy and did not find the derived 360 // class. We must be casting the base to derived, so the cast should 361 // fail. 362 break; 363 } 364 365 MR = Uncasted; 366 } 367 368 // We failed if the region we ended up with has perfect type info. 369 Failed = isa<TypedValueRegion>(MR); 370 return UnknownVal(); 371 } 372 373 /// CastRetrievedVal - Used by subclasses of StoreManager to implement 374 /// implicit casts that arise from loads from regions that are reinterpreted 375 /// as another region. 376 SVal StoreManager::CastRetrievedVal(SVal V, const TypedValueRegion *R, 377 QualType castTy) { 378 if (castTy.isNull() || V.isUnknownOrUndef()) 379 return V; 380 381 // When retrieving symbolic pointer and expecting a non-void pointer, 382 // wrap them into element regions of the expected type if necessary. 383 // SValBuilder::dispatchCast() doesn't do that, but it is necessary to 384 // make sure that the retrieved value makes sense, because there's no other 385 // cast in the AST that would tell us to cast it to the correct pointer type. 386 // We might need to do that for non-void pointers as well. 387 // FIXME: We really need a single good function to perform casts for us 388 // correctly every time we need it. 389 if (castTy->isPointerType() && !castTy->isVoidPointerType()) 390 if (const auto *SR = dyn_cast_or_null<SymbolicRegion>(V.getAsRegion())) 391 if (SR->getSymbol()->getType().getCanonicalType() != 392 castTy.getCanonicalType()) 393 return loc::MemRegionVal(castRegion(SR, castTy)); 394 395 return svalBuilder.dispatchCast(V, castTy); 396 } 397 398 SVal StoreManager::getLValueFieldOrIvar(const Decl *D, SVal Base) { 399 if (Base.isUnknownOrUndef()) 400 return Base; 401 402 Loc BaseL = Base.castAs<Loc>(); 403 const SubRegion* BaseR = nullptr; 404 405 switch (BaseL.getSubKind()) { 406 case loc::MemRegionValKind: 407 BaseR = cast<SubRegion>(BaseL.castAs<loc::MemRegionVal>().getRegion()); 408 break; 409 410 case loc::GotoLabelKind: 411 // These are anormal cases. Flag an undefined value. 412 return UndefinedVal(); 413 414 case loc::ConcreteIntKind: 415 // While these seem funny, this can happen through casts. 416 // FIXME: What we should return is the field offset, not base. For example, 417 // add the field offset to the integer value. That way things 418 // like this work properly: &(((struct foo *) 0xa)->f) 419 // However, that's not easy to fix without reducing our abilities 420 // to catch null pointer dereference. Eg., ((struct foo *)0x0)->f = 7 421 // is a null dereference even though we're dereferencing offset of f 422 // rather than null. Coming up with an approach that computes offsets 423 // over null pointers properly while still being able to catch null 424 // dereferences might be worth it. 425 return Base; 426 427 default: 428 llvm_unreachable("Unhandled Base."); 429 } 430 431 // NOTE: We must have this check first because ObjCIvarDecl is a subclass 432 // of FieldDecl. 433 if (const auto *ID = dyn_cast<ObjCIvarDecl>(D)) 434 return loc::MemRegionVal(MRMgr.getObjCIvarRegion(ID, BaseR)); 435 436 return loc::MemRegionVal(MRMgr.getFieldRegion(cast<FieldDecl>(D), BaseR)); 437 } 438 439 SVal StoreManager::getLValueIvar(const ObjCIvarDecl *decl, SVal base) { 440 return getLValueFieldOrIvar(decl, base); 441 } 442 443 SVal StoreManager::getLValueElement(QualType elementType, NonLoc Offset, 444 SVal Base) { 445 // If the base is an unknown or undefined value, just return it back. 446 // FIXME: For absolute pointer addresses, we just return that value back as 447 // well, although in reality we should return the offset added to that 448 // value. See also the similar FIXME in getLValueFieldOrIvar(). 449 if (Base.isUnknownOrUndef() || Base.getAs<loc::ConcreteInt>()) 450 return Base; 451 452 if (Base.getAs<loc::GotoLabel>()) 453 return UnknownVal(); 454 455 const SubRegion *BaseRegion = 456 Base.castAs<loc::MemRegionVal>().getRegionAs<SubRegion>(); 457 458 // Pointer of any type can be cast and used as array base. 459 const auto *ElemR = dyn_cast<ElementRegion>(BaseRegion); 460 461 // Convert the offset to the appropriate size and signedness. 462 Offset = svalBuilder.convertToArrayIndex(Offset).castAs<NonLoc>(); 463 464 if (!ElemR) { 465 // If the base region is not an ElementRegion, create one. 466 // This can happen in the following example: 467 // 468 // char *p = __builtin_alloc(10); 469 // p[1] = 8; 470 // 471 // Observe that 'p' binds to an AllocaRegion. 472 return loc::MemRegionVal(MRMgr.getElementRegion(elementType, Offset, 473 BaseRegion, Ctx)); 474 } 475 476 SVal BaseIdx = ElemR->getIndex(); 477 478 if (!BaseIdx.getAs<nonloc::ConcreteInt>()) 479 return UnknownVal(); 480 481 const llvm::APSInt &BaseIdxI = 482 BaseIdx.castAs<nonloc::ConcreteInt>().getValue(); 483 484 // Only allow non-integer offsets if the base region has no offset itself. 485 // FIXME: This is a somewhat arbitrary restriction. We should be using 486 // SValBuilder here to add the two offsets without checking their types. 487 if (!Offset.getAs<nonloc::ConcreteInt>()) { 488 if (isa<ElementRegion>(BaseRegion->StripCasts())) 489 return UnknownVal(); 490 491 return loc::MemRegionVal(MRMgr.getElementRegion( 492 elementType, Offset, cast<SubRegion>(ElemR->getSuperRegion()), Ctx)); 493 } 494 495 const llvm::APSInt& OffI = Offset.castAs<nonloc::ConcreteInt>().getValue(); 496 assert(BaseIdxI.isSigned()); 497 498 // Compute the new index. 499 nonloc::ConcreteInt NewIdx(svalBuilder.getBasicValueFactory().getValue(BaseIdxI + 500 OffI)); 501 502 // Construct the new ElementRegion. 503 const SubRegion *ArrayR = cast<SubRegion>(ElemR->getSuperRegion()); 504 return loc::MemRegionVal(MRMgr.getElementRegion(elementType, NewIdx, ArrayR, 505 Ctx)); 506 } 507 508 StoreManager::BindingsHandler::~BindingsHandler() = default; 509 510 bool StoreManager::FindUniqueBinding::HandleBinding(StoreManager& SMgr, 511 Store store, 512 const MemRegion* R, 513 SVal val) { 514 SymbolRef SymV = val.getAsLocSymbol(); 515 if (!SymV || SymV != Sym) 516 return true; 517 518 if (Binding) { 519 First = false; 520 return false; 521 } 522 else 523 Binding = R; 524 525 return true; 526 } 527