//== RegionStore.cpp - Field-sensitive store model --------------*- C++ -*--==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines a basic region store model. In this model, we do have field
// sensitivity. But we assume nothing about the heap shape. So recursive data
// structures are largely ignored. Basically we do 1-limiting analysis.
// Parameter pointers are assumed with no aliasing. Pointee objects of
// parameters are created lazily.
//
//===----------------------------------------------------------------------===//

#include "clang/AST/Attr.h"
#include "clang/AST/CharUnits.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/Analysis/Analyses/LiveVariables.h"
#include "clang/Analysis/AnalysisDeclContext.h"
#include "clang/Basic/JsonSupport.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
#include "llvm/ADT/ImmutableMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/Support/raw_ostream.h"
#include <utility>

using namespace clang;
using namespace ento;

//===----------------------------------------------------------------------===//
// Representation of binding keys.
//===----------------------------------------------------------------------===//

namespace {
/// A key for looking up a single binding in a cluster: identifies a region
/// together with the binding kind (Direct or Default) and either a concrete
/// bit-offset or a symbolic-offset base region.
class BindingKey {
public:
  enum Kind { Default = 0x0, Direct = 0x1 };
private:
  // Third flag bit, stored alongside Kind in P's integer: set when the key's
  // offset is symbolic rather than a concrete bit count.
  enum { Symbolic = 0x2 };

  // The bound region, plus 2 tag bits holding Kind and the Symbolic flag.
  llvm::PointerIntPair<const MemRegion *, 2> P;
  // When Symbolic is clear: the concrete offset in bits.
  // When Symbolic is set: the base SubRegion pointer, stored as an integer
  // (see getConcreteOffsetRegion()).
  uint64_t Data;

  /// Create a key for a binding to region \p r, which has a symbolic offset
  /// from region \p Base.
  explicit BindingKey(const SubRegion *r, const SubRegion *Base, Kind k)
      : P(r, k | Symbolic), Data(reinterpret_cast<uintptr_t>(Base)) {
    assert(r && Base && "Must have known regions.");
    assert(getConcreteOffsetRegion() == Base && "Failed to store base region");
  }

  /// Create a key for a binding at \p offset from base region \p r.
  explicit BindingKey(const MemRegion *r, uint64_t offset, Kind k)
      : P(r, k), Data(offset) {
    assert(r && "Must have known regions.");
    assert(getOffset() == offset && "Failed to store offset");
    assert((r == r->getBaseRegion() ||
            isa<ObjCIvarRegion, CXXDerivedObjectRegion>(r)) &&
           "Not a base");
  }
public:

  bool isDirect() const { return P.getInt() & Direct; }
  bool hasSymbolicOffset() const { return P.getInt() & Symbolic; }

  const MemRegion *getRegion() const { return P.getPointer(); }

  /// Concrete bit-offset of the binding; only valid for non-symbolic keys.
  uint64_t getOffset() const {
    assert(!hasSymbolicOffset());
    return Data;
  }

  /// Base region the symbolic offset is relative to; only valid for
  /// symbolic-offset keys (Data holds the pointer bits in that case).
  const SubRegion *getConcreteOffsetRegion() const {
    assert(hasSymbolicOffset());
    return reinterpret_cast<const SubRegion *>(static_cast<uintptr_t>(Data));
  }

  const MemRegion *getBaseRegion() const {
    if (hasSymbolicOffset())
      return getConcreteOffsetRegion()->getBaseRegion();
    return getRegion()->getBaseRegion();
  }

  void Profile(llvm::FoldingSetNodeID& ID) const {
    ID.AddPointer(P.getOpaqueValue());
    ID.AddInteger(Data);
  }

  static BindingKey Make(const MemRegion *R, Kind k);

  // Order by (region pointer, tag bits) first, then by offset/Data.
  bool operator<(const BindingKey &X) const {
    if (P.getOpaqueValue() < X.P.getOpaqueValue())
      return true;
    if (P.getOpaqueValue() > X.P.getOpaqueValue())
      return false;
    return Data < X.Data;
  }

  bool operator==(const BindingKey &X) const {
    return P.getOpaqueValue() == X.P.getOpaqueValue() &&
           Data == X.Data;
  }

  LLVM_DUMP_METHOD void dump() const;
};
} // end anonymous namespace

BindingKey BindingKey::Make(const MemRegion *R, Kind k) {
  const RegionOffset &RO = R->getAsOffset();
  if (RO.hasSymbolicOffset())
    return BindingKey(cast<SubRegion>(R), cast<SubRegion>(RO.getRegion()), k);

  return BindingKey(RO.getRegion(), RO.getOffset(), k);
}

namespace llvm {
// Prints the key as a fragment of a JSON object (used by printJson below).
static inline raw_ostream &operator<<(raw_ostream &Out, BindingKey K) {
  Out << "\"kind\": \"" << (K.isDirect() ? "Direct" : "Default")
      << "\", \"offset\": ";

  if (!K.hasSymbolicOffset())
    Out << K.getOffset();
  else
    Out << "null";

  return Out;
}

} // namespace llvm

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void BindingKey::dump() const { llvm::errs() << *this; }
#endif

//===----------------------------------------------------------------------===//
// Actual Store type.
//===----------------------------------------------------------------------===//

typedef llvm::ImmutableMap<BindingKey, SVal> ClusterBindings;
typedef llvm::ImmutableMapRef<BindingKey, SVal> ClusterBindingsRef;
typedef std::pair<BindingKey, SVal> BindingPair;

typedef llvm::ImmutableMap<const MemRegion *, ClusterBindings>
  RegionBindings;

namespace {
class RegionBindingsRef : public llvm::ImmutableMapRef<const MemRegion *,
                                 ClusterBindings> {
  ClusterBindings::Factory *CBFactory;

  // This flag indicates whether the current bindings are within the analysis
  // that has started from main().
  // It affects how we perform loads from
  // global variables that have initializers: if we have observed the
  // program execution from the start and we know that these variables
  // have not been overwritten yet, we can be sure that their initializers
  // are still relevant. This flag never gets changed when the bindings are
  // updated, so it could potentially be moved into RegionStoreManager
  // (as if it's the same bindings but a different loading procedure)
  // however that would have made the manager needlessly stateful.
  bool IsMainAnalysis;

public:
  typedef llvm::ImmutableMapRef<const MemRegion *, ClusterBindings>
          ParentTy;

  RegionBindingsRef(ClusterBindings::Factory &CBFactory,
                    const RegionBindings::TreeTy *T,
                    RegionBindings::TreeTy::Factory *F,
                    bool IsMainAnalysis)
      : llvm::ImmutableMapRef<const MemRegion *, ClusterBindings>(T, F),
        CBFactory(&CBFactory), IsMainAnalysis(IsMainAnalysis) {}

  RegionBindingsRef(const ParentTy &P,
                    ClusterBindings::Factory &CBFactory,
                    bool IsMainAnalysis)
      : llvm::ImmutableMapRef<const MemRegion *, ClusterBindings>(P),
        CBFactory(&CBFactory), IsMainAnalysis(IsMainAnalysis) {}

  // add/remove wrap the parent map operations so the factory pointer and the
  // IsMainAnalysis flag are carried over into the new (immutable) map.
  RegionBindingsRef add(key_type_ref K, data_type_ref D) const {
    return RegionBindingsRef(static_cast<const ParentTy *>(this)->add(K, D),
                             *CBFactory, IsMainAnalysis);
  }

  RegionBindingsRef remove(key_type_ref K) const {
    return RegionBindingsRef(static_cast<const ParentTy *>(this)->remove(K),
                             *CBFactory, IsMainAnalysis);
  }

  RegionBindingsRef addBinding(BindingKey K, SVal V) const;

  RegionBindingsRef addBinding(const MemRegion *R,
                               BindingKey::Kind k, SVal V) const;

  const SVal *lookup(BindingKey K) const;
  const SVal *lookup(const MemRegion *R, BindingKey::Kind k) const;
  using llvm::ImmutableMapRef<const MemRegion *, ClusterBindings>::lookup;

  RegionBindingsRef removeBinding(BindingKey K);

  RegionBindingsRef removeBinding(const MemRegion *R,
                                  BindingKey::Kind k);

  // Removes both the Direct and the Default binding for \p R.
  RegionBindingsRef removeBinding(const MemRegion *R) {
    return removeBinding(R, BindingKey::Direct).
           removeBinding(R, BindingKey::Default);
  }

  Optional<SVal> getDirectBinding(const MemRegion *R) const;

  /// getDefaultBinding - Returns an SVal* representing an optional default
  /// binding associated with a region and its subregions.
  Optional<SVal> getDefaultBinding(const MemRegion *R) const;

  /// Return the internal tree as a Store.
  /// The IsMainAnalysis flag is packed into the low bit of the root pointer;
  /// RegionStoreManager::getRegionBindings() unpacks it again.
  Store asStore() const {
    llvm::PointerIntPair<Store, 1, bool> Ptr = {
        asImmutableMap().getRootWithoutRetain(), IsMainAnalysis};
    return reinterpret_cast<Store>(Ptr.getOpaqueValue());
  }

  bool isMainAnalysis() const {
    return IsMainAnalysis;
  }

  void printJson(raw_ostream &Out, const char *NL = "\n",
                 unsigned int Space = 0, bool IsDot = false) const {
    for (iterator I = begin(); I != end(); ++I) {
      // TODO: We might need a .printJson for I.getKey() as well.
      Indent(Out, Space, IsDot)
          << "{ \"cluster\": \"" << I.getKey() << "\", \"pointer\": \""
          << (const void *)I.getKey() << "\", \"items\": [" << NL;

      ++Space;
      const ClusterBindings &CB = I.getData();
      for (ClusterBindings::iterator CI = CB.begin(); CI != CB.end(); ++CI) {
        Indent(Out, Space, IsDot) << "{ " << CI.getKey() << ", \"value\": ";
        CI.getData().printJson(Out, /*AddQuotes=*/true);
        Out << " }";
        if (std::next(CI) != CB.end())
          Out << ',';
        Out << NL;
      }

      --Space;
      Indent(Out, Space, IsDot) << "]}";
      if (std::next(I) != end())
        Out << ',';
      Out << NL;
    }
  }

  LLVM_DUMP_METHOD void dump() const { printJson(llvm::errs()); }
};
} // end anonymous namespace

typedef const RegionBindingsRef& RegionBindingsConstRef;

Optional<SVal> RegionBindingsRef::getDirectBinding(const MemRegion *R) const {
  return Optional<SVal>::create(lookup(R, BindingKey::Direct));
}

Optional<SVal> RegionBindingsRef::getDefaultBinding(const MemRegion *R) const {
  return Optional<SVal>::create(lookup(R, BindingKey::Default));
}

RegionBindingsRef RegionBindingsRef::addBinding(BindingKey K, SVal V) const {
  // Bindings are grouped into clusters keyed by their base region.
  const MemRegion *Base = K.getBaseRegion();

  const ClusterBindings *ExistingCluster = lookup(Base);
  ClusterBindings Cluster =
      (ExistingCluster ?
       *ExistingCluster : CBFactory->getEmptyMap());

  ClusterBindings NewCluster = CBFactory->add(Cluster, K, V);
  return add(Base, NewCluster);
}


RegionBindingsRef RegionBindingsRef::addBinding(const MemRegion *R,
                                                BindingKey::Kind k,
                                                SVal V) const {
  return addBinding(BindingKey::Make(R, k), V);
}

const SVal *RegionBindingsRef::lookup(BindingKey K) const {
  // Find the cluster of K's base region, then the binding inside it.
  const ClusterBindings *Cluster = lookup(K.getBaseRegion());
  if (!Cluster)
    return nullptr;
  return Cluster->lookup(K);
}

const SVal *RegionBindingsRef::lookup(const MemRegion *R,
                                      BindingKey::Kind k) const {
  return lookup(BindingKey::Make(R, k));
}

RegionBindingsRef RegionBindingsRef::removeBinding(BindingKey K) {
  const MemRegion *Base = K.getBaseRegion();
  const ClusterBindings *Cluster = lookup(Base);
  if (!Cluster)
    return *this;

  // Drop the whole cluster if removing this binding empties it.
  ClusterBindings NewCluster = CBFactory->remove(*Cluster, K);
  if (NewCluster.isEmpty())
    return remove(Base);
  return add(Base, NewCluster);
}

RegionBindingsRef RegionBindingsRef::removeBinding(const MemRegion *R,
                                                BindingKey::Kind k){
  return removeBinding(BindingKey::Make(R, k));
}

//===----------------------------------------------------------------------===//
// Main RegionStore logic.
//===----------------------------------------------------------------------===//

namespace {
class InvalidateRegionsWorker;

class RegionStoreManager : public StoreManager {
public:
  RegionBindings::Factory RBFactory;
  mutable ClusterBindings::Factory CBFactory;

  typedef std::vector<SVal> SValListTy;
private:
  // Cache for getInterestingValues(): SVals found inside a LazyCompoundVal.
  typedef llvm::DenseMap<const LazyCompoundValData *,
                         SValListTy> LazyBindingsMapTy;
  LazyBindingsMapTy LazyBindingsMap;

  /// The largest number of fields a struct can have and still be
  /// considered "small".
  ///
  /// This is currently used to decide whether or not it is worth "forcing" a
  /// LazyCompoundVal on bind.
  ///
  /// This is controlled by 'region-store-small-struct-limit' option.
  /// To disable all small-struct-dependent behavior, set the option to "0".
  unsigned SmallStructLimit;

  /// A helper used to populate the work list with the given set of
  /// regions.
  void populateWorkList(InvalidateRegionsWorker &W,
                        ArrayRef<SVal> Values,
                        InvalidatedRegions *TopLevelRegions);

public:
  RegionStoreManager(ProgramStateManager &mgr)
      : StoreManager(mgr), RBFactory(mgr.getAllocator()),
        CBFactory(mgr.getAllocator()), SmallStructLimit(0) {
    // Pull the struct-size threshold from the analyzer options.
    ExprEngine &Eng = StateMgr.getOwningEngine();
    AnalyzerOptions &Options = Eng.getAnalysisManager().options;
    SmallStructLimit = Options.RegionStoreSmallStructLimit;
  }

  /// setImplicitDefaultValue - Set the default binding for the provided
  /// MemRegion to the value implicitly defined for compound literals when
  /// the value is not specified.
  RegionBindingsRef setImplicitDefaultValue(RegionBindingsConstRef B,
                                            const MemRegion *R, QualType T);

  /// ArrayToPointer - Emulates the "decay" of an array to a pointer
  /// type. 'Array' represents the lvalue of the array being decayed
  /// to a pointer, and the returned SVal represents the decayed
  /// version of that lvalue (i.e., a pointer to the first element of
  /// the array). This is called by ExprEngine when evaluating
  /// casts from arrays to pointers.
  SVal ArrayToPointer(Loc Array, QualType ElementTy) override;

  /// Creates the Store that correctly represents memory contents before
  /// the beginning of the analysis of the given top-level stack frame.
  StoreRef getInitialStore(const LocationContext *InitLoc) override {
    // IsMainAnalysis is only set for a C (not C++) main() entry point.
    bool IsMainAnalysis = false;
    if (const auto *FD = dyn_cast<FunctionDecl>(InitLoc->getDecl()))
      IsMainAnalysis = FD->isMain() && !Ctx.getLangOpts().CPlusPlus;
    return StoreRef(RegionBindingsRef(
        RegionBindingsRef::ParentTy(RBFactory.getEmptyMap(), RBFactory),
        CBFactory, IsMainAnalysis).asStore(), *this);
  }

  //===-------------------------------------------------------------------===//
  // Binding values to regions.
  //===-------------------------------------------------------------------===//
  RegionBindingsRef invalidateGlobalRegion(MemRegion::Kind K,
                                           const Expr *Ex,
                                           unsigned Count,
                                           const LocationContext *LCtx,
                                           RegionBindingsRef B,
                                           InvalidatedRegions *Invalidated);

  StoreRef invalidateRegions(Store store,
                             ArrayRef<SVal> Values,
                             const Expr *E, unsigned Count,
                             const LocationContext *LCtx,
                             const CallEvent *Call,
                             InvalidatedSymbols &IS,
                             RegionAndSymbolInvalidationTraits &ITraits,
                             InvalidatedRegions *Invalidated,
                             InvalidatedRegions *InvalidatedTopLevel) override;

  bool scanReachableSymbols(Store S, const MemRegion *R,
                            ScanReachableSymbols &Callbacks) override;

  RegionBindingsRef removeSubRegionBindings(RegionBindingsConstRef B,
                                            const SubRegion *R);
  Optional<SVal>
  getConstantValFromConstArrayInitializer(RegionBindingsConstRef B,
                                          const ElementRegion *R);
  Optional<SVal>
  getSValFromInitListExpr(const InitListExpr *ILE,
                          const SmallVector<uint64_t, 2> &ConcreteOffsets,
                          QualType ElemT);
  SVal getSValFromStringLiteral(const StringLiteral *SL, uint64_t Offset,
                                QualType ElemT);

public: // Part of public interface to class.

  // Bind - Associates value V with the location LV in a new store.
  StoreRef Bind(Store store, Loc LV, SVal V) override {
    return StoreRef(bind(getRegionBindings(store), LV, V).asStore(), *this);
  }

  RegionBindingsRef bind(RegionBindingsConstRef B, Loc LV, SVal V);

  // BindDefaultInitial is only used to initialize a region with
  // a default value.
  StoreRef BindDefaultInitial(Store store, const MemRegion *R,
                              SVal V) override {
    RegionBindingsRef B = getRegionBindings(store);
    // Use other APIs when you have to wipe the region that was initialized
    // earlier.
    assert(!(B.getDefaultBinding(R) || B.getDirectBinding(R)) &&
           "Double initialization!");
    B = B.addBinding(BindingKey::Make(R, BindingKey::Default), V);
    return StoreRef(B.asImmutableMap().getRootWithoutRetain(), *this);
  }

  // BindDefaultZero is used for zeroing constructors that may accidentally
  // overwrite existing bindings.
  StoreRef BindDefaultZero(Store store, const MemRegion *R) override {
    // FIXME: The offsets of empty bases can be tricky because of
    // of the so called "empty base class optimization".
    // If a base class has been optimized out
    // we should not try to create a binding, otherwise we should.
    // Unfortunately, at the moment ASTRecordLayout doesn't expose
    // the actual sizes of the empty bases
    // and trying to infer them from offsets/alignments
    // seems to be error-prone and non-trivial because of the trailing padding.
    // As a temporary mitigation we don't create bindings for empty bases.
    if (const auto *BR = dyn_cast<CXXBaseObjectRegion>(R))
      if (BR->getDecl()->isEmpty())
        return StoreRef(store, *this);

    RegionBindingsRef B = getRegionBindings(store);
    SVal V = svalBuilder.makeZeroVal(Ctx.CharTy);
    // Wipe any existing sub-bindings before installing the zero default.
    B = removeSubRegionBindings(B, cast<SubRegion>(R));
    B = B.addBinding(BindingKey::Make(R, BindingKey::Default), V);
    return StoreRef(B.asImmutableMap().getRootWithoutRetain(), *this);
  }

  /// Attempt to extract the fields of \p LCV and bind them to the struct region
  /// \p R.
  ///
  /// This path is used when it seems advantageous to "force" loading the values
  /// within a LazyCompoundVal to bind memberwise to the struct region, rather
  /// than using a Default binding at the base of the entire region. This is a
  /// heuristic attempting to avoid building long chains of LazyCompoundVals.
  ///
  /// \returns The updated store bindings, or \c None if binding non-lazily
  /// would be too expensive.
  Optional<RegionBindingsRef> tryBindSmallStruct(RegionBindingsConstRef B,
                                                 const TypedValueRegion *R,
                                                 const RecordDecl *RD,
                                                 nonloc::LazyCompoundVal LCV);

  /// BindStruct - Bind a compound value to a structure.
  RegionBindingsRef bindStruct(RegionBindingsConstRef B,
                               const TypedValueRegion* R, SVal V);

  /// BindVector - Bind a compound value to a vector.
  RegionBindingsRef bindVector(RegionBindingsConstRef B,
                               const TypedValueRegion* R, SVal V);

  RegionBindingsRef bindArray(RegionBindingsConstRef B,
                              const TypedValueRegion* R,
                              SVal V);

  /// Clears out all bindings in the given region and assigns a new value
  /// as a Default binding.
  RegionBindingsRef bindAggregate(RegionBindingsConstRef B,
                                  const TypedRegion *R,
                                  SVal DefaultVal);

  /// Create a new store with the specified binding removed.
  /// \param ST the original store, that is the basis for the new store.
  /// \param L the location whose binding should be removed.
  StoreRef killBinding(Store ST, Loc L) override;

  void incrementReferenceCount(Store store) override {
    getRegionBindings(store).manualRetain();
  }

  /// If the StoreManager supports it, decrement the reference count of
  /// the specified Store object. If the reference count hits 0, the memory
  /// associated with the object is recycled.
  void decrementReferenceCount(Store store) override {
    getRegionBindings(store).manualRelease();
  }

  bool includedInBindings(Store store, const MemRegion *region) const override;

  /// Return the value bound to specified location in a given state.
  ///
  /// The high level logic for this method is this:
  /// getBinding (L)
  ///   if L has binding
  ///     return L's binding
  ///   else if L is in killset
  ///     return unknown
  ///   else
  ///     if L is on stack or heap
  ///       return undefined
  ///     else
  ///       return symbolic
  SVal getBinding(Store S, Loc L, QualType T) override {
    return getBinding(getRegionBindings(S), L, T);
  }

  Optional<SVal> getDefaultBinding(Store S, const MemRegion *R) override {
    RegionBindingsRef B = getRegionBindings(S);
    // Default bindings are always applied over a base region so look up the
    // base region's default binding, otherwise the lookup will fail when R
    // is at an offset from R->getBaseRegion().
    return B.getDefaultBinding(R->getBaseRegion());
  }

  SVal getBinding(RegionBindingsConstRef B, Loc L, QualType T = QualType());

  SVal getBindingForElement(RegionBindingsConstRef B, const ElementRegion *R);

  SVal getBindingForField(RegionBindingsConstRef B, const FieldRegion *R);

  SVal getBindingForObjCIvar(RegionBindingsConstRef B, const ObjCIvarRegion *R);

  SVal getBindingForVar(RegionBindingsConstRef B, const VarRegion *R);

  SVal getBindingForLazySymbol(const TypedValueRegion *R);

  SVal getBindingForFieldOrElementCommon(RegionBindingsConstRef B,
                                         const TypedValueRegion *R,
                                         QualType Ty);

  SVal getLazyBinding(const SubRegion *LazyBindingRegion,
                      RegionBindingsRef LazyBinding);

  /// Get bindings for the values in a struct and return a CompoundVal, used
  /// when doing struct copy:
  /// struct s x, y;
  /// x = y;
  /// y's value is retrieved by this method.
  SVal getBindingForStruct(RegionBindingsConstRef B, const TypedValueRegion *R);
  SVal getBindingForArray(RegionBindingsConstRef B, const TypedValueRegion *R);
  NonLoc createLazyBinding(RegionBindingsConstRef B, const TypedValueRegion *R);

  /// Used to lazily generate derived symbols for bindings that are defined
  /// implicitly by default bindings in a super region.
  ///
  /// Note that callers may need to specially handle LazyCompoundVals, which
  /// are returned as is in case the caller needs to treat them differently.
  Optional<SVal> getBindingForDerivedDefaultValue(RegionBindingsConstRef B,
                                                  const MemRegion *superR,
                                                  const TypedValueRegion *R,
                                                  QualType Ty);

  /// Get the state and region whose binding this region \p R corresponds to.
  ///
  /// If there is no lazy binding for \p R, the returned value will have a null
  /// \c second. Note that a null pointer can represents a valid Store.
  std::pair<Store, const SubRegion *>
  findLazyBinding(RegionBindingsConstRef B, const SubRegion *R,
                  const SubRegion *originalRegion);

  /// Returns the cached set of interesting SVals contained within a lazy
  /// binding.
  ///
  /// The precise value of "interesting" is determined for the purposes of
  /// RegionStore's internal analysis. It must always contain all regions and
  /// symbols, but may omit constants and other kinds of SVal.
  const SValListTy &getInterestingValues(nonloc::LazyCompoundVal LCV);

  //===------------------------------------------------------------------===//
  // State pruning.
  //===------------------------------------------------------------------===//

  /// removeDeadBindings - Scans the RegionStore of 'state' for dead values.
  /// It returns a new Store with these values removed.
  StoreRef removeDeadBindings(Store store, const StackFrameContext *LCtx,
                              SymbolReaper& SymReaper) override;

  //===------------------------------------------------------------------===//
  // Utility methods.
  //===------------------------------------------------------------------===//

  // Unpacks the Store produced by RegionBindingsRef::asStore(): the low bit
  // of the opaque pointer carries the IsMainAnalysis flag.
  RegionBindingsRef getRegionBindings(Store store) const {
    llvm::PointerIntPair<Store, 1, bool> Ptr;
    Ptr.setFromOpaqueValue(const_cast<void *>(store));
    return RegionBindingsRef(
        CBFactory,
        static_cast<const RegionBindings::TreeTy *>(Ptr.getPointer()),
        RBFactory.getTreeFactory(),
        Ptr.getInt());
  }

  void printJson(raw_ostream &Out, Store S, const char *NL = "\n",
                 unsigned int Space = 0, bool IsDot = false) const override;

  // Invokes the handler on every Direct binding of a SubRegion in the store;
  // stops early if the handler returns false.
  void iterBindings(Store store, BindingsHandler& f) override {
    RegionBindingsRef B = getRegionBindings(store);
    for (RegionBindingsRef::iterator I = B.begin(), E = B.end(); I != E; ++I) {
      const ClusterBindings &Cluster = I.getData();
      for (ClusterBindings::iterator CI = Cluster.begin(), CE = Cluster.end();
           CI != CE; ++CI) {
        const BindingKey &K = CI.getKey();
        if (!K.isDirect())
          continue;
        if (const SubRegion *R = dyn_cast<SubRegion>(K.getRegion())) {
          // FIXME: Possibly incorporate the offset?
          if (!f.HandleBinding(*this, store, R, CI.getData()))
            return;
        }
      }
    }
  }
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// RegionStore creation.
//===----------------------------------------------------------------------===//

std::unique_ptr<StoreManager>
ento::CreateRegionStoreManager(ProgramStateManager &StMgr) {
  return std::make_unique<RegionStoreManager>(StMgr);
}

//===----------------------------------------------------------------------===//
// Region Cluster analysis.
//===----------------------------------------------------------------------===//

namespace {
/// Used to determine which global regions are automatically included in the
/// initial worklist of a ClusterAnalysis.
enum GlobalsFilterKind {
  /// Don't include any global regions.
  GFK_None,
  /// Only include system globals.
  GFK_SystemOnly,
  /// Include all global regions.
  GFK_All
};

/// CRTP base for worklist-driven traversals over binding clusters (used by
/// invalidation and dead-binding removal). DERIVED may shadow the Visit*
/// hooks and includeEntireMemorySpace(); dispatch is static via static_cast.
template <typename DERIVED>
class ClusterAnalysis {
protected:
  typedef llvm::DenseMap<const MemRegion *, const ClusterBindings *> ClusterMap;
  typedef const MemRegion * WorkListElement;
  typedef SmallVector<WorkListElement, 10> WorkList;

  // Clusters already queued; guards against visiting a cluster twice.
  llvm::SmallPtrSet<const ClusterBindings *, 16> Visited;

  WorkList WL;

  RegionStoreManager &RM;
  ASTContext &Ctx;
  SValBuilder &svalBuilder;

  RegionBindingsRef B;


protected:
  const ClusterBindings *getCluster(const MemRegion *R) {
    return B.lookup(R);
  }

  /// Returns true if all clusters in the given memspace should be initially
  /// included in the cluster analysis. Subclasses may provide their
  /// own implementation.
  bool includeEntireMemorySpace(const MemRegion *Base) {
    return false;
  }

public:
  ClusterAnalysis(RegionStoreManager &rm, ProgramStateManager &StateMgr,
                  RegionBindingsRef b)
      : RM(rm), Ctx(StateMgr.getContext()),
        svalBuilder(StateMgr.getSValBuilder()), B(std::move(b)) {}

  RegionBindingsRef getRegionBindings() const { return B; }

  bool isVisited(const MemRegion *R) {
    return Visited.count(getCluster(R));
  }

  void GenerateClusters() {
    // Scan the entire set of bindings and record the region clusters.
    for (RegionBindingsRef::iterator RI = B.begin(), RE = B.end();
         RI != RE; ++RI){
      const MemRegion *Base = RI.getKey();

      const ClusterBindings &Cluster = RI.getData();
      assert(!Cluster.isEmpty() && "Empty clusters should be removed");
      static_cast<DERIVED*>(this)->VisitAddedToCluster(Base, Cluster);

      // If the base's memspace should be entirely invalidated, add the cluster
      // to the workspace up front.
      if (static_cast<DERIVED*>(this)->includeEntireMemorySpace(Base))
        AddToWorkList(WorkListElement(Base), &Cluster);
    }
  }

  bool AddToWorkList(WorkListElement E, const ClusterBindings *C) {
    // Only enqueue a cluster the first time we see it.
    if (C && !Visited.insert(C).second)
      return false;
    WL.push_back(E);
    return true;
  }

  bool AddToWorkList(const MemRegion *R) {
    return static_cast<DERIVED*>(this)->AddToWorkList(R);
  }

  void RunWorkList() {
    while (!WL.empty()) {
      WorkListElement E = WL.pop_back_val();
      const MemRegion *BaseR = E;

      static_cast<DERIVED*>(this)->VisitCluster(BaseR, getCluster(BaseR));
    }
  }

  // Default no-op hooks; subclasses shadow these as needed.
  void VisitAddedToCluster(const MemRegion *baseR, const ClusterBindings &C) {}
  void VisitCluster(const MemRegion *baseR, const ClusterBindings *C) {}

  void VisitCluster(const MemRegion *BaseR, const ClusterBindings *C,
                    bool Flag) {
    static_cast<DERIVED*>(this)->VisitCluster(BaseR, C);
  }
};
}

//===----------------------------------------------------------------------===//
// Binding invalidation.
760 //===----------------------------------------------------------------------===// 761 762 bool RegionStoreManager::scanReachableSymbols(Store S, const MemRegion *R, 763 ScanReachableSymbols &Callbacks) { 764 assert(R == R->getBaseRegion() && "Should only be called for base regions"); 765 RegionBindingsRef B = getRegionBindings(S); 766 const ClusterBindings *Cluster = B.lookup(R); 767 768 if (!Cluster) 769 return true; 770 771 for (ClusterBindings::iterator RI = Cluster->begin(), RE = Cluster->end(); 772 RI != RE; ++RI) { 773 if (!Callbacks.scan(RI.getData())) 774 return false; 775 } 776 777 return true; 778 } 779 780 static inline bool isUnionField(const FieldRegion *FR) { 781 return FR->getDecl()->getParent()->isUnion(); 782 } 783 784 typedef SmallVector<const FieldDecl *, 8> FieldVector; 785 786 static void getSymbolicOffsetFields(BindingKey K, FieldVector &Fields) { 787 assert(K.hasSymbolicOffset() && "Not implemented for concrete offset keys"); 788 789 const MemRegion *Base = K.getConcreteOffsetRegion(); 790 const MemRegion *R = K.getRegion(); 791 792 while (R != Base) { 793 if (const FieldRegion *FR = dyn_cast<FieldRegion>(R)) 794 if (!isUnionField(FR)) 795 Fields.push_back(FR->getDecl()); 796 797 R = cast<SubRegion>(R)->getSuperRegion(); 798 } 799 } 800 801 static bool isCompatibleWithFields(BindingKey K, const FieldVector &Fields) { 802 assert(K.hasSymbolicOffset() && "Not implemented for concrete offset keys"); 803 804 if (Fields.empty()) 805 return true; 806 807 FieldVector FieldsInBindingKey; 808 getSymbolicOffsetFields(K, FieldsInBindingKey); 809 810 ptrdiff_t Delta = FieldsInBindingKey.size() - Fields.size(); 811 if (Delta >= 0) 812 return std::equal(FieldsInBindingKey.begin() + Delta, 813 FieldsInBindingKey.end(), 814 Fields.begin()); 815 else 816 return std::equal(FieldsInBindingKey.begin(), FieldsInBindingKey.end(), 817 Fields.begin() - Delta); 818 } 819 820 /// Collects all bindings in \p Cluster that may refer to bindings within 821 /// \p Top. 
///
/// Each binding is a pair whose \c first is the key (a BindingKey) and whose
/// \c second is the value (an SVal).
///
/// The \p IncludeAllDefaultBindings parameter specifies whether to include
/// default bindings that may extend beyond \p Top itself, e.g. if \p Top is
/// an aggregate within a larger aggregate with a default binding.
static void
collectSubRegionBindings(SmallVectorImpl<BindingPair> &Bindings,
                         SValBuilder &SVB, const ClusterBindings &Cluster,
                         const SubRegion *Top, BindingKey TopKey,
                         bool IncludeAllDefaultBindings) {
  // For a symbolic-offset key, match against the concrete base region instead
  // and remember the field path so we can filter candidate bindings below.
  FieldVector FieldsInSymbolicSubregions;
  if (TopKey.hasSymbolicOffset()) {
    getSymbolicOffsetFields(TopKey, FieldsInSymbolicSubregions);
    Top = TopKey.getConcreteOffsetRegion();
    TopKey = BindingKey::Make(Top, BindingKey::Default);
  }

  // Find the length (in bits) of the region being invalidated.
  uint64_t Length = UINT64_MAX;
  SVal Extent = Top->getMemRegionManager().getStaticSize(Top, SVB);
  if (Optional<nonloc::ConcreteInt> ExtentCI =
          Extent.getAs<nonloc::ConcreteInt>()) {
    const llvm::APSInt &ExtentInt = ExtentCI->getValue();
    assert(ExtentInt.isNonNegative() || ExtentInt.isUnsigned());
    // Extents are in bytes but region offsets are in bits. Be careful!
    Length = ExtentInt.getLimitedValue() * SVB.getContext().getCharWidth();
  } else if (const FieldRegion *FR = dyn_cast<FieldRegion>(Top)) {
    // Bit-fields have their width recorded on the declaration, in bits.
    if (FR->getDecl()->isBitField())
      Length = FR->getDecl()->getBitWidthValue(SVB.getContext());
  }

  for (ClusterBindings::iterator I = Cluster.begin(), E = Cluster.end();
       I != E; ++I) {
    BindingKey NextKey = I.getKey();
    if (NextKey.getRegion() == TopKey.getRegion()) {
      // FIXME: This doesn't catch the case where we're really invalidating a
      // region with a symbolic offset. Example:
      //        R: points[i].y
      //        Next: points[0].x

      if (NextKey.getOffset() > TopKey.getOffset() &&
          NextKey.getOffset() - TopKey.getOffset() < Length) {
        // Case 1: The next binding is inside the region we're invalidating.
        // Include it.
        Bindings.push_back(*I);

      } else if (NextKey.getOffset() == TopKey.getOffset()) {
        // Case 2: The next binding is at the same offset as the region we're
        // invalidating. In this case, we need to leave default bindings alone,
        // since they may be providing a default value for a region beyond what
        // we're invalidating.
        // FIXME: This is probably incorrect; consider invalidating an outer
        // struct whose first field is bound to a LazyCompoundVal.
        if (IncludeAllDefaultBindings || NextKey.isDirect())
          Bindings.push_back(*I);
      }

    } else if (NextKey.hasSymbolicOffset()) {
      const MemRegion *Base = NextKey.getConcreteOffsetRegion();
      if (Top->isSubRegionOf(Base) && Top != Base) {
        // Case 3: The next key is symbolic and we just changed something within
        // its concrete region. We don't know if the binding is still valid, so
        // we'll be conservative and include it.
        if (IncludeAllDefaultBindings || NextKey.isDirect())
          if (isCompatibleWithFields(NextKey, FieldsInSymbolicSubregions))
            Bindings.push_back(*I);
      } else if (const SubRegion *BaseSR = dyn_cast<SubRegion>(Base)) {
        // Case 4: The next key is symbolic, but we changed a known
        // super-region. In this case the binding is certainly included.
        if (BaseSR->isSubRegionOf(Top))
          if (isCompatibleWithFields(NextKey, FieldsInSymbolicSubregions))
            Bindings.push_back(*I);
      }
    }
  }
}

/// Convenience overload: collect bindings under \p Top using a default-kind
/// key built from \p Top itself.
static void
collectSubRegionBindings(SmallVectorImpl<BindingPair> &Bindings,
                         SValBuilder &SVB, const ClusterBindings &Cluster,
                         const SubRegion *Top, bool IncludeAllDefaultBindings) {
  collectSubRegionBindings(Bindings, SVB, Cluster, Top,
                           BindingKey::Make(Top, BindingKey::Default),
                           IncludeAllDefaultBindings);
}

RegionBindingsRef
RegionStoreManager::removeSubRegionBindings(RegionBindingsConstRef B,
                                            const SubRegion *Top) {
  BindingKey TopKey = BindingKey::Make(Top, BindingKey::Default);
  const MemRegion *ClusterHead = TopKey.getBaseRegion();

  if (Top == ClusterHead) {
    // We can remove an entire cluster's bindings all in one go.
    return B.remove(Top);
  }

  const ClusterBindings *Cluster = B.lookup(ClusterHead);
  if (!Cluster) {
    // If we're invalidating a region with a symbolic offset, we need to make
    // sure we don't treat the base region as uninitialized anymore.
    if (TopKey.hasSymbolicOffset()) {
      const SubRegion *Concrete = TopKey.getConcreteOffsetRegion();
      return B.addBinding(Concrete, BindingKey::Default, UnknownVal());
    }
    return B;
  }

  // Gather the bindings that fall within Top, then remove each of them from a
  // copy of the cluster.
  SmallVector<BindingPair, 32> Bindings;
  collectSubRegionBindings(Bindings, svalBuilder, *Cluster, Top, TopKey,
                           /*IncludeAllDefaultBindings=*/false);

  ClusterBindingsRef Result(*Cluster, CBFactory);
  for (SmallVectorImpl<BindingPair>::const_iterator I = Bindings.begin(),
                                                    E = Bindings.end();
       I != E; ++I)
    Result = Result.remove(I->first);

  // If we're invalidating a region with a symbolic offset, we need to make sure
  // we don't treat the base region as uninitialized anymore.
  // FIXME: This isn't very precise; see the example in
  // collectSubRegionBindings.
  if (TopKey.hasSymbolicOffset()) {
    const SubRegion *Concrete = TopKey.getConcreteOffsetRegion();
    Result = Result.add(BindingKey::Make(Concrete, BindingKey::Default),
                        UnknownVal());
  }

  // If nothing is left in the cluster, drop it entirely; otherwise store the
  // pruned cluster back.
  if (Result.isEmpty())
    return B.remove(ClusterHead);
  return B.add(ClusterHead, Result.asImmutableMap());
}

namespace {
/// Cluster worker that walks all regions reachable from a set of values and
/// invalidates their bindings, recording touched symbols and regions.
class InvalidateRegionsWorker : public ClusterAnalysis<InvalidateRegionsWorker>
{
  const Expr *Ex;
  unsigned Count;
  const LocationContext *LCtx;
  InvalidatedSymbols &IS;
  RegionAndSymbolInvalidationTraits &ITraits;
  StoreManager::InvalidatedRegions *Regions;
  GlobalsFilterKind GlobalsFilter;
public:
  InvalidateRegionsWorker(RegionStoreManager &rm,
                          ProgramStateManager &stateMgr,
                          RegionBindingsRef b,
                          const Expr *ex, unsigned count,
                          const LocationContext *lctx,
                          InvalidatedSymbols &is,
                          RegionAndSymbolInvalidationTraits &ITraitsIn,
                          StoreManager::InvalidatedRegions *r,
                          GlobalsFilterKind GFK)
     : ClusterAnalysis<InvalidateRegionsWorker>(rm, stateMgr, b),
       Ex(ex), Count(count), LCtx(lctx), IS(is), ITraits(ITraitsIn), Regions(r),
       GlobalsFilter(GFK) {}

  void VisitCluster(const MemRegion *baseR, const ClusterBindings *C);
  void VisitBinding(SVal V);

  using ClusterAnalysis::AddToWorkList;

  bool AddToWorkList(const MemRegion *R);

  /// Returns true if all clusters in the memory space for \p Base should
  /// be invalidated.
  bool includeEntireMemorySpace(const MemRegion *Base);

  /// Returns true if the memory space of the given region is one of the global
  /// regions specially included at the start of invalidation.
  bool isInitiallyIncludedGlobalRegion(const MemRegion *R);
};
}

bool InvalidateRegionsWorker::AddToWorkList(const MemRegion *R) {
  // Regions carrying TK_DoNotInvalidateSuperRegion are enqueued as-is rather
  // than being widened to their base region.
  bool doNotInvalidateSuperRegion = ITraits.hasTrait(
      R, RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
  const MemRegion *BaseR = doNotInvalidateSuperRegion ? R : R->getBaseRegion();
  return AddToWorkList(WorkListElement(BaseR), getCluster(BaseR));
}

void InvalidateRegionsWorker::VisitBinding(SVal V) {
  // A symbol? Mark it touched by the invalidation.
  if (SymbolRef Sym = V.getAsSymbol())
    IS.insert(Sym);

  // A region? Queue it so its own cluster gets invalidated too.
  if (const MemRegion *R = V.getAsRegion()) {
    AddToWorkList(R);
    return;
  }

  // Is it a LazyCompoundVal? All references get invalidated as well.
  if (Optional<nonloc::LazyCompoundVal> LCS =
          V.getAs<nonloc::LazyCompoundVal>()) {

    const RegionStoreManager::SValListTy &Vals = RM.getInterestingValues(*LCS);

    for (RegionStoreManager::SValListTy::const_iterator I = Vals.begin(),
                                                        E = Vals.end();
         I != E; ++I)
      VisitBinding(*I);

    return;
  }
}

void InvalidateRegionsWorker::VisitCluster(const MemRegion *baseR,
                                           const ClusterBindings *C) {

  bool PreserveRegionsContents =
      ITraits.hasTrait(baseR,
                       RegionAndSymbolInvalidationTraits::TK_PreserveContents);

  if (C) {
    // Visit every existing binding first (so reachable symbols/regions are
    // recorded), then drop the whole cluster unless contents are preserved.
    for (ClusterBindings::iterator I = C->begin(), E = C->end(); I != E; ++I)
      VisitBinding(I.getData());

    // Invalidate regions contents.
    if (!PreserveRegionsContents)
      B = B.remove(baseR);
  }

  if (const auto *TO = dyn_cast<TypedValueRegion>(baseR)) {
    if (const auto *RD = TO->getValueType()->getAsCXXRecordDecl()) {

      // Lambdas can affect all static local variables without explicitly
      // capturing those.
      // We invalidate all static locals referenced inside the lambda body.
      if (RD->isLambda() && RD->getLambdaCallOperator()->getBody()) {
        using namespace ast_matchers;

        // Match every reference to a static-storage-duration variable inside
        // the lambda's call operator body.
        const char *DeclBind = "DeclBind";
        StatementMatcher RefToStatic = stmt(hasDescendant(declRefExpr(
              to(varDecl(hasStaticStorageDuration()).bind(DeclBind)))));
        auto Matches =
            match(RefToStatic, *RD->getLambdaCallOperator()->getBody(),
                  RD->getASTContext());

        for (BoundNodes &Match : Matches) {
          auto *VD = Match.getNodeAs<VarDecl>(DeclBind);
          const VarRegion *ToInvalidate =
              RM.getRegionManager().getVarRegion(VD, LCtx);
          AddToWorkList(ToInvalidate);
        }
      }
    }
  }

  // BlockDataRegion? If so, invalidate captured variables that are passed
  // by reference.
  if (const BlockDataRegion *BR = dyn_cast<BlockDataRegion>(baseR)) {
    for (BlockDataRegion::referenced_vars_iterator
         BI = BR->referenced_vars_begin(), BE = BR->referenced_vars_end() ;
         BI != BE; ++BI) {
      const VarRegion *VR = BI.getCapturedRegion();
      const VarDecl *VD = VR->getDecl();
      if (VD->hasAttr<BlocksAttr>() || !VD->hasLocalStorage()) {
        AddToWorkList(VR);
      }
      else if (Loc::isLocType(VR->getValueType())) {
        // Map the current bindings to a Store to retrieve the value
        // of the binding. If that binding itself is a region, we should
        // invalidate that region. This is because a block may capture
        // a pointer value, but the thing pointed by that pointer may
        // get invalidated.
        SVal V = RM.getBinding(B, loc::MemRegionVal(VR));
        if (Optional<Loc> L = V.getAs<Loc>()) {
          if (const MemRegion *LR = L->getAsRegion())
            AddToWorkList(LR);
        }
      }
    }
    return;
  }

  // Symbolic region? Mark that symbol touched by the invalidation.
  if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(baseR))
    IS.insert(SR->getSymbol());

  // Nothing else should be done in the case when we preserve regions context.
  if (PreserveRegionsContents)
    return;

  // Otherwise, we have a normal data region. Record that we touched the region.
  if (Regions)
    Regions->push_back(baseR);

  if (isa<AllocaRegion, SymbolicRegion>(baseR)) {
    // Invalidate the region by setting its default value to
    // conjured symbol. The type of the symbol is irrelevant.
    DefinedOrUnknownSVal V =
        svalBuilder.conjureSymbolVal(baseR, Ex, LCtx, Ctx.IntTy, Count);
    B = B.addBinding(baseR, BindingKey::Default, V);
    return;
  }

  if (!baseR->isBoundable())
    return;

  const TypedValueRegion *TR = cast<TypedValueRegion>(baseR);
  QualType T = TR->getValueType();

  if (isInitiallyIncludedGlobalRegion(baseR)) {
    // If the region is a global and we are invalidating all globals,
    // erasing the entry is good enough. This causes all globals to be lazily
    // symbolicated from the same base symbol.
    return;
  }

  if (T->isRecordType()) {
    // Invalidate the region by setting its default value to
    // conjured symbol. The type of the symbol is irrelevant.
    DefinedOrUnknownSVal V = svalBuilder.conjureSymbolVal(baseR, Ex, LCtx,
                                                          Ctx.IntTy, Count);
    B = B.addBinding(baseR, BindingKey::Default, V);
    return;
  }

  if (const ArrayType *AT = Ctx.getAsArrayType(T)) {
    bool doNotInvalidateSuperRegion = ITraits.hasTrait(
        baseR,
        RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);

    if (doNotInvalidateSuperRegion) {
      // We are not doing blank invalidation of the whole array region so we
      // have to manually invalidate each elements.
      Optional<uint64_t> NumElements;

      // Compute lower and upper offsets for region within array.
      if (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(AT))
        NumElements = CAT->getSize().getZExtValue();
      if (!NumElements) // We are not dealing with a constant size array
        goto conjure_default;
      QualType ElementTy = AT->getElementType();
      uint64_t ElemSize = Ctx.getTypeSize(ElementTy);
      const RegionOffset &RO = baseR->getAsOffset();
      const MemRegion *SuperR = baseR->getBaseRegion();
      if (RO.hasSymbolicOffset()) {
        // If base region has a symbolic offset,
        // we revert to invalidating the super region.
        if (SuperR)
          AddToWorkList(SuperR);
        goto conjure_default;
      }

      uint64_t LowerOffset = RO.getOffset();
      uint64_t UpperOffset = LowerOffset + *NumElements * ElemSize;
      // UpperOffset may wrap around; the range checks below account for that.
      bool UpperOverflow = UpperOffset < LowerOffset;

      // Invalidate regions which are within array boundaries,
      // or have a symbolic offset.
      if (!SuperR)
        goto conjure_default;

      const ClusterBindings *C = B.lookup(SuperR);
      if (!C)
        goto conjure_default;

      for (ClusterBindings::iterator I = C->begin(), E = C->end(); I != E;
           ++I) {
        const BindingKey &BK = I.getKey();
        Optional<uint64_t> ROffset =
            BK.hasSymbolicOffset() ? Optional<uint64_t>() : BK.getOffset();

        // Check offset is not symbolic and within array's boundaries.
        // Handles arrays of 0 elements and of 0-sized elements as well.
        if (!ROffset ||
            ((*ROffset >= LowerOffset && *ROffset < UpperOffset) ||
             (UpperOverflow &&
              (*ROffset >= LowerOffset || *ROffset < UpperOffset)) ||
             (LowerOffset == UpperOffset && *ROffset == LowerOffset))) {
          B = B.removeBinding(I.getKey());
          // Bound symbolic regions need to be invalidated for dead symbol
          // detection.
          SVal V = I.getData();
          const MemRegion *R = V.getAsRegion();
          if (isa_and_nonnull<SymbolicRegion>(R))
            VisitBinding(V);
        }
      }
    }
  conjure_default:
    // Set the default value of the array to conjured symbol.
    DefinedOrUnknownSVal V =
        svalBuilder.conjureSymbolVal(baseR, Ex, LCtx,
                                     AT->getElementType(), Count);
    B = B.addBinding(baseR, BindingKey::Default, V);
    return;
  }

  // Scalar (non-aggregate) region: bind a fresh conjured symbol directly.
  DefinedOrUnknownSVal V = svalBuilder.conjureSymbolVal(baseR, Ex, LCtx,
                                                        T,Count);
  assert(SymbolManager::canSymbolicate(T) || V.isUnknown());
  B = B.addBinding(baseR, BindingKey::Direct, V);
}

bool InvalidateRegionsWorker::isInitiallyIncludedGlobalRegion(
    const MemRegion *R) {
  switch (GlobalsFilter) {
  case GFK_None:
    return false;
  case GFK_SystemOnly:
    return isa<GlobalSystemSpaceRegion>(R->getMemorySpace());
  case GFK_All:
    return isa<NonStaticGlobalSpaceRegion>(R->getMemorySpace());
  }

  llvm_unreachable("unknown globals filter");
}

bool InvalidateRegionsWorker::includeEntireMemorySpace(const MemRegion *Base) {
  if (isInitiallyIncludedGlobalRegion(Base))
    return true;

  // A memory space can also be opted into whole-space invalidation via traits.
  const MemSpaceRegion *MemSpace = Base->getMemorySpace();
  return ITraits.hasTrait(MemSpace,
                          RegionAndSymbolInvalidationTraits::TK_EntireMemSpace);
}

RegionBindingsRef
RegionStoreManager::invalidateGlobalRegion(MemRegion::Kind K,
                                           const Expr *Ex,
                                           unsigned Count,
                                           const LocationContext *LCtx,
                                           RegionBindingsRef B,
                                           InvalidatedRegions *Invalidated) {
  // Bind the globals memory space to a new symbol that we will use to derive
  // the bindings for all globals.
  const GlobalsSpaceRegion *GS = MRMgr.getGlobalsRegion(K);
  SVal V = svalBuilder.conjureSymbolVal(/* symbolTag = */ (const void*) GS, Ex, LCtx,
                                        /* type does not matter */ Ctx.IntTy,
                                        Count);

  // Replace the space's old binding with a default binding to the new symbol.
  B = B.removeBinding(GS)
       .addBinding(BindingKey::Make(GS, BindingKey::Default), V);

  // Even if there are no bindings in the global scope, we still need to
  // record that we touched it.
  if (Invalidated)
    Invalidated->push_back(GS);

  return B;
}

void RegionStoreManager::populateWorkList(InvalidateRegionsWorker &W,
                                          ArrayRef<SVal> Values,
                                          InvalidatedRegions *TopLevelRegions) {
  for (ArrayRef<SVal>::iterator I = Values.begin(),
                                E = Values.end(); I != E; ++I) {
    SVal V = *I;
    // A LazyCompoundVal expands to the set of interesting values it covers;
    // each referenced region is enqueued individually.
    if (Optional<nonloc::LazyCompoundVal> LCS =
            V.getAs<nonloc::LazyCompoundVal>()) {

      const SValListTy &Vals = getInterestingValues(*LCS);

      for (SValListTy::const_iterator I = Vals.begin(),
                                      E = Vals.end(); I != E; ++I) {
        // Note: the last argument is false here because these are
        // non-top-level regions.
        if (const MemRegion *R = (*I).getAsRegion())
          W.AddToWorkList(R);
      }
      continue;
    }

    if (const MemRegion *R = V.getAsRegion()) {
      if (TopLevelRegions)
        TopLevelRegions->push_back(R);
      W.AddToWorkList(R);
      continue;
    }
  }
}

StoreRef
RegionStoreManager::invalidateRegions(Store store,
                                     ArrayRef<SVal> Values,
                                     const Expr *Ex, unsigned Count,
                                     const LocationContext *LCtx,
                                     const CallEvent *Call,
                                     InvalidatedSymbols &IS,
                                     RegionAndSymbolInvalidationTraits &ITraits,
                                     InvalidatedRegions *TopLevelRegions,
                                     InvalidatedRegions *Invalidated) {
  // Decide which globals a call may touch: system-header calls only reach
  // system globals; other calls may reach any non-static global.
  GlobalsFilterKind GlobalsFilter;
  if (Call) {
    if (Call->isInSystemHeader())
      GlobalsFilter = GFK_SystemOnly;
    else
      GlobalsFilter = GFK_All;
  } else {
    GlobalsFilter = GFK_None;
  }

  RegionBindingsRef B = getRegionBindings(store);
  InvalidateRegionsWorker W(*this, StateMgr, B, Ex, Count, LCtx, IS, ITraits,
                            Invalidated, GlobalsFilter);

  // Scan the bindings and generate the clusters.
  W.GenerateClusters();

  // Add the regions to the worklist.
  populateWorkList(W, Values, TopLevelRegions);

  W.RunWorkList();

  // Return the new bindings.
  B = W.getRegionBindings();

  // For calls, determine which global regions should be invalidated and
  // invalidate them. (Note that function-static and immutable globals are never
  // invalidated by this.)
  // TODO: This could possibly be more precise with modules.
  // GFK_All deliberately falls through: it invalidates internal globals AND
  // system globals.
  switch (GlobalsFilter) {
  case GFK_All:
    B = invalidateGlobalRegion(MemRegion::GlobalInternalSpaceRegionKind,
                               Ex, Count, LCtx, B, Invalidated);
    LLVM_FALLTHROUGH;
  case GFK_SystemOnly:
    B = invalidateGlobalRegion(MemRegion::GlobalSystemSpaceRegionKind,
                               Ex, Count, LCtx, B, Invalidated);
    LLVM_FALLTHROUGH;
  case GFK_None:
    break;
  }

  return StoreRef(B.asStore(), *this);
}

//===----------------------------------------------------------------------===//
// Location and region casting.
//===----------------------------------------------------------------------===//

/// ArrayToPointer - Emulates the "decay" of an array to a pointer
/// type. 'Array' represents the lvalue of the array being decayed
/// to a pointer, and the returned SVal represents the decayed
/// version of that lvalue (i.e., a pointer to the first element of
/// the array). This is called by ExprEngine when evaluating casts
/// from arrays to pointers.
SVal RegionStoreManager::ArrayToPointer(Loc Array, QualType T) {
  if (Array.getAs<loc::ConcreteInt>())
    return Array;

  if (!Array.getAs<loc::MemRegionVal>())
    return UnknownVal();

  // Decay to &array[0]: an element region at index zero within the array.
  const SubRegion *R =
      cast<SubRegion>(Array.castAs<loc::MemRegionVal>().getRegion());
  NonLoc ZeroIdx = svalBuilder.makeZeroArrayIndex();
  return loc::MemRegionVal(MRMgr.getElementRegion(T, ZeroIdx, R, Ctx));
}

//===----------------------------------------------------------------------===//
// Loading values from regions.
//===----------------------------------------------------------------------===//

SVal RegionStoreManager::getBinding(RegionBindingsConstRef B, Loc L, QualType T) {
  assert(!L.getAs<UnknownVal>() && "location unknown");
  assert(!L.getAs<UndefinedVal>() && "location undefined");

  // For access to concrete addresses, return UnknownVal.
  // Checks for null dereferences (and similar errors) are done by checkers,
  // not the Store.
  // FIXME: We can consider lazily symbolicating such memory, but we really
  // should defer this when we can reason easily about symbolicating arrays
  // of bytes.
  if (L.getAs<loc::ConcreteInt>()) {
    return UnknownVal();
  }
  if (!L.getAs<loc::MemRegionVal>()) {
    return UnknownVal();
  }

  const MemRegion *MR = L.castAs<loc::MemRegionVal>().getRegion();

  if (isa<BlockDataRegion>(MR)) {
    return UnknownVal();
  }

  // For untyped regions, try to deduce the value type from the location's
  // pointee type (or the symbol's type), then read element zero.
  if (!isa<TypedValueRegion>(MR)) {
    if (T.isNull()) {
      if (const TypedRegion *TR = dyn_cast<TypedRegion>(MR))
        T = TR->getLocationType()->getPointeeType();
      else if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(MR))
        T = SR->getSymbol()->getType()->getPointeeType();
    }
    assert(!T.isNull() && "Unable to auto-detect binding type!");
    assert(!T->isVoidType() && "Attempting to dereference a void pointer!");
    MR = GetElementZeroRegion(cast<SubRegion>(MR), T);
  } else {
    T = cast<TypedValueRegion>(MR)->getValueType();
  }

  // FIXME: Perhaps this method should just take a 'const MemRegion*' argument
  // instead of 'Loc', and have the other Loc cases handled at a higher level.
  const TypedValueRegion *R = cast<TypedValueRegion>(MR);
  QualType RTy = R->getValueType();

  // FIXME: we do not yet model the parts of a complex type, so treat the
  // whole thing as "unknown".
  if (RTy->isAnyComplexType())
    return UnknownVal();

  // FIXME: We should eventually handle funny addressing. e.g.:
  //
  //  int x = ...;
  //  int *p = &x;
  //  char *q = (char*) p;
  //  char c = *q;  // returns the first byte of 'x'.
  //
  // Such funny addressing will occur due to layering of regions.
  if (RTy->isStructureOrClassType())
    return getBindingForStruct(B, R);

  // FIXME: Handle unions.
  if (RTy->isUnionType())
    return createLazyBinding(B, R);

  if (RTy->isArrayType()) {
    if (RTy->isConstantArrayType())
      return getBindingForArray(B, R);
    else
      return UnknownVal();
  }

  // FIXME: handle Vector types.
  if (RTy->isVectorType())
    return UnknownVal();

  if (const FieldRegion* FR = dyn_cast<FieldRegion>(R))
    return svalBuilder.evalCast(getBindingForField(B, FR), T, QualType{});

  if (const ElementRegion* ER = dyn_cast<ElementRegion>(R)) {
    // FIXME: Here we actually perform an implicit conversion from the loaded
    // value to the element type. Eventually we want to compose these values
    // more intelligently. For example, an 'element' can encompass multiple
    // bound regions (e.g., several bound bytes), or could be a subset of
    // a larger value.
    return svalBuilder.evalCast(getBindingForElement(B, ER), T, QualType{});
  }

  if (const ObjCIvarRegion *IVR = dyn_cast<ObjCIvarRegion>(R)) {
    // FIXME: Here we actually perform an implicit conversion from the loaded
    // value to the ivar type. What we should model is stores to ivars
    // that blow past the extent of the ivar. If the address of the ivar is
    // reinterpretted, it is possible we stored a different value that could
    // fit within the ivar. Either we need to cast these when storing them
    // or reinterpret them lazily (as we do here).
    return svalBuilder.evalCast(getBindingForObjCIvar(B, IVR), T, QualType{});
  }

  if (const VarRegion *VR = dyn_cast<VarRegion>(R)) {
    // FIXME: Here we actually perform an implicit conversion from the loaded
    // value to the variable type. What we should model is stores to variables
    // that blow past the extent of the variable. If the address of the
    // variable is reinterpretted, it is possible we stored a different value
    // that could fit within the variable. Either we need to cast these when
    // storing them or reinterpret them lazily (as we do here).
    return svalBuilder.evalCast(getBindingForVar(B, VR), T, QualType{});
  }

  const SVal *V = B.lookup(R, BindingKey::Direct);

  // Check if the region has a binding.
  if (V)
    return *V;

  // The location does not have a bound value. This means that it has
  // the value it had upon its creation and/or entry to the analyzed
  // function/method. These are either symbolic values or 'undefined'.
  if (R->hasStackNonParametersStorage()) {
    // All stack variables are considered to have undefined values
    // upon creation. All heap allocated blocks are considered to
    // have undefined values as well unless they are explicitly bound
    // to specific values.
    return UndefinedVal();
  }

  // All other values are symbolic.
  return svalBuilder.getRegionValueSymbolVal(R);
}

/// Returns the value type of \p R if it is typed, the symbol's type if it is
/// symbolic, and a null QualType otherwise.
static QualType getUnderlyingType(const SubRegion *R) {
  QualType RegionTy;
  if (const TypedValueRegion *TVR = dyn_cast<TypedValueRegion>(R))
    RegionTy = TVR->getValueType();

  if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(R))
    RegionTy = SR->getSymbol()->getType();

  return RegionTy;
}

/// Checks to see if store \p B has a lazy binding for region \p R.
///
/// If \p AllowSubregionBindings is \c false, a lazy binding will be rejected
/// if there are additional bindings within \p R.
///
/// Note that unlike RegionStoreManager::findLazyBinding, this will not search
/// for lazy bindings for super-regions of \p R.
static Optional<nonloc::LazyCompoundVal>
getExistingLazyBinding(SValBuilder &SVB, RegionBindingsConstRef B,
                       const SubRegion *R, bool AllowSubregionBindings) {
  Optional<SVal> V = B.getDefaultBinding(R);
  if (!V)
    return None;

  Optional<nonloc::LazyCompoundVal> LCV = V->getAs<nonloc::LazyCompoundVal>();
  if (!LCV)
    return None;

  // If the LCV is for a subregion, the types might not match, and we shouldn't
  // reuse the binding.
  QualType RegionTy = getUnderlyingType(R);
  if (!RegionTy.isNull() &&
      !RegionTy->isVoidPointerType()) {
    QualType SourceRegionTy = LCV->getRegion()->getValueType();
    if (!SVB.getContext().hasSameUnqualifiedType(RegionTy, SourceRegionTy))
      return None;
  }

  if (!AllowSubregionBindings) {
    // If there are any other bindings within this region, we shouldn't reuse
    // the top-level binding.
    SmallVector<BindingPair, 16> Bindings;
    collectSubRegionBindings(Bindings, SVB, *B.lookup(R->getBaseRegion()), R,
                             /*IncludeAllDefaultBindings=*/true);
    if (Bindings.size() > 1)
      return None;
  }

  return *LCV;
}


std::pair<Store, const SubRegion *>
RegionStoreManager::findLazyBinding(RegionBindingsConstRef B,
                                    const SubRegion *R,
                                    const SubRegion *originalRegion) {
  // First check whether R itself already carries a lazy binding (skipped for
  // the region the search started from).
  if (originalRegion != R) {
    if (Optional<nonloc::LazyCompoundVal> V =
            getExistingLazyBinding(svalBuilder, B, R, true))
      return std::make_pair(V->getStore(), V->getRegion());
  }

  typedef std::pair<Store, const SubRegion *> StoreRegionPair;
  StoreRegionPair Result = StoreRegionPair();

  // Otherwise recurse through element/field/base-object layers, rebuilding the
  // same layering on top of whatever lazy-bound region the recursion finds.
  if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
    Result = findLazyBinding(B, cast<SubRegion>(ER->getSuperRegion()),
                             originalRegion);

    if (Result.second)
      Result.second = MRMgr.getElementRegionWithSuper(ER, Result.second);

  } else if (const FieldRegion *FR = dyn_cast<FieldRegion>(R)) {
    Result = findLazyBinding(B, cast<SubRegion>(FR->getSuperRegion()),
                             originalRegion);

    if (Result.second)
      Result.second = MRMgr.getFieldRegionWithSuper(FR, Result.second);

  } else if (const CXXBaseObjectRegion *BaseReg =
                 dyn_cast<CXXBaseObjectRegion>(R)) {
    // C++ base object region is another kind of region that we should blast
    // through to look for lazy compound value. It is like a field region.
    Result = findLazyBinding(B, cast<SubRegion>(BaseReg->getSuperRegion()),
                             originalRegion);

    if (Result.second)
      Result.second = MRMgr.getCXXBaseObjectRegionWithSuper(BaseReg,
                                                            Result.second);
  }

  return Result;
}

/// This is a helper function for `getConstantValFromConstArrayInitializer`.
///
/// Return an array of extents of the declared array type.
///
/// E.g. for `int x[1][2][3];` returns { 1, 2, 3 }.
static SmallVector<uint64_t, 2>
getConstantArrayExtents(const ConstantArrayType *CAT) {
  assert(CAT && "ConstantArrayType should not be null");
  CAT = cast<ConstantArrayType>(CAT->getCanonicalTypeInternal());
  SmallVector<uint64_t, 2> Extents;
  // Walk nested constant-array element types, collecting one extent per
  // dimension, outermost first.
  do {
    Extents.push_back(CAT->getSize().getZExtValue());
  } while ((CAT = dyn_cast<ConstantArrayType>(CAT->getElementType())));
  return Extents;
}

/// This is a helper function for `getConstantValFromConstArrayInitializer`.
///
/// Return an array of offsets from nested ElementRegions and a root base
/// region. The array is never empty and a base region is never null.
///
/// E.g. for `Element{Element{Element{VarRegion},1},2},3}` returns { 3, 2, 1 }.
/// This represents an access through indirection: `arr[1][2][3];`
///
/// \param ER The given (possibly nested) ElementRegion.
1626 /// 1627 /// \note The result array is in the reverse order of indirection expression: 1628 /// arr[1][2][3] -> { 3, 2, 1 }. This helps to provide complexity O(n), where n 1629 /// is a number of indirections. It may not affect performance in real-life 1630 /// code, though. 1631 static std::pair<SmallVector<SVal, 2>, const MemRegion *> 1632 getElementRegionOffsetsWithBase(const ElementRegion *ER) { 1633 assert(ER && "ConstantArrayType should not be null"); 1634 const MemRegion *Base; 1635 SmallVector<SVal, 2> SValOffsets; 1636 do { 1637 SValOffsets.push_back(ER->getIndex()); 1638 Base = ER->getSuperRegion(); 1639 ER = dyn_cast<ElementRegion>(Base); 1640 } while (ER); 1641 return {SValOffsets, Base}; 1642 } 1643 1644 /// This is a helper function for `getConstantValFromConstArrayInitializer`. 1645 /// 1646 /// Convert array of offsets from `SVal` to `uint64_t` in consideration of 1647 /// respective array extents. 1648 /// \param SrcOffsets [in] The array of offsets of type `SVal` in reversed 1649 /// order (expectedly received from `getElementRegionOffsetsWithBase`). 1650 /// \param ArrayExtents [in] The array of extents. 1651 /// \param DstOffsets [out] The array of offsets of type `uint64_t`. 1652 /// \returns: 1653 /// - `None` for successful convertion. 1654 /// - `UndefinedVal` or `UnknownVal` otherwise. It's expected that this SVal 1655 /// will be returned as a suitable value of the access operation. 
///
/// \example:
/// const int arr[10][20][30] = {}; // ArrayExtents { 10, 20, 30 }
/// int x1 = arr[4][5][6]; // SrcOffsets { NonLoc(6), NonLoc(5), NonLoc(4) }
///                        // DstOffsets { 4, 5, 6 }
///                        // returns None
/// int x2 = arr[42][5][-6]; // returns UndefinedVal
/// int x3 = arr[4][5][x2]; // returns UnknownVal
static Optional<SVal>
convertOffsetsFromSvalToUnsigneds(const SmallVector<SVal, 2> &SrcOffsets,
                                  const SmallVector<uint64_t, 2> ArrayExtents,
                                  SmallVector<uint64_t, 2> &DstOffsets) {
  // Check offsets for being out of bounds.
  // C++20 [expr.add] 7.6.6.4 (excerpt):
  //   If P points to an array element i of an array object x with n
  //   elements, where i < 0 or i > n, the behavior is undefined.
  //   Dereferencing is not allowed on the "one past the last
  //   element", when i == n.
  // Example:
  //  const int arr[3][2] = {{1, 2}, {3, 4}};
  //  arr[0][0];  // 1
  //  arr[0][1];  // 2
  //  arr[0][2];  // UB
  //  arr[1][0];  // 3
  //  arr[1][1];  // 4
  //  arr[1][-1]; // UB
  //  arr[2][0];  // 0
  //  arr[2][1];  // 0
  //  arr[-2][0]; // UB
  DstOffsets.resize(SrcOffsets.size());
  auto ExtentIt = ArrayExtents.begin();
  auto OffsetIt = DstOffsets.begin();
  // Reverse `SValOffsets` to make it consistent with `ArrayExtents`.
  for (SVal V : llvm::reverse(SrcOffsets)) {
    if (auto CI = V.getAs<nonloc::ConcreteInt>()) {
      // When offset is out of array's bounds, result is UB.
      const llvm::APSInt &Offset = CI->getValue();
      if (Offset.isNegative() || Offset.uge(*(ExtentIt++)))
        return UndefinedVal();
      // Store index in a reversed order.
      *(OffsetIt++) = Offset.getZExtValue();
      continue;
    }
    // Symbolic index presented. Return Unknown value.
    // FIXME: We also need to take ElementRegions with symbolic indexes into
    // account.
    return UnknownVal();
  }
  return None;
}

Optional<SVal> RegionStoreManager::getConstantValFromConstArrayInitializer(
    RegionBindingsConstRef B, const ElementRegion *R) {
  assert(R && "ElementRegion should not be null");

  // Treat an n-dimensional array.
  SmallVector<SVal, 2> SValOffsets;
  const MemRegion *Base;
  std::tie(SValOffsets, Base) = getElementRegionOffsetsWithBase(R);
  const VarRegion *VR = dyn_cast<VarRegion>(Base);
  if (!VR)
    return None;

  assert(!SValOffsets.empty() && "getElementRegionOffsets guarantees the "
                                 "offsets vector is not empty.");

  // Check if the containing array has an initialized value that we can trust.
  // We can trust a const value or a value of a global initializer in main().
  const VarDecl *VD = VR->getDecl();
  if (!VD->getType().isConstQualified() &&
      !R->getElementType().isConstQualified() &&
      (!B.isMainAnalysis() || !VD->hasGlobalStorage()))
    return None;

  // Array's declaration should have `ConstantArrayType` type, because only this
  // type contains an array extent. It may happen that array type can be of
  // `IncompleteArrayType` type. To get the declaration of `ConstantArrayType`
  // type, we should find the declaration in the redeclarations chain that has
  // the initialization expression.
  // NOTE: `getAnyInitializer` has an out-parameter, which returns a new `VD`
  // from which an initializer is obtained. We replace current `VD` with the new
  // `VD`. If the return value of the function is null then `VD` won't be
  // replaced.
  const Expr *Init = VD->getAnyInitializer(VD);
  // NOTE: If `Init` is non-null, then a new `VD` is non-null for sure. So check
  // `Init` for null only and don't worry about the replaced `VD`.
1743 if (!Init) 1744 return None; 1745 1746 // Array's declaration should have ConstantArrayType type, because only this 1747 // type contains an array extent. 1748 const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(VD->getType()); 1749 if (!CAT) 1750 return None; 1751 1752 // Get array extents. 1753 SmallVector<uint64_t, 2> Extents = getConstantArrayExtents(CAT); 1754 1755 // The number of offsets should equal to the numbers of extents, 1756 // otherwise wrong type punning occurred. For instance: 1757 // int arr[1][2][3]; 1758 // auto ptr = (int(*)[42])arr; 1759 // auto x = ptr[4][2]; // UB 1760 // FIXME: Should return UndefinedVal. 1761 if (SValOffsets.size() != Extents.size()) 1762 return None; 1763 1764 SmallVector<uint64_t, 2> ConcreteOffsets; 1765 if (Optional<SVal> V = convertOffsetsFromSvalToUnsigneds(SValOffsets, Extents, 1766 ConcreteOffsets)) 1767 return *V; 1768 1769 // Handle InitListExpr. 1770 // Example: 1771 // const char arr[4][2] = { { 1, 2 }, { 3 }, 4, 5 }; 1772 if (const auto *ILE = dyn_cast<InitListExpr>(Init)) 1773 return getSValFromInitListExpr(ILE, ConcreteOffsets, R->getElementType()); 1774 1775 // Handle StringLiteral. 1776 // Example: 1777 // const char arr[] = "abc"; 1778 if (const auto *SL = dyn_cast<StringLiteral>(Init)) 1779 return getSValFromStringLiteral(SL, ConcreteOffsets.front(), 1780 R->getElementType()); 1781 1782 // FIXME: Handle CompoundLiteralExpr. 1783 1784 return None; 1785 } 1786 1787 /// Returns an SVal, if possible, for the specified position of an 1788 /// initialization list. 1789 /// 1790 /// \param ILE The given initialization list. 1791 /// \param Offsets The array of unsigned offsets. E.g. for the expression 1792 /// `int x = arr[1][2][3];` an array should be { 1, 2, 3 }. 1793 /// \param ElemT The type of the result SVal expression. 1794 /// \return Optional SVal for the particular position in the initialization 1795 /// list. E.g. 
for the list `{{1, 2},[3, 4],{5, 6}, {}}` offsets: 1796 /// - {1, 1} returns SVal{4}, because it's the second position in the second 1797 /// sublist; 1798 /// - {3, 0} returns SVal{0}, because there's no explicit value at this 1799 /// position in the sublist. 1800 /// 1801 /// NOTE: Inorder to get a valid SVal, a caller shall guarantee valid offsets 1802 /// for the given initialization list. Otherwise SVal can be an equivalent to 0 1803 /// or lead to assertion. 1804 Optional<SVal> RegionStoreManager::getSValFromInitListExpr( 1805 const InitListExpr *ILE, const SmallVector<uint64_t, 2> &Offsets, 1806 QualType ElemT) { 1807 assert(ILE && "InitListExpr should not be null"); 1808 1809 for (uint64_t Offset : Offsets) { 1810 // C++20 [dcl.init.string] 9.4.2.1: 1811 // An array of ordinary character type [...] can be initialized by [...] 1812 // an appropriately-typed string-literal enclosed in braces. 1813 // Example: 1814 // const char arr[] = { "abc" }; 1815 if (ILE->isStringLiteralInit()) 1816 if (const auto *SL = dyn_cast<StringLiteral>(ILE->getInit(0))) 1817 return getSValFromStringLiteral(SL, Offset, ElemT); 1818 1819 // C++20 [expr.add] 9.4.17.5 (excerpt): 1820 // i-th array element is value-initialized for each k < i ≤ n, 1821 // where k is an expression-list size and n is an array extent. 1822 if (Offset >= ILE->getNumInits()) 1823 return svalBuilder.makeZeroVal(ElemT); 1824 1825 const Expr *E = ILE->getInit(Offset); 1826 const auto *IL = dyn_cast<InitListExpr>(E); 1827 if (!IL) 1828 // Return a constant value, if it is presented. 1829 // FIXME: Support other SVals. 1830 return svalBuilder.getConstantVal(E); 1831 1832 // Go to the nested initializer list. 1833 ILE = IL; 1834 } 1835 llvm_unreachable( 1836 "Unhandled InitListExpr sub-expressions or invalid offsets."); 1837 } 1838 1839 /// Returns an SVal, if possible, for the specified position in a string 1840 /// literal. 1841 /// 1842 /// \param SL The given string literal. 
/// \param Offset The unsigned offset. E.g. for the expression
///   `char x = str[42];` an offset should be 42.
///   E.g. for the string "abc" offset:
///   - 1 returns SVal{b}, because it's the second position in the string.
///   - 42 returns SVal{0}, because there's no explicit value at this
///     position in the string.
/// \param ElemT The type of the result SVal expression.
///
/// NOTE: We return `0` for every offset >= the literal length for array
/// declarations, like:
///   const char str[42] = "123"; // Literal length is 4.
///   char c = str[41];           // Offset is 41.
/// FIXME: Nevertheless, we can't do the same for pointer declarations, like:
///   const char * const str = "123"; // Literal length is 4.
///   char c = str[41];               // Offset is 41. Returns `0`, but Undef
///                                   // expected.
/// It should be properly handled before reaching this point.
/// The main problem is that we can't distinguish between these declarations,
/// because in case of array we can get the Decl from VarRegion, but in case
/// of pointer the region is a StringRegion, which doesn't contain a Decl.
/// Possible solution could be passing an array extent along with the offset.
SVal RegionStoreManager::getSValFromStringLiteral(const StringLiteral *SL,
                                                  uint64_t Offset,
                                                  QualType ElemT) {
  assert(SL && "StringLiteral should not be null");
  // C++20 [dcl.init.string] 9.4.2.3:
  //   If there are fewer initializers than there are array elements, each
  //   element not explicitly initialized shall be zero-initialized [dcl.init].
  uint32_t Code = (Offset >= SL->getLength()) ? 0 : SL->getCodeUnit(Offset);
  return svalBuilder.makeIntVal(Code, ElemT);
}

/// Compute the value of an element region. Tries, in order: a direct binding,
/// reading out of a string-literal super-region, a constant-array initializer,
/// and reinterpreting a byte of a larger bound scalar; otherwise falls through
/// to the common field/element lookup.
SVal RegionStoreManager::getBindingForElement(RegionBindingsConstRef B,
                                              const ElementRegion* R) {
  // Check if the region has a binding.
  if (const Optional<SVal> &V = B.getDirectBinding(R))
    return *V;

  const MemRegion* superR = R->getSuperRegion();

  // Check if the region is an element region of a string literal.
  if (const StringRegion *StrR = dyn_cast<StringRegion>(superR)) {
    // FIXME: Handle loads from strings where the literal is treated as
    // an integer, e.g., *((unsigned int*)"hello"). Such loads are UB according
    // to C++20 7.2.1.11 [basic.lval].
    QualType T = Ctx.getAsArrayType(StrR->getValueType())->getElementType();
    if (!Ctx.hasSameUnqualifiedType(T, R->getElementType()))
      return UnknownVal();
    if (const auto CI = R->getIndex().getAs<nonloc::ConcreteInt>()) {
      const llvm::APSInt &Idx = CI->getValue();
      if (Idx < 0)
        return UndefinedVal();
      const StringLiteral *SL = StrR->getStringLiteral();
      return getSValFromStringLiteral(SL, Idx.getZExtValue(), T);
    }
  } else if (isa<ElementRegion, VarRegion>(superR)) {
    if (Optional<SVal> V = getConstantValFromConstArrayInitializer(B, R))
      return *V;
  }

  // Check for loads from a code text region.  For such loads, just give up.
  if (isa<CodeTextRegion>(superR))
    return UnknownVal();

  // Handle the case where we are indexing into a larger scalar object.
  // For example, this handles:
  //   int x = ...
  //   char *y = &x;
  //   return *y;
  // FIXME: This is a hack, and doesn't do anything really intelligent yet.
  const RegionRawOffset &O = R->getAsArrayOffset();

  // If we cannot reason about the offset, return an unknown value.
  if (!O.getRegion())
    return UnknownVal();

  if (const TypedValueRegion *baseR =
        dyn_cast_or_null<TypedValueRegion>(O.getRegion())) {
    QualType baseT = baseR->getValueType();
    if (baseT->isScalarType()) {
      QualType elemT = R->getElementType();
      if (elemT->isScalarType()) {
        // Only derive a value when the element fits inside the base scalar.
        if (Ctx.getTypeSizeInChars(baseT) >= Ctx.getTypeSizeInChars(elemT)) {
          if (const Optional<SVal> &V = B.getDirectBinding(superR)) {
            if (SymbolRef parentSym = V->getAsSymbol())
              return svalBuilder.getDerivedRegionValueSymbolVal(parentSym, R);

            if (V->isUnknownOrUndef())
              return *V;
            // Other cases: give up.  We are indexing into a larger object
            // that has some value, but we don't know how to handle that yet.
            return UnknownVal();
          }
        }
      }
    }
  }
  return getBindingForFieldOrElementCommon(B, R, R->getElementType());
}

/// Compute the value of a field region: a direct binding if present, a
/// constant from the enclosing record variable's initializer if trustworthy,
/// otherwise the common field/element lookup.
SVal RegionStoreManager::getBindingForField(RegionBindingsConstRef B,
                                            const FieldRegion* R) {

  // Check if the region has a binding.
  if (const Optional<SVal> &V = B.getDirectBinding(R))
    return *V;

  // If the containing record was initialized, try to get its constant value.
  const FieldDecl *FD = R->getDecl();
  QualType Ty = FD->getType();
  const MemRegion* superR = R->getSuperRegion();
  if (const auto *VR = dyn_cast<VarRegion>(superR)) {
    const VarDecl *VD = VR->getDecl();
    QualType RecordVarTy = VD->getType();
    unsigned Index = FD->getFieldIndex();
    // Either the record variable or the field has an initializer that we can
    // trust. We trust initializers of constants and, additionally, respect
    // initializers of globals when analyzing main().
    if (RecordVarTy.isConstQualified() || Ty.isConstQualified() ||
        (B.isMainAnalysis() && VD->hasGlobalStorage()))
      if (const Expr *Init = VD->getAnyInitializer())
        if (const auto *InitList = dyn_cast<InitListExpr>(Init)) {
          if (Index < InitList->getNumInits()) {
            if (const Expr *FieldInit = InitList->getInit(Index))
              if (Optional<SVal> V = svalBuilder.getConstantVal(FieldInit))
                return *V;
          } else {
            // Fields past the end of the init list are value-initialized.
            return svalBuilder.makeZeroVal(Ty);
          }
        }
  }

  return getBindingForFieldOrElementCommon(B, R, Ty);
}

/// If \p superR carries a default binding, derive a value of type \p Ty for
/// the sub-region \p R from it (a derived symbol, zero, unknown/undef, or the
/// compound value itself). Returns None when \p superR has no default binding.
Optional<SVal>
RegionStoreManager::getBindingForDerivedDefaultValue(RegionBindingsConstRef B,
                                                     const MemRegion *superR,
                                                     const TypedValueRegion *R,
                                                     QualType Ty) {

  if (const Optional<SVal> &D = B.getDefaultBinding(superR)) {
    const SVal &val = D.getValue();
    if (SymbolRef parentSym = val.getAsSymbol())
      return svalBuilder.getDerivedRegionValueSymbolVal(parentSym, R);

    if (val.isZeroConstant())
      return svalBuilder.makeZeroVal(Ty);

    if (val.isUnknownOrUndef())
      return val;

    // Lazy bindings are usually handled through getExistingLazyBinding().
    // We should unify these two code paths at some point.
    if (val.getAs<nonloc::LazyCompoundVal>() ||
        val.getAs<nonloc::CompoundVal>())
      return val;

    llvm_unreachable("Unknown default value");
  }

  return None;
}

/// Read the value of \p LazyBindingRegion out of the snapshot bindings
/// \p LazyBinding captured by a lazy compound value.
SVal RegionStoreManager::getLazyBinding(const SubRegion *LazyBindingRegion,
                                        RegionBindingsRef LazyBinding) {
  SVal Result;
  if (const ElementRegion *ER = dyn_cast<ElementRegion>(LazyBindingRegion))
    Result = getBindingForElement(LazyBinding, ER);
  else
    Result = getBindingForField(LazyBinding,
                                cast<FieldRegion>(LazyBindingRegion));

  // FIXME: This is a hack to deal with RegionStore's inability to distinguish a
  // default value for /part/ of an aggregate from a default value for the
  // /entire/ aggregate. The most common case of this is when struct Outer
  // has as its first member a struct Inner, which is copied in from a stack
  // variable. In this case, even if the Outer's default value is symbolic, 0,
  // or unknown, it gets overridden by the Inner's default value of undefined.
  //
  // This is a general problem -- if the Inner is zero-initialized, the Outer
  // will now look zero-initialized. The proper way to solve this is with a
  // new version of RegionStore that tracks the extent of a binding as well
  // as the offset.
  //
  // This hack only takes care of the undefined case because that can very
  // quickly result in a warning.
  if (Result.isUndef())
    Result = UnknownVal();

  return Result;
}

/// Shared fallback lookup for fields and elements that have no direct
/// binding: consult lazy bindings, ancestor default bindings, and
/// stack-storage special cases, before conjuring a fresh symbol.
SVal
RegionStoreManager::getBindingForFieldOrElementCommon(RegionBindingsConstRef B,
                                                      const TypedValueRegion *R,
                                                      QualType Ty) {

  // At this point we have already checked in either getBindingForElement or
  // getBindingForField if 'R' has a direct binding.

  // Lazy binding?
  Store lazyBindingStore = nullptr;
  const SubRegion *lazyBindingRegion = nullptr;
  std::tie(lazyBindingStore, lazyBindingRegion) = findLazyBinding(B, R, R);
  if (lazyBindingRegion)
    return getLazyBinding(lazyBindingRegion,
                          getRegionBindings(lazyBindingStore));

  // Record whether or not we see a symbolic index.  That can completely
  // be out of scope of our lookup.
  bool hasSymbolicIndex = false;

  // FIXME: This is a hack to deal with RegionStore's inability to distinguish a
  // default value for /part/ of an aggregate from a default value for the
  // /entire/ aggregate. The most common case of this is when struct Outer
  // has as its first member a struct Inner, which is copied in from a stack
  // variable. In this case, even if the Outer's default value is symbolic, 0,
  // or unknown, it gets overridden by the Inner's default value of undefined.
  //
  // This is a general problem -- if the Inner is zero-initialized, the Outer
  // will now look zero-initialized. The proper way to solve this is with a
  // new version of RegionStore that tracks the extent of a binding as well
  // as the offset.
  //
  // This hack only takes care of the undefined case because that can very
  // quickly result in a warning.
  bool hasPartialLazyBinding = false;

  // Walk up the region hierarchy looking for a default binding we can derive
  // a value from.
  const SubRegion *SR = R;
  while (SR) {
    const MemRegion *Base = SR->getSuperRegion();
    if (Optional<SVal> D = getBindingForDerivedDefaultValue(B, Base, R, Ty)) {
      if (D->getAs<nonloc::LazyCompoundVal>()) {
        hasPartialLazyBinding = true;
        break;
      }

      return *D;
    }

    if (const ElementRegion *ER = dyn_cast<ElementRegion>(Base)) {
      NonLoc index = ER->getIndex();
      if (!index.isConstant())
        hasSymbolicIndex = true;
    }

    // If our super region is a field or element itself, walk up the region
    // hierarchy to see if there is a default value installed in an ancestor.
    SR = dyn_cast<SubRegion>(Base);
  }

  if (R->hasStackNonParametersStorage()) {
    if (isa<ElementRegion>(R)) {
      // Currently we don't reason specially about Clang-style vectors.  Check
      // if superR is a vector and if so return Unknown.
      if (const TypedValueRegion *typedSuperR =
            dyn_cast<TypedValueRegion>(R->getSuperRegion())) {
        if (typedSuperR->getValueType()->isVectorType())
          return UnknownVal();
      }
    }

    // FIXME: We also need to take ElementRegions with symbolic indexes into
    // account. This case handles both directly accessing an ElementRegion
    // with a symbolic offset, but also fields within an element with
    // a symbolic offset.
    if (hasSymbolicIndex)
      return UnknownVal();

    // Additionally allow introspection of a block's internal layout.
    // Try to get direct binding if all other attempts failed thus far.
    // Else, return UndefinedVal()
    if (!hasPartialLazyBinding && !isa<BlockDataRegion>(R->getBaseRegion())) {
      if (const Optional<SVal> &V = B.getDefaultBinding(R))
        return *V;
      return UndefinedVal();
    }
  }

  // All other values are symbolic.
  return svalBuilder.getRegionValueSymbolVal(R);
}

/// Compute the value of an Objective-C ivar region: a direct binding, a value
/// derived from the super region's default binding, or a lazily conjured
/// symbol.
SVal RegionStoreManager::getBindingForObjCIvar(RegionBindingsConstRef B,
                                               const ObjCIvarRegion* R) {
  // Check if the region has a binding.
  if (const Optional<SVal> &V = B.getDirectBinding(R))
    return *V;

  const MemRegion *superR = R->getSuperRegion();

  // Check if the super region has a default binding.
  if (const Optional<SVal> &V = B.getDefaultBinding(superR)) {
    if (SymbolRef parentSym = V->getAsSymbol())
      return svalBuilder.getDerivedRegionValueSymbolVal(parentSym, R);

    // Other cases: give up.
    return UnknownVal();
  }

  return getBindingForLazySymbol(R);
}

/// Compute the value of a variable region, lazily deriving one from the
/// variable's memory space and declaration when no binding exists.
SVal RegionStoreManager::getBindingForVar(RegionBindingsConstRef B,
                                          const VarRegion *R) {

  // Check if the region has a binding.
  if (Optional<SVal> V = B.getDirectBinding(R))
    return *V;

  if (Optional<SVal> V = B.getDefaultBinding(R))
    return *V;

  // Lazily derive a value for the VarRegion.
  const VarDecl *VD = R->getDecl();
  const MemSpaceRegion *MS = R->getMemorySpace();

  // Arguments are always symbolic.
  if (isa<StackArgumentsSpaceRegion>(MS))
    return svalBuilder.getRegionValueSymbolVal(R);

  // Is 'VD' declared constant?  If so, retrieve the constant value.
  if (VD->getType().isConstQualified()) {
    if (const Expr *Init = VD->getAnyInitializer()) {
      if (Optional<SVal> V = svalBuilder.getConstantVal(Init))
        return *V;

      // If the variable is const qualified and has an initializer but
      // we couldn't evaluate initializer to a value, treat the value as
      // unknown.
      return UnknownVal();
    }
  }

  // This must come after the check for constants because closure-captured
  // constant variables may appear in UnknownSpaceRegion.
  if (isa<UnknownSpaceRegion>(MS))
    return svalBuilder.getRegionValueSymbolVal(R);

  if (isa<GlobalsSpaceRegion>(MS)) {
    QualType T = VD->getType();

    // If we're in main(), then global initializers have not become stale yet.
    if (B.isMainAnalysis())
      if (const Expr *Init = VD->getAnyInitializer())
        if (Optional<SVal> V = svalBuilder.getConstantVal(Init))
          return *V;

    // Function-scoped static variables are default-initialized to 0; if they
    // have an initializer, it would have been processed by now.
    // FIXME: This is only true when we're starting analysis from main().
    // We're losing a lot of coverage here.
    if (isa<StaticGlobalSpaceRegion>(MS))
      return svalBuilder.makeZeroVal(T);

    if (Optional<SVal> V = getBindingForDerivedDefaultValue(B, MS, R, T)) {
      assert(!V->getAs<nonloc::LazyCompoundVal>());
      return V.getValue();
    }

    return svalBuilder.getRegionValueSymbolVal(R);
  }

  return UndefinedVal();
}

SVal RegionStoreManager::getBindingForLazySymbol(const TypedValueRegion *R) {
  // All other values are symbolic.
  return svalBuilder.getRegionValueSymbolVal(R);
}

/// Collect (and cache) the non-constant, non-unknown values reachable from a
/// lazy compound value's snapshot, recursing into nested lazy compound values.
const RegionStoreManager::SValListTy &
RegionStoreManager::getInterestingValues(nonloc::LazyCompoundVal LCV) {
  // First, check the cache.
  LazyBindingsMapTy::iterator I = LazyBindingsMap.find(LCV.getCVData());
  if (I != LazyBindingsMap.end())
    return I->second;

  // If we don't have a list of values cached, start constructing it.
  SValListTy List;

  const SubRegion *LazyR = LCV.getRegion();
  RegionBindingsRef B = getRegionBindings(LCV.getStore());

  // If this region had /no/ bindings at the time, there are no interesting
  // values to return.
  const ClusterBindings *Cluster = B.lookup(LazyR->getBaseRegion());
  if (!Cluster)
    return (LazyBindingsMap[LCV.getCVData()] = std::move(List));

  SmallVector<BindingPair, 32> Bindings;
  collectSubRegionBindings(Bindings, svalBuilder, *Cluster, LazyR,
                           /*IncludeAllDefaultBindings=*/true);
  for (SmallVectorImpl<BindingPair>::const_iterator I = Bindings.begin(),
                                                    E = Bindings.end();
       I != E; ++I) {
    SVal V = I->second;
    if (V.isUnknownOrUndef() || V.isConstant())
      continue;

    if (Optional<nonloc::LazyCompoundVal> InnerLCV =
            V.getAs<nonloc::LazyCompoundVal>()) {
      const SValListTy &InnerList = getInterestingValues(*InnerLCV);
      List.insert(List.end(), InnerList.begin(), InnerList.end());
      continue;
    }

    List.push_back(V);
  }

  return (LazyBindingsMap[LCV.getCVData()] = std::move(List));
}

/// Return an existing lazy compound value for \p R if one is already in the
/// store, otherwise create a fresh one snapshotting the current bindings.
NonLoc RegionStoreManager::createLazyBinding(RegionBindingsConstRef B,
                                             const TypedValueRegion *R) {
  if (Optional<nonloc::LazyCompoundVal> V =
        getExistingLazyBinding(svalBuilder, B, R, false))
    return *V;

  return svalBuilder.makeLazyCompoundVal(StoreRef(B.asStore(), *this), R);
}

/// Returns true if \p RD has no fields and (for C++ records) no base classes.
static bool isRecordEmpty(const RecordDecl *RD) {
  if (!RD->field_empty())
    return false;
  if (const CXXRecordDecl *CRD = dyn_cast<CXXRecordDecl>(RD))
    return CRD->getNumBases() == 0;
  return true;
}

SVal RegionStoreManager::getBindingForStruct(RegionBindingsConstRef B,
                                             const TypedValueRegion *R) {
  const RecordDecl *RD = R->getValueType()->castAs<RecordType>()->getDecl();
  if (!RD->getDefinition() || isRecordEmpty(RD))
    return UnknownVal();

  return createLazyBinding(B, R);
}

SVal RegionStoreManager::getBindingForArray(RegionBindingsConstRef B,
                                            const TypedValueRegion *R) {
  assert(Ctx.getAsConstantArrayType(R->getValueType()) &&
         "Only constant array types can have compound bindings.");

  return createLazyBinding(B, R);
}

/// Returns true if \p region (normalized to its base region) appears in the
/// store either as the head of a cluster or as the value of some binding.
bool RegionStoreManager::includedInBindings(Store store,
                                            const MemRegion *region) const {
  RegionBindingsRef B = getRegionBindings(store);
  region = region->getBaseRegion();

  // Quick path: if the base is the head of a cluster, the region is live.
  if (B.lookup(region))
    return true;

  // Slow path: if the region is the VALUE of any binding, it is live.
  for (RegionBindingsRef::iterator RI = B.begin(), RE = B.end(); RI != RE; ++RI) {
    const ClusterBindings &Cluster = RI.getData();
    for (ClusterBindings::iterator CI = Cluster.begin(), CE = Cluster.end();
         CI != CE; ++CI) {
      const SVal &D = CI.getData();
      if (const MemRegion *R = D.getAsRegion())
        if (R->getBaseRegion() == region)
          return true;
    }
  }

  return false;
}

//===----------------------------------------------------------------------===//
// Binding values to regions.
//===----------------------------------------------------------------------===//

/// Remove the binding for the region referenced by \p L, if any; non-region
/// locations leave the store untouched.
StoreRef RegionStoreManager::killBinding(Store ST, Loc L) {
  if (Optional<loc::MemRegionVal> LV = L.getAs<loc::MemRegionVal>())
    if (const MemRegion* R = LV->getRegion())
      return StoreRef(getRegionBindings(ST).removeBinding(R)
                                           .asImmutableMap()
                                           .getRootWithoutRetain(),
                      *this);

  return StoreRef(ST, *this);
}

/// Bind \p V to location \p L, dispatching aggregates to the specialized
/// bindArray/bindStruct/bindVector/bindAggregate helpers.
RegionBindingsRef
RegionStoreManager::bind(RegionBindingsConstRef B, Loc L, SVal V) {
  if (L.getAs<loc::ConcreteInt>())
    return B;

  // If we get here, the location should be a region.
  const MemRegion *R = L.castAs<loc::MemRegionVal>().getRegion();

  // Check if the region is a struct region.
  if (const TypedValueRegion* TR = dyn_cast<TypedValueRegion>(R)) {
    QualType Ty = TR->getValueType();
    if (Ty->isArrayType())
      return bindArray(B, TR, V);
    if (Ty->isStructureOrClassType())
      return bindStruct(B, TR, V);
    if (Ty->isVectorType())
      return bindVector(B, TR, V);
    if (Ty->isUnionType())
      return bindAggregate(B, TR, V);
  }

  if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(R)) {
    // Binding directly to a symbolic region should be treated as binding
    // to element 0.
    QualType T = SR->getSymbol()->getType();
    if (T->isAnyPointerType() || T->isReferenceType())
      T = T->getPointeeType();

    R = GetElementZeroRegion(SR, T);
  }

  assert((!isa<CXXThisRegion>(R) || !B.lookup(R)) &&
         "'this' pointer is not an l-value and is not assignable");

  // Clear out bindings that may overlap with this binding.
  RegionBindingsRef NewB = removeSubRegionBindings(B, cast<SubRegion>(R));
  return NewB.addBinding(BindingKey::Make(R, BindingKey::Direct), V);
}

/// Install a default binding on \p R that represents implicit (zero-like)
/// initialization of values of type \p T.
RegionBindingsRef
RegionStoreManager::setImplicitDefaultValue(RegionBindingsConstRef B,
                                            const MemRegion *R,
                                            QualType T) {
  SVal V;

  if (Loc::isLocType(T))
    V = svalBuilder.makeNullWithType(T);
  else if (T->isIntegralOrEnumerationType())
    V = svalBuilder.makeZeroVal(T);
  else if (T->isStructureOrClassType() || T->isArrayType()) {
    // Set the default value to a zero constant when it is a structure
    // or array.  The type doesn't really matter.
    V = svalBuilder.makeZeroVal(Ctx.IntTy);
  }
  else {
    // We can't represent values of this type, but we still need to set a value
    // to record that the region has been initialized.
    // If this assertion ever fires, a new case should be added above -- we
    // should know how to default-initialize any value we can symbolicate.
    assert(!SymbolManager::canSymbolicate(T) && "This type is representable");
    V = UnknownVal();
  }

  return B.addBinding(R, BindingKey::Default, V);
}

/// Bind an initializer value \p Init to the array region \p R, binding each
/// element individually for explicit compound values and falling back to an
/// aggregate binding otherwise.
RegionBindingsRef
RegionStoreManager::bindArray(RegionBindingsConstRef B,
                              const TypedValueRegion* R,
                              SVal Init) {

  const ArrayType *AT =cast<ArrayType>(Ctx.getCanonicalType(R->getValueType()));
  QualType ElementTy = AT->getElementType();
  // Size is unset for variable-length / incomplete arrays.
  Optional<uint64_t> Size;

  if (const ConstantArrayType* CAT = dyn_cast<ConstantArrayType>(AT))
    Size = CAT->getSize().getZExtValue();

  // Check if the init expr is a literal. If so, bind the rvalue instead.
  // FIXME: It's not responsibility of the Store to transform this lvalue
  // to rvalue. ExprEngine or maybe even CFG should do this before binding.
  if (Optional<loc::MemRegionVal> MRV = Init.getAs<loc::MemRegionVal>()) {
    SVal V = getBinding(B.asStore(), *MRV, R->getValueType());
    return bindAggregate(B, R, V);
  }

  // Handle lazy compound values.
  if (Init.getAs<nonloc::LazyCompoundVal>())
    return bindAggregate(B, R, Init);

  if (Init.isUnknown())
    return bindAggregate(B, R, UnknownVal());

  // Remaining case: explicit compound values.
  const nonloc::CompoundVal& CV = Init.castAs<nonloc::CompoundVal>();
  nonloc::CompoundVal::iterator VI = CV.begin(), VE = CV.end();
  uint64_t i = 0;

  RegionBindingsRef NewB(B);

  for (; Size.hasValue() ? i < Size.getValue() : true ; ++i, ++VI) {
    // The init list might be shorter than the array length.
2432 if (VI == VE) 2433 break; 2434 2435 const NonLoc &Idx = svalBuilder.makeArrayIndex(i); 2436 const ElementRegion *ER = MRMgr.getElementRegion(ElementTy, Idx, R, Ctx); 2437 2438 if (ElementTy->isStructureOrClassType()) 2439 NewB = bindStruct(NewB, ER, *VI); 2440 else if (ElementTy->isArrayType()) 2441 NewB = bindArray(NewB, ER, *VI); 2442 else 2443 NewB = bind(NewB, loc::MemRegionVal(ER), *VI); 2444 } 2445 2446 // If the init list is shorter than the array length (or the array has 2447 // variable length), set the array default value. Values that are already set 2448 // are not overwritten. 2449 if (!Size.hasValue() || i < Size.getValue()) 2450 NewB = setImplicitDefaultValue(NewB, R, ElementTy); 2451 2452 return NewB; 2453 } 2454 2455 RegionBindingsRef RegionStoreManager::bindVector(RegionBindingsConstRef B, 2456 const TypedValueRegion* R, 2457 SVal V) { 2458 QualType T = R->getValueType(); 2459 const VectorType *VT = T->castAs<VectorType>(); // Use castAs for typedefs. 2460 2461 // Handle lazy compound values and symbolic values. 2462 if (V.getAs<nonloc::LazyCompoundVal>() || V.getAs<nonloc::SymbolVal>()) 2463 return bindAggregate(B, R, V); 2464 2465 // We may get non-CompoundVal accidentally due to imprecise cast logic or 2466 // that we are binding symbolic struct value. Kill the field values, and if 2467 // the value is symbolic go and bind it as a "default" binding. 
2468 if (!V.getAs<nonloc::CompoundVal>()) { 2469 return bindAggregate(B, R, UnknownVal()); 2470 } 2471 2472 QualType ElemType = VT->getElementType(); 2473 nonloc::CompoundVal CV = V.castAs<nonloc::CompoundVal>(); 2474 nonloc::CompoundVal::iterator VI = CV.begin(), VE = CV.end(); 2475 unsigned index = 0, numElements = VT->getNumElements(); 2476 RegionBindingsRef NewB(B); 2477 2478 for ( ; index != numElements ; ++index) { 2479 if (VI == VE) 2480 break; 2481 2482 NonLoc Idx = svalBuilder.makeArrayIndex(index); 2483 const ElementRegion *ER = MRMgr.getElementRegion(ElemType, Idx, R, Ctx); 2484 2485 if (ElemType->isArrayType()) 2486 NewB = bindArray(NewB, ER, *VI); 2487 else if (ElemType->isStructureOrClassType()) 2488 NewB = bindStruct(NewB, ER, *VI); 2489 else 2490 NewB = bind(NewB, loc::MemRegionVal(ER), *VI); 2491 } 2492 return NewB; 2493 } 2494 2495 Optional<RegionBindingsRef> 2496 RegionStoreManager::tryBindSmallStruct(RegionBindingsConstRef B, 2497 const TypedValueRegion *R, 2498 const RecordDecl *RD, 2499 nonloc::LazyCompoundVal LCV) { 2500 FieldVector Fields; 2501 2502 if (const CXXRecordDecl *Class = dyn_cast<CXXRecordDecl>(RD)) 2503 if (Class->getNumBases() != 0 || Class->getNumVBases() != 0) 2504 return None; 2505 2506 for (const auto *FD : RD->fields()) { 2507 if (FD->isUnnamedBitfield()) 2508 continue; 2509 2510 // If there are too many fields, or if any of the fields are aggregates, 2511 // just use the LCV as a default binding. 
    if (Fields.size() == SmallStructLimit)
      return None;

    QualType Ty = FD->getType();
    if (!(Ty->isScalarType() || Ty->isReferenceType()))
      return None;

    Fields.push_back(FD);
  }

  RegionBindingsRef NewB = B;

  // Copy each field's value out of the lazy snapshot into the destination.
  for (FieldVector::iterator I = Fields.begin(), E = Fields.end(); I != E; ++I){
    const FieldRegion *SourceFR = MRMgr.getFieldRegion(*I, LCV.getRegion());
    SVal V = getBindingForField(getRegionBindings(LCV.getStore()), SourceFR);

    const FieldRegion *DestFR = MRMgr.getFieldRegion(*I, R);
    NewB = bind(NewB, loc::MemRegionVal(DestFR), V);
  }

  return NewB;
}

/// Bind an initializer value \p V to the record region \p R, unwrapping
/// compound values into per-base and per-field bindings where possible.
RegionBindingsRef RegionStoreManager::bindStruct(RegionBindingsConstRef B,
                                                 const TypedValueRegion *R,
                                                 SVal V) {
  QualType T = R->getValueType();
  assert(T->isStructureOrClassType());

  const RecordType* RT = T->castAs<RecordType>();
  const RecordDecl *RD = RT->getDecl();

  if (!RD->isCompleteDefinition())
    return B;

  // Handle lazy compound values and symbolic values.
  if (Optional<nonloc::LazyCompoundVal> LCV =
        V.getAs<nonloc::LazyCompoundVal>()) {
    if (Optional<RegionBindingsRef> NewB = tryBindSmallStruct(B, R, RD, *LCV))
      return *NewB;
    return bindAggregate(B, R, V);
  }
  if (V.getAs<nonloc::SymbolVal>())
    return bindAggregate(B, R, V);

  // We may get non-CompoundVal accidentally due to imprecise cast logic or
  // that we are binding symbolic struct value. Kill the field values, and if
  // the value is symbolic go and bind it as a "default" binding.
  if (V.isUnknown() || !V.getAs<nonloc::CompoundVal>())
    return bindAggregate(B, R, UnknownVal());

  // The raw CompoundVal is essentially a symbolic InitListExpr: an (immutable)
  // list of other values. It appears pretty much only when there's an actual
  // initializer list expression in the program, and the analyzer tries to
  // unwrap it as soon as possible.
  // This code is where such unwrap happens: when the compound value is put into
  // the object that it was supposed to initialize (it's an *initializer* list,
  // after all), instead of binding the whole value to the whole object, we bind
  // sub-values to sub-objects. Sub-values may themselves be compound values,
  // and in this case the procedure becomes recursive.
  // FIXME: The annoying part about compound values is that they don't carry
  // any sort of information about which value corresponds to which sub-object.
  // It's simply a list of values in the middle of nowhere; we expect to match
  // them to sub-objects, essentially, "by index": first value binds to
  // the first field, second value binds to the second field, etc.
  // It would have been much safer to organize non-lazy compound values as
  // a mapping from fields/bases to values.
  const nonloc::CompoundVal& CV = V.castAs<nonloc::CompoundVal>();
  nonloc::CompoundVal::iterator VI = CV.begin(), VE = CV.end();

  RegionBindingsRef NewB(B);

  // In C++17 aggregates may have base classes, handle those as well.
  // They appear before fields in the initializer list / compound value.
  if (const auto *CRD = dyn_cast<CXXRecordDecl>(RD)) {
    // If the object was constructed with a constructor, its value is a
    // LazyCompoundVal. If it's a raw CompoundVal, it means that we're
    // performing aggregate initialization. The only exception from this
    // rule is sending an Objective-C++ message that returns a C++ object
    // to a nil receiver; in this case the semantics is to return a
    // zero-initialized object even if it's a C++ object that doesn't have
    // this sort of constructor; the CompoundVal is empty in this case.
    assert((CRD->isAggregate() || (Ctx.getLangOpts().ObjC && VI == VE)) &&
           "Non-aggregates are constructed with a constructor!");

    // NOTE: the loop variable `B` (a CXXBaseSpecifier) shadows the
    // RegionBindingsConstRef parameter `B`; the parameter is not used below.
    for (const auto &B : CRD->bases()) {
      // (Multiple inheritance is fine though.)
      assert(!B.isVirtual() && "Aggregates cannot have virtual base classes!");

      if (VI == VE)
        break;

      QualType BTy = B.getType();
      assert(BTy->isStructureOrClassType() && "Base classes must be classes!");

      const CXXRecordDecl *BRD = BTy->getAsCXXRecordDecl();
      assert(BRD && "Base classes must be C++ classes!");

      const CXXBaseObjectRegion *BR =
          MRMgr.getCXXBaseObjectRegion(BRD, R, /*IsVirtual=*/false);

      NewB = bindStruct(NewB, BR, *VI);

      ++VI;
    }
  }

  RecordDecl::field_iterator FI, FE;

  for (FI = RD->field_begin(), FE = RD->field_end(); FI != FE; ++FI) {

    if (VI == VE)
      break;

    // Skip any unnamed bitfields to stay in sync with the initializers.
    if (FI->isUnnamedBitfield())
      continue;

    QualType FTy = FI->getType();
    const FieldRegion* FR = MRMgr.getFieldRegion(*FI, R);

    if (FTy->isArrayType())
      NewB = bindArray(NewB, FR, *VI);
    else if (FTy->isStructureOrClassType())
      NewB = bindStruct(NewB, FR, *VI);
    else
      NewB = bind(NewB, loc::MemRegionVal(FR), *VI);
    ++VI;
  }

  // There may be fewer values in the initializer list than the fields of the
  // struct; the remaining fields get a zero default binding.
  if (FI != FE) {
    NewB = NewB.addBinding(R, BindingKey::Default,
                           svalBuilder.makeIntVal(0, false));
  }

  return NewB;
}

/// Bind \p Val as a single default binding covering all of \p R, invalidating
/// any existing sub-region bindings.
RegionBindingsRef
RegionStoreManager::bindAggregate(RegionBindingsConstRef B,
                                  const TypedRegion *R,
                                  SVal Val) {
  // Remove the old bindings, using 'R' as the root of all regions
  // we will invalidate. Then add the new binding.
  return removeSubRegionBindings(B, R).addBinding(R, BindingKey::Default, Val);
}

//===----------------------------------------------------------------------===//
// State pruning.
//===----------------------------------------------------------------------===//

namespace {
/// Cluster worklist that marks live bindings during dead-binding removal;
/// symbolic regions whose liveness is not yet known are postponed.
class RemoveDeadBindingsWorker
    : public ClusterAnalysis<RemoveDeadBindingsWorker> {
  SmallVector<const SymbolicRegion *, 12> Postponed;
  SymbolReaper &SymReaper;
  const StackFrameContext *CurrentLCtx;

public:
  RemoveDeadBindingsWorker(RegionStoreManager &rm,
                           ProgramStateManager &stateMgr,
                           RegionBindingsRef b, SymbolReaper &symReaper,
                           const StackFrameContext *LCtx)
      : ClusterAnalysis<RemoveDeadBindingsWorker>(rm, stateMgr, b),
        SymReaper(symReaper), CurrentLCtx(LCtx) {}

  // Called by ClusterAnalysis.
  void VisitAddedToCluster(const MemRegion *baseR, const ClusterBindings &C);
  void VisitCluster(const MemRegion *baseR, const ClusterBindings *C);
  using ClusterAnalysis<RemoveDeadBindingsWorker>::VisitCluster;

  using ClusterAnalysis::AddToWorkList;

  bool AddToWorkList(const MemRegion *R);

  bool UpdatePostponed();
  void VisitBinding(SVal V);
};
} // namespace

bool RemoveDeadBindingsWorker::AddToWorkList(const MemRegion *R) {
  // Work is always scheduled at base-region (cluster) granularity.
  const MemRegion *BaseR = R->getBaseRegion();
  return AddToWorkList(WorkListElement(BaseR), getCluster(BaseR));
}

void RemoveDeadBindingsWorker::VisitAddedToCluster(const MemRegion *baseR,
                                                   const ClusterBindings &C) {

  if (const VarRegion *VR = dyn_cast<VarRegion>(baseR)) {
    if (SymReaper.isLive(VR))
      AddToWorkList(baseR, &C);

    return;
  }

  if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(baseR)) {
    if (SymReaper.isLive(SR->getSymbol()))
      AddToWorkList(SR, &C);
    else
      Postponed.push_back(SR);

    return;
  }

  if
(isa<NonStaticGlobalSpaceRegion>(baseR)) { 2718 AddToWorkList(baseR, &C); 2719 return; 2720 } 2721 2722 // CXXThisRegion in the current or parent location context is live. 2723 if (const CXXThisRegion *TR = dyn_cast<CXXThisRegion>(baseR)) { 2724 const auto *StackReg = 2725 cast<StackArgumentsSpaceRegion>(TR->getSuperRegion()); 2726 const StackFrameContext *RegCtx = StackReg->getStackFrame(); 2727 if (CurrentLCtx && 2728 (RegCtx == CurrentLCtx || RegCtx->isParentOf(CurrentLCtx))) 2729 AddToWorkList(TR, &C); 2730 } 2731 } 2732 2733 void RemoveDeadBindingsWorker::VisitCluster(const MemRegion *baseR, 2734 const ClusterBindings *C) { 2735 if (!C) 2736 return; 2737 2738 // Mark the symbol for any SymbolicRegion with live bindings as live itself. 2739 // This means we should continue to track that symbol. 2740 if (const SymbolicRegion *SymR = dyn_cast<SymbolicRegion>(baseR)) 2741 SymReaper.markLive(SymR->getSymbol()); 2742 2743 for (ClusterBindings::iterator I = C->begin(), E = C->end(); I != E; ++I) { 2744 // Element index of a binding key is live. 2745 SymReaper.markElementIndicesLive(I.getKey().getRegion()); 2746 2747 VisitBinding(I.getData()); 2748 } 2749 } 2750 2751 void RemoveDeadBindingsWorker::VisitBinding(SVal V) { 2752 // Is it a LazyCompoundVal? All referenced regions are live as well. 2753 if (Optional<nonloc::LazyCompoundVal> LCS = 2754 V.getAs<nonloc::LazyCompoundVal>()) { 2755 2756 const RegionStoreManager::SValListTy &Vals = RM.getInterestingValues(*LCS); 2757 2758 for (RegionStoreManager::SValListTy::const_iterator I = Vals.begin(), 2759 E = Vals.end(); 2760 I != E; ++I) 2761 VisitBinding(*I); 2762 2763 return; 2764 } 2765 2766 // If V is a region, then add it to the worklist. 2767 if (const MemRegion *R = V.getAsRegion()) { 2768 AddToWorkList(R); 2769 SymReaper.markLive(R); 2770 2771 // All regions captured by a block are also live. 
2772 if (const BlockDataRegion *BR = dyn_cast<BlockDataRegion>(R)) { 2773 BlockDataRegion::referenced_vars_iterator I = BR->referenced_vars_begin(), 2774 E = BR->referenced_vars_end(); 2775 for ( ; I != E; ++I) 2776 AddToWorkList(I.getCapturedRegion()); 2777 } 2778 } 2779 2780 2781 // Update the set of live symbols. 2782 for (auto SI = V.symbol_begin(), SE = V.symbol_end(); SI!=SE; ++SI) 2783 SymReaper.markLive(*SI); 2784 } 2785 2786 bool RemoveDeadBindingsWorker::UpdatePostponed() { 2787 // See if any postponed SymbolicRegions are actually live now, after 2788 // having done a scan. 2789 bool Changed = false; 2790 2791 for (auto I = Postponed.begin(), E = Postponed.end(); I != E; ++I) { 2792 if (const SymbolicRegion *SR = *I) { 2793 if (SymReaper.isLive(SR->getSymbol())) { 2794 Changed |= AddToWorkList(SR); 2795 *I = nullptr; 2796 } 2797 } 2798 } 2799 2800 return Changed; 2801 } 2802 2803 StoreRef RegionStoreManager::removeDeadBindings(Store store, 2804 const StackFrameContext *LCtx, 2805 SymbolReaper& SymReaper) { 2806 RegionBindingsRef B = getRegionBindings(store); 2807 RemoveDeadBindingsWorker W(*this, StateMgr, B, SymReaper, LCtx); 2808 W.GenerateClusters(); 2809 2810 // Enqueue the region roots onto the worklist. 2811 for (SymbolReaper::region_iterator I = SymReaper.region_begin(), 2812 E = SymReaper.region_end(); I != E; ++I) { 2813 W.AddToWorkList(*I); 2814 } 2815 2816 do W.RunWorkList(); while (W.UpdatePostponed()); 2817 2818 // We have now scanned the store, marking reachable regions and symbols 2819 // as live. We now remove all the regions that are dead from the store 2820 // as well as update DSymbols with the set symbols that are now dead. 2821 for (RegionBindingsRef::iterator I = B.begin(), E = B.end(); I != E; ++I) { 2822 const MemRegion *Base = I.getKey(); 2823 2824 // If the cluster has been visited, we know the region has been marked. 2825 // Otherwise, remove the dead entry. 
2826 if (!W.isVisited(Base)) 2827 B = B.remove(Base); 2828 } 2829 2830 return StoreRef(B.asStore(), *this); 2831 } 2832 2833 //===----------------------------------------------------------------------===// 2834 // Utility methods. 2835 //===----------------------------------------------------------------------===// 2836 2837 void RegionStoreManager::printJson(raw_ostream &Out, Store S, const char *NL, 2838 unsigned int Space, bool IsDot) const { 2839 RegionBindingsRef Bindings = getRegionBindings(S); 2840 2841 Indent(Out, Space, IsDot) << "\"store\": "; 2842 2843 if (Bindings.isEmpty()) { 2844 Out << "null," << NL; 2845 return; 2846 } 2847 2848 Out << "{ \"pointer\": \"" << Bindings.asStore() << "\", \"items\": [" << NL; 2849 Bindings.printJson(Out, NL, Space + 1, IsDot); 2850 Indent(Out, Space, IsDot) << "]}," << NL; 2851 } 2852