//- CFLAndersAliasAnalysis.cpp - Inclusion-based Alias Analysis -----*- C++-*-//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements a CFL-based, summary-based alias analysis algorithm. It
// differs from CFLSteensAliasAnalysis in its inclusion-based nature while
// CFLSteensAliasAnalysis is unification-based. This pass has worse performance
// than CFLSteensAliasAnalysis (the worst case complexity of
// CFLAndersAliasAnalysis is cubic, while the worst case complexity of
// CFLSteensAliasAnalysis is almost linear), but it is able to yield more
// precise analysis results. The precision of this analysis is roughly the same
// as that of a one-level context-sensitive Andersen's algorithm.
//
// The algorithm used here is based on the recursive state machine matching
// scheme proposed in "Demand-driven alias analysis for C" by Xin Zheng and Radu
// Rugina. The general idea is to extend the traditional transitive closure
// algorithm to perform CFL matching along the way: instead of recording
// "whether X is reachable from Y", we keep track of "whether X is reachable
// from Y at state Z", where the "state" field indicates where we are in the CFL
// matching process. To understand the matching better, it is advisable to have
// the state machine shown in Figure 3 of the paper available when reading the
// code: all we do here is to selectively expand the transitive closure by
// discarding edges that are not recognized by the state machine.
//
// There are two differences between our current implementation and the one
// described in the paper:
// - Our algorithm eagerly computes all alias pairs after the CFLGraph is built,
// while in the paper the authors did the computation in a demand-driven
// fashion. We did not implement the demand-driven algorithm due to the
// additional coding complexity and higher memory profile, but if we find it
// necessary we may switch to it eventually.
// - In the paper the authors use a state machine that does not distinguish
// value reads from value writes. For example, if Y is reachable from X at state
// S3, it may be the case that X is written into Y, or it may be the case that
// there's a third value Z that writes into both X and Y. To make that
// distinction (which is crucial in building function summaries as well as
// retrieving mod-ref info), we choose to duplicate some of the states in the
// paper's proposed state machine. The duplication does not change the set the
// machine accepts. Given a pair of reachable values, it only provides more
// detailed information on which value is being written into and which is being
// read from.
//
//===----------------------------------------------------------------------===//

// N.B. AliasAnalysis as a whole is phrased as a FunctionPass at the moment, and
// CFLAndersAA is interprocedural. This is *technically* A Bad Thing, because
// FunctionPasses are only allowed to inspect the Function that they're being
// run on. Realistically, this likely isn't a problem until we allow
// FunctionPasses to run concurrently.
55 56 #include "llvm/Analysis/CFLAndersAliasAnalysis.h" 57 #include "CFLGraph.h" 58 #include "llvm/ADT/DenseSet.h" 59 #include "llvm/Pass.h" 60 61 using namespace llvm; 62 using namespace llvm::cflaa; 63 64 #define DEBUG_TYPE "cfl-anders-aa" 65 66 CFLAndersAAResult::CFLAndersAAResult(const TargetLibraryInfo &TLI) : TLI(TLI) {} 67 CFLAndersAAResult::CFLAndersAAResult(CFLAndersAAResult &&RHS) 68 : AAResultBase(std::move(RHS)), TLI(RHS.TLI) {} 69 CFLAndersAAResult::~CFLAndersAAResult() {} 70 71 namespace { 72 73 enum class MatchState : uint8_t { 74 // The following state represents S1 in the paper. 75 FlowFromReadOnly = 0, 76 // The following two states together represent S2 in the paper. 77 // The 'NoReadWrite' suffix indicates that there exists an alias path that 78 // does not contain assignment and reverse assignment edges. 79 // The 'ReadOnly' suffix indicates that there exists an alias path that 80 // contains reverse assignment edges only. 81 FlowFromMemAliasNoReadWrite, 82 FlowFromMemAliasReadOnly, 83 // The following two states together represent S3 in the paper. 84 // The 'WriteOnly' suffix indicates that there exists an alias path that 85 // contains assignment edges only. 86 // The 'ReadWrite' suffix indicates that there exists an alias path that 87 // contains both assignment and reverse assignment edges. Note that if X and Y 88 // are reachable at 'ReadWrite' state, it does NOT mean X is both read from 89 // and written to Y. Instead, it means that a third value Z is written to both 90 // X and Y. 91 FlowToWriteOnly, 92 FlowToReadWrite, 93 // The following two states together represent S4 in the paper. 
94 FlowToMemAliasWriteOnly, 95 FlowToMemAliasReadWrite, 96 }; 97 98 typedef std::bitset<7> StateSet; 99 const unsigned ReadOnlyStateMask = 100 (1U << static_cast<uint8_t>(MatchState::FlowFromReadOnly)) | 101 (1U << static_cast<uint8_t>(MatchState::FlowFromMemAliasReadOnly)); 102 const unsigned WriteOnlyStateMask = 103 (1U << static_cast<uint8_t>(MatchState::FlowToWriteOnly)) | 104 (1U << static_cast<uint8_t>(MatchState::FlowToMemAliasWriteOnly)); 105 106 // A pair that consists of a value and an offset 107 struct OffsetValue { 108 const Value *Val; 109 int64_t Offset; 110 }; 111 112 bool operator==(OffsetValue LHS, OffsetValue RHS) { 113 return LHS.Val == RHS.Val && LHS.Offset == RHS.Offset; 114 } 115 bool operator<(OffsetValue LHS, OffsetValue RHS) { 116 return std::less<const Value *>()(LHS.Val, RHS.Val) || 117 (LHS.Val == RHS.Val && LHS.Offset < RHS.Offset); 118 } 119 120 // A pair that consists of an InstantiatedValue and an offset 121 struct OffsetInstantiatedValue { 122 InstantiatedValue IVal; 123 int64_t Offset; 124 }; 125 126 bool operator==(OffsetInstantiatedValue LHS, OffsetInstantiatedValue RHS) { 127 return LHS.IVal == RHS.IVal && LHS.Offset == RHS.Offset; 128 } 129 130 // We use ReachabilitySet to keep track of value aliases (The nonterminal "V" in 131 // the paper) during the analysis. 
132 class ReachabilitySet { 133 typedef DenseMap<InstantiatedValue, StateSet> ValueStateMap; 134 typedef DenseMap<InstantiatedValue, ValueStateMap> ValueReachMap; 135 ValueReachMap ReachMap; 136 137 public: 138 typedef ValueStateMap::const_iterator const_valuestate_iterator; 139 typedef ValueReachMap::const_iterator const_value_iterator; 140 141 // Insert edge 'From->To' at state 'State' 142 bool insert(InstantiatedValue From, InstantiatedValue To, MatchState State) { 143 assert(From != To); 144 auto &States = ReachMap[To][From]; 145 auto Idx = static_cast<size_t>(State); 146 if (!States.test(Idx)) { 147 States.set(Idx); 148 return true; 149 } 150 return false; 151 } 152 153 // Return the set of all ('From', 'State') pair for a given node 'To' 154 iterator_range<const_valuestate_iterator> 155 reachableValueAliases(InstantiatedValue V) const { 156 auto Itr = ReachMap.find(V); 157 if (Itr == ReachMap.end()) 158 return make_range<const_valuestate_iterator>(const_valuestate_iterator(), 159 const_valuestate_iterator()); 160 return make_range<const_valuestate_iterator>(Itr->second.begin(), 161 Itr->second.end()); 162 } 163 164 iterator_range<const_value_iterator> value_mappings() const { 165 return make_range<const_value_iterator>(ReachMap.begin(), ReachMap.end()); 166 } 167 }; 168 169 // We use AliasMemSet to keep track of all memory aliases (the nonterminal "M" 170 // in the paper) during the analysis. 
171 class AliasMemSet { 172 typedef DenseSet<InstantiatedValue> MemSet; 173 typedef DenseMap<InstantiatedValue, MemSet> MemMapType; 174 MemMapType MemMap; 175 176 public: 177 typedef MemSet::const_iterator const_mem_iterator; 178 179 bool insert(InstantiatedValue LHS, InstantiatedValue RHS) { 180 // Top-level values can never be memory aliases because one cannot take the 181 // addresses of them 182 assert(LHS.DerefLevel > 0 && RHS.DerefLevel > 0); 183 return MemMap[LHS].insert(RHS).second; 184 } 185 186 const MemSet *getMemoryAliases(InstantiatedValue V) const { 187 auto Itr = MemMap.find(V); 188 if (Itr == MemMap.end()) 189 return nullptr; 190 return &Itr->second; 191 } 192 }; 193 194 // We use AliasAttrMap to keep track of the AliasAttr of each node. 195 class AliasAttrMap { 196 typedef DenseMap<InstantiatedValue, AliasAttrs> MapType; 197 MapType AttrMap; 198 199 public: 200 typedef MapType::const_iterator const_iterator; 201 202 bool add(InstantiatedValue V, AliasAttrs Attr) { 203 auto &OldAttr = AttrMap[V]; 204 auto NewAttr = OldAttr | Attr; 205 if (OldAttr == NewAttr) 206 return false; 207 OldAttr = NewAttr; 208 return true; 209 } 210 211 AliasAttrs getAttrs(InstantiatedValue V) const { 212 AliasAttrs Attr; 213 auto Itr = AttrMap.find(V); 214 if (Itr != AttrMap.end()) 215 Attr = Itr->second; 216 return Attr; 217 } 218 219 iterator_range<const_iterator> mappings() const { 220 return make_range<const_iterator>(AttrMap.begin(), AttrMap.end()); 221 } 222 }; 223 224 struct WorkListItem { 225 InstantiatedValue From; 226 InstantiatedValue To; 227 MatchState State; 228 }; 229 230 struct ValueSummary { 231 struct Record { 232 InterfaceValue IValue; 233 unsigned DerefLevel; 234 }; 235 SmallVector<Record, 4> FromRecords, ToRecords; 236 }; 237 } 238 239 namespace llvm { 240 // Specialize DenseMapInfo for OffsetValue. 
241 template <> struct DenseMapInfo<OffsetValue> { 242 static OffsetValue getEmptyKey() { 243 return OffsetValue{DenseMapInfo<const Value *>::getEmptyKey(), 244 DenseMapInfo<int64_t>::getEmptyKey()}; 245 } 246 static OffsetValue getTombstoneKey() { 247 return OffsetValue{DenseMapInfo<const Value *>::getTombstoneKey(), 248 DenseMapInfo<int64_t>::getEmptyKey()}; 249 } 250 static unsigned getHashValue(const OffsetValue &OVal) { 251 return DenseMapInfo<std::pair<const Value *, int64_t>>::getHashValue( 252 std::make_pair(OVal.Val, OVal.Offset)); 253 } 254 static bool isEqual(const OffsetValue &LHS, const OffsetValue &RHS) { 255 return LHS == RHS; 256 } 257 }; 258 259 // Specialize DenseMapInfo for OffsetInstantiatedValue. 260 template <> struct DenseMapInfo<OffsetInstantiatedValue> { 261 static OffsetInstantiatedValue getEmptyKey() { 262 return OffsetInstantiatedValue{ 263 DenseMapInfo<InstantiatedValue>::getEmptyKey(), 264 DenseMapInfo<int64_t>::getEmptyKey()}; 265 } 266 static OffsetInstantiatedValue getTombstoneKey() { 267 return OffsetInstantiatedValue{ 268 DenseMapInfo<InstantiatedValue>::getTombstoneKey(), 269 DenseMapInfo<int64_t>::getEmptyKey()}; 270 } 271 static unsigned getHashValue(const OffsetInstantiatedValue &OVal) { 272 return DenseMapInfo<std::pair<InstantiatedValue, int64_t>>::getHashValue( 273 std::make_pair(OVal.IVal, OVal.Offset)); 274 } 275 static bool isEqual(const OffsetInstantiatedValue &LHS, 276 const OffsetInstantiatedValue &RHS) { 277 return LHS == RHS; 278 } 279 }; 280 } 281 282 class CFLAndersAAResult::FunctionInfo { 283 /// Map a value to other values that may alias it 284 /// Since the alias relation is symmetric, to save some space we assume values 285 /// are properly ordered: if a and b alias each other, and a < b, then b is in 286 /// AliasMap[a] but not vice versa. 
287 DenseMap<const Value *, std::vector<OffsetValue>> AliasMap; 288 289 /// Map a value to its corresponding AliasAttrs 290 DenseMap<const Value *, AliasAttrs> AttrMap; 291 292 /// Summary of externally visible effects. 293 AliasSummary Summary; 294 295 Optional<AliasAttrs> getAttrs(const Value *) const; 296 297 public: 298 FunctionInfo(const Function &, const SmallVectorImpl<Value *> &, 299 const ReachabilitySet &, const AliasAttrMap &); 300 301 bool mayAlias(const Value *, uint64_t, const Value *, uint64_t) const; 302 const AliasSummary &getAliasSummary() const { return Summary; } 303 }; 304 305 static bool hasReadOnlyState(StateSet Set) { 306 return (Set & StateSet(ReadOnlyStateMask)).any(); 307 } 308 309 static bool hasWriteOnlyState(StateSet Set) { 310 return (Set & StateSet(WriteOnlyStateMask)).any(); 311 } 312 313 static Optional<InterfaceValue> 314 getInterfaceValue(InstantiatedValue IValue, 315 const SmallVectorImpl<Value *> &RetVals) { 316 auto Val = IValue.Val; 317 318 Optional<unsigned> Index; 319 if (auto Arg = dyn_cast<Argument>(Val)) 320 Index = Arg->getArgNo() + 1; 321 else if (is_contained(RetVals, Val)) 322 Index = 0; 323 324 if (Index) 325 return InterfaceValue{*Index, IValue.DerefLevel}; 326 return None; 327 } 328 329 static void populateAttrMap(DenseMap<const Value *, AliasAttrs> &AttrMap, 330 const AliasAttrMap &AMap) { 331 for (const auto &Mapping : AMap.mappings()) { 332 auto IVal = Mapping.first; 333 334 // Insert IVal into the map 335 auto &Attr = AttrMap[IVal.Val]; 336 // AttrMap only cares about top-level values 337 if (IVal.DerefLevel == 0) 338 Attr |= Mapping.second; 339 } 340 } 341 342 static void 343 populateAliasMap(DenseMap<const Value *, std::vector<OffsetValue>> &AliasMap, 344 const ReachabilitySet &ReachSet) { 345 for (const auto &OuterMapping : ReachSet.value_mappings()) { 346 // AliasMap only cares about top-level values 347 if (OuterMapping.first.DerefLevel > 0) 348 continue; 349 350 auto Val = OuterMapping.first.Val; 351 
auto &AliasList = AliasMap[Val]; 352 for (const auto &InnerMapping : OuterMapping.second) { 353 // Again, AliasMap only cares about top-level values 354 if (InnerMapping.first.DerefLevel == 0) 355 AliasList.push_back(OffsetValue{InnerMapping.first.Val, UnknownOffset}); 356 } 357 358 // Sort AliasList for faster lookup 359 std::sort(AliasList.begin(), AliasList.end()); 360 } 361 } 362 363 static void populateExternalRelations( 364 SmallVectorImpl<ExternalRelation> &ExtRelations, const Function &Fn, 365 const SmallVectorImpl<Value *> &RetVals, const ReachabilitySet &ReachSet) { 366 // If a function only returns one of its argument X, then X will be both an 367 // argument and a return value at the same time. This is an edge case that 368 // needs special handling here. 369 for (const auto &Arg : Fn.args()) { 370 if (is_contained(RetVals, &Arg)) { 371 auto ArgVal = InterfaceValue{Arg.getArgNo() + 1, 0}; 372 auto RetVal = InterfaceValue{0, 0}; 373 ExtRelations.push_back(ExternalRelation{ArgVal, RetVal, 0}); 374 } 375 } 376 377 // Below is the core summary construction logic. 378 // A naive solution of adding only the value aliases that are parameters or 379 // return values in ReachSet to the summary won't work: It is possible that a 380 // parameter P is written into an intermediate value I, and the function 381 // subsequently returns *I. In that case, *I is does not value alias anything 382 // in ReachSet, and the naive solution will miss a summary edge from (P, 1) to 383 // (I, 1). 384 // To account for the aforementioned case, we need to check each non-parameter 385 // and non-return value for the possibility of acting as an intermediate. 386 // 'ValueMap' here records, for each value, which InterfaceValues read from or 387 // write into it. If both the read list and the write list of a given value 388 // are non-empty, we know that a particular value is an intermidate and we 389 // need to add summary edges from the writes to the reads. 
390 DenseMap<Value *, ValueSummary> ValueMap; 391 for (const auto &OuterMapping : ReachSet.value_mappings()) { 392 if (auto Dst = getInterfaceValue(OuterMapping.first, RetVals)) { 393 for (const auto &InnerMapping : OuterMapping.second) { 394 // If Src is a param/return value, we get a same-level assignment. 395 if (auto Src = getInterfaceValue(InnerMapping.first, RetVals)) { 396 // This may happen if both Dst and Src are return values 397 if (*Dst == *Src) 398 continue; 399 400 if (hasReadOnlyState(InnerMapping.second)) 401 ExtRelations.push_back(ExternalRelation{*Dst, *Src, UnknownOffset}); 402 // No need to check for WriteOnly state, since ReachSet is symmetric 403 } else { 404 // If Src is not a param/return, add it to ValueMap 405 auto SrcIVal = InnerMapping.first; 406 if (hasReadOnlyState(InnerMapping.second)) 407 ValueMap[SrcIVal.Val].FromRecords.push_back( 408 ValueSummary::Record{*Dst, SrcIVal.DerefLevel}); 409 if (hasWriteOnlyState(InnerMapping.second)) 410 ValueMap[SrcIVal.Val].ToRecords.push_back( 411 ValueSummary::Record{*Dst, SrcIVal.DerefLevel}); 412 } 413 } 414 } 415 } 416 417 for (const auto &Mapping : ValueMap) { 418 for (const auto &FromRecord : Mapping.second.FromRecords) { 419 for (const auto &ToRecord : Mapping.second.ToRecords) { 420 auto ToLevel = ToRecord.DerefLevel; 421 auto FromLevel = FromRecord.DerefLevel; 422 // Same-level assignments should have already been processed by now 423 if (ToLevel == FromLevel) 424 continue; 425 426 auto SrcIndex = FromRecord.IValue.Index; 427 auto SrcLevel = FromRecord.IValue.DerefLevel; 428 auto DstIndex = ToRecord.IValue.Index; 429 auto DstLevel = ToRecord.IValue.DerefLevel; 430 if (ToLevel > FromLevel) 431 SrcLevel += ToLevel - FromLevel; 432 else 433 DstLevel += FromLevel - ToLevel; 434 435 ExtRelations.push_back(ExternalRelation{ 436 InterfaceValue{SrcIndex, SrcLevel}, 437 InterfaceValue{DstIndex, DstLevel}, UnknownOffset}); 438 } 439 } 440 } 441 442 // Remove duplicates in ExtRelations 443 
std::sort(ExtRelations.begin(), ExtRelations.end()); 444 ExtRelations.erase(std::unique(ExtRelations.begin(), ExtRelations.end()), 445 ExtRelations.end()); 446 } 447 448 static void populateExternalAttributes( 449 SmallVectorImpl<ExternalAttribute> &ExtAttributes, const Function &Fn, 450 const SmallVectorImpl<Value *> &RetVals, const AliasAttrMap &AMap) { 451 for (const auto &Mapping : AMap.mappings()) { 452 if (auto IVal = getInterfaceValue(Mapping.first, RetVals)) { 453 auto Attr = getExternallyVisibleAttrs(Mapping.second); 454 if (Attr.any()) 455 ExtAttributes.push_back(ExternalAttribute{*IVal, Attr}); 456 } 457 } 458 } 459 460 CFLAndersAAResult::FunctionInfo::FunctionInfo( 461 const Function &Fn, const SmallVectorImpl<Value *> &RetVals, 462 const ReachabilitySet &ReachSet, const AliasAttrMap &AMap) { 463 populateAttrMap(AttrMap, AMap); 464 populateExternalAttributes(Summary.RetParamAttributes, Fn, RetVals, AMap); 465 populateAliasMap(AliasMap, ReachSet); 466 populateExternalRelations(Summary.RetParamRelations, Fn, RetVals, ReachSet); 467 } 468 469 Optional<AliasAttrs> 470 CFLAndersAAResult::FunctionInfo::getAttrs(const Value *V) const { 471 assert(V != nullptr); 472 473 auto Itr = AttrMap.find(V); 474 if (Itr != AttrMap.end()) 475 return Itr->second; 476 return None; 477 } 478 479 bool CFLAndersAAResult::FunctionInfo::mayAlias(const Value *LHS, 480 uint64_t LHSSize, 481 const Value *RHS, 482 uint64_t RHSSize) const { 483 assert(LHS && RHS); 484 485 // Check if we've seen LHS and RHS before. Sometimes LHS or RHS can be created 486 // after the analysis gets executed, and we want to be conservative in those 487 // cases. 
488 auto MaybeAttrsA = getAttrs(LHS); 489 auto MaybeAttrsB = getAttrs(RHS); 490 if (!MaybeAttrsA || !MaybeAttrsB) 491 return true; 492 493 // Check AliasAttrs before AliasMap lookup since it's cheaper 494 auto AttrsA = *MaybeAttrsA; 495 auto AttrsB = *MaybeAttrsB; 496 if (hasUnknownOrCallerAttr(AttrsA)) 497 return AttrsB.any(); 498 if (hasUnknownOrCallerAttr(AttrsB)) 499 return AttrsA.any(); 500 if (isGlobalOrArgAttr(AttrsA)) 501 return isGlobalOrArgAttr(AttrsB); 502 if (isGlobalOrArgAttr(AttrsB)) 503 return isGlobalOrArgAttr(AttrsA); 504 505 // At this point both LHS and RHS should point to locally allocated objects 506 507 auto Itr = AliasMap.find(LHS); 508 if (Itr != AliasMap.end()) { 509 510 // Find out all (X, Offset) where X == RHS 511 auto Comparator = [](OffsetValue LHS, OffsetValue RHS) { 512 return std::less<const Value *>()(LHS.Val, RHS.Val); 513 }; 514 #ifdef EXPENSIVE_CHECKS 515 assert(std::is_sorted(Itr->second.begin(), Itr->second.end(), Comparator)); 516 #endif 517 auto RangePair = std::equal_range(Itr->second.begin(), Itr->second.end(), 518 OffsetValue{RHS, 0}, Comparator); 519 520 if (RangePair.first != RangePair.second) { 521 // Be conservative about UnknownSize 522 if (LHSSize == MemoryLocation::UnknownSize || 523 RHSSize == MemoryLocation::UnknownSize) 524 return true; 525 526 for (const auto &OVal : make_range(RangePair)) { 527 // Be conservative about UnknownOffset 528 if (OVal.Offset == UnknownOffset) 529 return true; 530 531 // We know that LHS aliases (RHS + OVal.Offset) if the control flow 532 // reaches here. The may-alias query essentially becomes integer 533 // range-overlap queries over two ranges [OVal.Offset, OVal.Offset + 534 // LHSSize) and [0, RHSSize). 535 536 // Try to be conservative on super large offsets 537 if (LLVM_UNLIKELY(LHSSize > INT64_MAX || RHSSize > INT64_MAX)) 538 return true; 539 540 auto LHSStart = OVal.Offset; 541 // FIXME: Do we need to guard against integer overflow? 
542 auto LHSEnd = OVal.Offset + static_cast<int64_t>(LHSSize); 543 auto RHSStart = 0; 544 auto RHSEnd = static_cast<int64_t>(RHSSize); 545 if (LHSEnd > RHSStart && LHSStart < RHSEnd) 546 return true; 547 } 548 } 549 } 550 551 return false; 552 } 553 554 static void propagate(InstantiatedValue From, InstantiatedValue To, 555 MatchState State, ReachabilitySet &ReachSet, 556 std::vector<WorkListItem> &WorkList) { 557 if (From == To) 558 return; 559 if (ReachSet.insert(From, To, State)) 560 WorkList.push_back(WorkListItem{From, To, State}); 561 } 562 563 static void initializeWorkList(std::vector<WorkListItem> &WorkList, 564 ReachabilitySet &ReachSet, 565 const CFLGraph &Graph) { 566 for (const auto &Mapping : Graph.value_mappings()) { 567 auto Val = Mapping.first; 568 auto &ValueInfo = Mapping.second; 569 assert(ValueInfo.getNumLevels() > 0); 570 571 // Insert all immediate assignment neighbors to the worklist 572 for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) { 573 auto Src = InstantiatedValue{Val, I}; 574 // If there's an assignment edge from X to Y, it means Y is reachable from 575 // X at S2 and X is reachable from Y at S1 576 for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges) { 577 propagate(Edge.Other, Src, MatchState::FlowFromReadOnly, ReachSet, 578 WorkList); 579 propagate(Src, Edge.Other, MatchState::FlowToWriteOnly, ReachSet, 580 WorkList); 581 } 582 } 583 } 584 } 585 586 static Optional<InstantiatedValue> getNodeBelow(const CFLGraph &Graph, 587 InstantiatedValue V) { 588 auto NodeBelow = InstantiatedValue{V.Val, V.DerefLevel + 1}; 589 if (Graph.getNode(NodeBelow)) 590 return NodeBelow; 591 return None; 592 } 593 594 static void processWorkListItem(const WorkListItem &Item, const CFLGraph &Graph, 595 ReachabilitySet &ReachSet, AliasMemSet &MemSet, 596 std::vector<WorkListItem> &WorkList) { 597 auto FromNode = Item.From; 598 auto ToNode = Item.To; 599 600 auto NodeInfo = Graph.getNode(ToNode); 601 assert(NodeInfo != nullptr); 602 603 // 
TODO: propagate field offsets 604 605 // FIXME: Here is a neat trick we can do: since both ReachSet and MemSet holds 606 // relations that are symmetric, we could actually cut the storage by half by 607 // sorting FromNode and ToNode before insertion happens. 608 609 // The newly added value alias pair may pontentially generate more memory 610 // alias pairs. Check for them here. 611 auto FromNodeBelow = getNodeBelow(Graph, FromNode); 612 auto ToNodeBelow = getNodeBelow(Graph, ToNode); 613 if (FromNodeBelow && ToNodeBelow && 614 MemSet.insert(*FromNodeBelow, *ToNodeBelow)) { 615 propagate(*FromNodeBelow, *ToNodeBelow, 616 MatchState::FlowFromMemAliasNoReadWrite, ReachSet, WorkList); 617 for (const auto &Mapping : ReachSet.reachableValueAliases(*FromNodeBelow)) { 618 auto Src = Mapping.first; 619 auto MemAliasPropagate = [&](MatchState FromState, MatchState ToState) { 620 if (Mapping.second.test(static_cast<size_t>(FromState))) 621 propagate(Src, *ToNodeBelow, ToState, ReachSet, WorkList); 622 }; 623 624 MemAliasPropagate(MatchState::FlowFromReadOnly, 625 MatchState::FlowFromMemAliasReadOnly); 626 MemAliasPropagate(MatchState::FlowToWriteOnly, 627 MatchState::FlowToMemAliasWriteOnly); 628 MemAliasPropagate(MatchState::FlowToReadWrite, 629 MatchState::FlowToMemAliasReadWrite); 630 } 631 } 632 633 // This is the core of the state machine walking algorithm. We expand ReachSet 634 // based on which state we are at (which in turn dictates what edges we 635 // should examine) 636 // From a high-level point of view, the state machine here guarantees two 637 // properties: 638 // - If *X and *Y are memory aliases, then X and Y are value aliases 639 // - If Y is an alias of X, then reverse assignment edges (if there is any) 640 // should precede any assignment edges on the path from X to Y. 
641 auto NextAssignState = [&](MatchState State) { 642 for (const auto &AssignEdge : NodeInfo->Edges) 643 propagate(FromNode, AssignEdge.Other, State, ReachSet, WorkList); 644 }; 645 auto NextRevAssignState = [&](MatchState State) { 646 for (const auto &RevAssignEdge : NodeInfo->ReverseEdges) 647 propagate(FromNode, RevAssignEdge.Other, State, ReachSet, WorkList); 648 }; 649 auto NextMemState = [&](MatchState State) { 650 if (auto AliasSet = MemSet.getMemoryAliases(ToNode)) { 651 for (const auto &MemAlias : *AliasSet) 652 propagate(FromNode, MemAlias, State, ReachSet, WorkList); 653 } 654 }; 655 656 switch (Item.State) { 657 case MatchState::FlowFromReadOnly: { 658 NextRevAssignState(MatchState::FlowFromReadOnly); 659 NextAssignState(MatchState::FlowToReadWrite); 660 NextMemState(MatchState::FlowFromMemAliasReadOnly); 661 break; 662 } 663 case MatchState::FlowFromMemAliasNoReadWrite: { 664 NextRevAssignState(MatchState::FlowFromReadOnly); 665 NextAssignState(MatchState::FlowToWriteOnly); 666 break; 667 } 668 case MatchState::FlowFromMemAliasReadOnly: { 669 NextRevAssignState(MatchState::FlowFromReadOnly); 670 NextAssignState(MatchState::FlowToReadWrite); 671 break; 672 } 673 case MatchState::FlowToWriteOnly: { 674 NextAssignState(MatchState::FlowToWriteOnly); 675 NextMemState(MatchState::FlowToMemAliasWriteOnly); 676 break; 677 } 678 case MatchState::FlowToReadWrite: { 679 NextAssignState(MatchState::FlowToReadWrite); 680 NextMemState(MatchState::FlowToMemAliasReadWrite); 681 break; 682 } 683 case MatchState::FlowToMemAliasWriteOnly: { 684 NextAssignState(MatchState::FlowToWriteOnly); 685 break; 686 } 687 case MatchState::FlowToMemAliasReadWrite: { 688 NextAssignState(MatchState::FlowToReadWrite); 689 break; 690 } 691 } 692 } 693 694 static AliasAttrMap buildAttrMap(const CFLGraph &Graph, 695 const ReachabilitySet &ReachSet) { 696 AliasAttrMap AttrMap; 697 std::vector<InstantiatedValue> WorkList, NextList; 698 699 // Initialize each node with its original 
AliasAttrs in CFLGraph 700 for (const auto &Mapping : Graph.value_mappings()) { 701 auto Val = Mapping.first; 702 auto &ValueInfo = Mapping.second; 703 for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) { 704 auto Node = InstantiatedValue{Val, I}; 705 AttrMap.add(Node, ValueInfo.getNodeInfoAtLevel(I).Attr); 706 WorkList.push_back(Node); 707 } 708 } 709 710 while (!WorkList.empty()) { 711 for (const auto &Dst : WorkList) { 712 auto DstAttr = AttrMap.getAttrs(Dst); 713 if (DstAttr.none()) 714 continue; 715 716 // Propagate attr on the same level 717 for (const auto &Mapping : ReachSet.reachableValueAliases(Dst)) { 718 auto Src = Mapping.first; 719 if (AttrMap.add(Src, DstAttr)) 720 NextList.push_back(Src); 721 } 722 723 // Propagate attr to the levels below 724 auto DstBelow = getNodeBelow(Graph, Dst); 725 while (DstBelow) { 726 if (AttrMap.add(*DstBelow, DstAttr)) { 727 NextList.push_back(*DstBelow); 728 break; 729 } 730 DstBelow = getNodeBelow(Graph, *DstBelow); 731 } 732 } 733 WorkList.swap(NextList); 734 NextList.clear(); 735 } 736 737 return AttrMap; 738 } 739 740 CFLAndersAAResult::FunctionInfo 741 CFLAndersAAResult::buildInfoFrom(const Function &Fn) { 742 CFLGraphBuilder<CFLAndersAAResult> GraphBuilder( 743 *this, TLI, 744 // Cast away the constness here due to GraphBuilder's API requirement 745 const_cast<Function &>(Fn)); 746 auto &Graph = GraphBuilder.getCFLGraph(); 747 748 ReachabilitySet ReachSet; 749 AliasMemSet MemSet; 750 751 std::vector<WorkListItem> WorkList, NextList; 752 initializeWorkList(WorkList, ReachSet, Graph); 753 // TODO: make sure we don't stop before the fix point is reached 754 while (!WorkList.empty()) { 755 for (const auto &Item : WorkList) 756 processWorkListItem(Item, Graph, ReachSet, MemSet, NextList); 757 758 NextList.swap(WorkList); 759 NextList.clear(); 760 } 761 762 // Now that we have all the reachability info, propagate AliasAttrs according 763 // to it 764 auto IValueAttrMap = buildAttrMap(Graph, ReachSet); 765 
766 return FunctionInfo(Fn, GraphBuilder.getReturnValues(), ReachSet, 767 std::move(IValueAttrMap)); 768 } 769 770 void CFLAndersAAResult::scan(const Function &Fn) { 771 auto InsertPair = Cache.insert(std::make_pair(&Fn, Optional<FunctionInfo>())); 772 (void)InsertPair; 773 assert(InsertPair.second && 774 "Trying to scan a function that has already been cached"); 775 776 // Note that we can't do Cache[Fn] = buildSetsFrom(Fn) here: the function call 777 // may get evaluated after operator[], potentially triggering a DenseMap 778 // resize and invalidating the reference returned by operator[] 779 auto FunInfo = buildInfoFrom(Fn); 780 Cache[&Fn] = std::move(FunInfo); 781 Handles.emplace_front(const_cast<Function *>(&Fn), this); 782 } 783 784 void CFLAndersAAResult::evict(const Function *Fn) { Cache.erase(Fn); } 785 786 const Optional<CFLAndersAAResult::FunctionInfo> & 787 CFLAndersAAResult::ensureCached(const Function &Fn) { 788 auto Iter = Cache.find(&Fn); 789 if (Iter == Cache.end()) { 790 scan(Fn); 791 Iter = Cache.find(&Fn); 792 assert(Iter != Cache.end()); 793 assert(Iter->second.hasValue()); 794 } 795 return Iter->second; 796 } 797 798 const AliasSummary *CFLAndersAAResult::getAliasSummary(const Function &Fn) { 799 auto &FunInfo = ensureCached(Fn); 800 if (FunInfo.hasValue()) 801 return &FunInfo->getAliasSummary(); 802 else 803 return nullptr; 804 } 805 806 AliasResult CFLAndersAAResult::query(const MemoryLocation &LocA, 807 const MemoryLocation &LocB) { 808 auto *ValA = LocA.Ptr; 809 auto *ValB = LocB.Ptr; 810 811 if (!ValA->getType()->isPointerTy() || !ValB->getType()->isPointerTy()) 812 return NoAlias; 813 814 auto *Fn = parentFunctionOfValue(ValA); 815 if (!Fn) { 816 Fn = parentFunctionOfValue(ValB); 817 if (!Fn) { 818 // The only times this is known to happen are when globals + InlineAsm are 819 // involved 820 DEBUG(dbgs() 821 << "CFLAndersAA: could not extract parent function information.\n"); 822 return MayAlias; 823 } 824 } else { 825 
assert(!parentFunctionOfValue(ValB) || parentFunctionOfValue(ValB) == Fn); 826 } 827 828 assert(Fn != nullptr); 829 auto &FunInfo = ensureCached(*Fn); 830 831 // AliasMap lookup 832 if (FunInfo->mayAlias(ValA, LocA.Size, ValB, LocB.Size)) 833 return MayAlias; 834 return NoAlias; 835 } 836 837 AliasResult CFLAndersAAResult::alias(const MemoryLocation &LocA, 838 const MemoryLocation &LocB) { 839 if (LocA.Ptr == LocB.Ptr) 840 return LocA.Size == LocB.Size ? MustAlias : PartialAlias; 841 842 // Comparisons between global variables and other constants should be 843 // handled by BasicAA. 844 // CFLAndersAA may report NoAlias when comparing a GlobalValue and 845 // ConstantExpr, but every query needs to have at least one Value tied to a 846 // Function, and neither GlobalValues nor ConstantExprs are. 847 if (isa<Constant>(LocA.Ptr) && isa<Constant>(LocB.Ptr)) 848 return AAResultBase::alias(LocA, LocB); 849 850 AliasResult QueryResult = query(LocA, LocB); 851 if (QueryResult == MayAlias) 852 return AAResultBase::alias(LocA, LocB); 853 854 return QueryResult; 855 } 856 857 AnalysisKey CFLAndersAA::Key; 858 859 CFLAndersAAResult CFLAndersAA::run(Function &F, FunctionAnalysisManager &AM) { 860 return CFLAndersAAResult(AM.getResult<TargetLibraryAnalysis>(F)); 861 } 862 863 char CFLAndersAAWrapperPass::ID = 0; 864 INITIALIZE_PASS(CFLAndersAAWrapperPass, "cfl-anders-aa", 865 "Inclusion-Based CFL Alias Analysis", false, true) 866 867 ImmutablePass *llvm::createCFLAndersAAWrapperPass() { 868 return new CFLAndersAAWrapperPass(); 869 } 870 871 CFLAndersAAWrapperPass::CFLAndersAAWrapperPass() : ImmutablePass(ID) { 872 initializeCFLAndersAAWrapperPassPass(*PassRegistry::getPassRegistry()); 873 } 874 875 void CFLAndersAAWrapperPass::initializePass() { 876 auto &TLIWP = getAnalysis<TargetLibraryInfoWrapperPass>(); 877 Result.reset(new CFLAndersAAResult(TLIWP.getTLI())); 878 } 879 880 void CFLAndersAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 881 AU.setPreservesAll(); 882 
AU.addRequired<TargetLibraryInfoWrapperPass>(); 883 } 884