1 //- CFLSteensAliasAnalysis.cpp - Unification-based Alias Analysis ---*- C++-*-// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a CFL-base, summary-based alias analysis algorithm. It 11 // does not depend on types. The algorithm is a mixture of the one described in 12 // "Demand-driven alias analysis for C" by Xin Zheng and Radu Rugina, and "Fast 13 // algorithms for Dyck-CFL-reachability with applications to Alias Analysis" by 14 // Zhang Q, Lyu M R, Yuan H, and Su Z. -- to summarize the papers, we build a 15 // graph of the uses of a variable, where each node is a memory location, and 16 // each edge is an action that happened on that memory location. The "actions" 17 // can be one of Dereference, Reference, or Assign. The precision of this 18 // analysis is roughly the same as that of an one level context-sensitive 19 // Steensgaard's algorithm. 20 // 21 // Two variables are considered as aliasing iff you can reach one value's node 22 // from the other value's node and the language formed by concatenating all of 23 // the edge labels (actions) conforms to a context-free grammar. 24 // 25 // Because this algorithm requires a graph search on each query, we execute the 26 // algorithm outlined in "Fast algorithms..." (mentioned above) 27 // in order to transform the graph into sets of variables that may alias in 28 // ~nlogn time (n = number of variables), which makes queries take constant 29 // time. 30 //===----------------------------------------------------------------------===// 31 32 // N.B. AliasAnalysis as a whole is phrased as a FunctionPass at the moment, and 33 // CFLSteensAA is interprocedural. This is *technically* A Bad Thing, because 34 // FunctionPasses are only allowed to inspect the Function that they're being 35 // run on. Realistically, this likely isn't a problem until we allow 36 // FunctionPasses to run concurrently. 37 38 #include "llvm/Analysis/CFLSteensAliasAnalysis.h" 39 #include "CFLGraph.h" 40 #include "StratifiedSets.h" 41 #include "llvm/ADT/DenseMap.h" 42 #include "llvm/ADT/None.h" 43 #include "llvm/ADT/Optional.h" 44 #include "llvm/Analysis/TargetLibraryInfo.h" 45 #include "llvm/IR/Constants.h" 46 #include "llvm/IR/Function.h" 47 #include "llvm/Pass.h" 48 #include "llvm/Support/Compiler.h" 49 #include "llvm/Support/Debug.h" 50 #include "llvm/Support/ErrorHandling.h" 51 #include "llvm/Support/raw_ostream.h" 52 #include <algorithm> 53 #include <cassert> 54 #include <memory> 55 #include <tuple> 56 57 using namespace llvm; 58 using namespace llvm::cflaa; 59 60 #define DEBUG_TYPE "cfl-steens-aa" 61 62 CFLSteensAAResult::CFLSteensAAResult(const TargetLibraryInfo &TLI) 63 : AAResultBase(), TLI(TLI) {} 64 CFLSteensAAResult::CFLSteensAAResult(CFLSteensAAResult &&Arg) 65 : AAResultBase(std::move(Arg)), TLI(Arg.TLI) {} 66 CFLSteensAAResult::~CFLSteensAAResult() {} 67 68 /// Information we have about a function and would like to keep around. 69 class CFLSteensAAResult::FunctionInfo { 70 StratifiedSets<InstantiatedValue> Sets; 71 AliasSummary Summary; 72 73 public: 74 FunctionInfo(Function &Fn, const SmallVectorImpl<Value *> &RetVals, 75 StratifiedSets<InstantiatedValue> S); 76 77 const StratifiedSets<InstantiatedValue> &getStratifiedSets() const { 78 return Sets; 79 } 80 const AliasSummary &getAliasSummary() const { return Summary; } 81 }; 82 83 const StratifiedIndex StratifiedLink::SetSentinel = 84 std::numeric_limits<StratifiedIndex>::max(); 85 86 //===----------------------------------------------------------------------===// 87 // Function declarations that require types defined in the namespace above 88 //===----------------------------------------------------------------------===// 89 90 /// Determines whether it would be pointless to add the given Value to our sets. 91 static bool canSkipAddingToSets(Value *Val); 92 93 static Optional<Function *> parentFunctionOfValue(Value *Val) { 94 if (auto *Inst = dyn_cast<Instruction>(Val)) { 95 auto *Bb = Inst->getParent(); 96 return Bb->getParent(); 97 } 98 99 if (auto *Arg = dyn_cast<Argument>(Val)) 100 return Arg->getParent(); 101 return None; 102 } 103 104 static bool canSkipAddingToSets(Value *Val) { 105 // Constants can share instances, which may falsely unify multiple 106 // sets, e.g. in 107 // store i32* null, i32** %ptr1 108 // store i32* null, i32** %ptr2 109 // clearly ptr1 and ptr2 should not be unified into the same set, so 110 // we should filter out the (potentially shared) instance to 111 // i32* null. 112 if (isa<Constant>(Val)) { 113 // TODO: Because all of these things are constant, we can determine whether 114 // the data is *actually* mutable at graph building time. This will probably 115 // come for free/cheap with offset awareness. 116 bool CanStoreMutableData = isa<GlobalValue>(Val) || 117 isa<ConstantExpr>(Val) || 118 isa<ConstantAggregate>(Val); 119 return !CanStoreMutableData; 120 } 121 122 return false; 123 } 124 125 CFLSteensAAResult::FunctionInfo::FunctionInfo( 126 Function &Fn, const SmallVectorImpl<Value *> &RetVals, 127 StratifiedSets<InstantiatedValue> S) 128 : Sets(std::move(S)) { 129 // Historically, an arbitrary upper-bound of 50 args was selected. We may want 130 // to remove this if it doesn't really matter in practice. 131 if (Fn.arg_size() > MaxSupportedArgsInSummary) 132 return; 133 134 DenseMap<StratifiedIndex, InterfaceValue> InterfaceMap; 135 136 // Our intention here is to record all InterfaceValues that share the same 137 // StratifiedIndex in RetParamRelations. For each valid InterfaceValue, we 138 // have its StratifiedIndex scanned here and check if the index is presented 139 // in InterfaceMap: if it is not, we add the correspondence to the map; 140 // otherwise, an aliasing relation is found and we add it to 141 // RetParamRelations. 142 143 auto AddToRetParamRelations = [&](unsigned InterfaceIndex, 144 StratifiedIndex SetIndex) { 145 unsigned Level = 0; 146 while (true) { 147 InterfaceValue CurrValue{InterfaceIndex, Level}; 148 149 auto Itr = InterfaceMap.find(SetIndex); 150 if (Itr != InterfaceMap.end()) { 151 if (CurrValue != Itr->second) 152 Summary.RetParamRelations.push_back( 153 ExternalRelation{CurrValue, Itr->second, UnknownOffset}); 154 break; 155 } 156 157 auto &Link = Sets.getLink(SetIndex); 158 InterfaceMap.insert(std::make_pair(SetIndex, CurrValue)); 159 auto ExternalAttrs = getExternallyVisibleAttrs(Link.Attrs); 160 if (ExternalAttrs.any()) 161 Summary.RetParamAttributes.push_back( 162 ExternalAttribute{CurrValue, ExternalAttrs}); 163 164 if (!Link.hasBelow()) 165 break; 166 167 ++Level; 168 SetIndex = Link.Below; 169 } 170 }; 171 172 // Populate RetParamRelations for return values 173 for (auto *RetVal : RetVals) { 174 assert(RetVal != nullptr); 175 assert(RetVal->getType()->isPointerTy()); 176 auto RetInfo = Sets.find(InstantiatedValue{RetVal, 0}); 177 if (RetInfo.hasValue()) 178 AddToRetParamRelations(0, RetInfo->Index); 179 } 180 181 // Populate RetParamRelations for parameters 182 unsigned I = 0; 183 for (auto &Param : Fn.args()) { 184 if (Param.getType()->isPointerTy()) { 185 auto ParamInfo = Sets.find(InstantiatedValue{&Param, 0}); 186 if (ParamInfo.hasValue()) 187 AddToRetParamRelations(I + 1, ParamInfo->Index); 188 } 189 ++I; 190 } 191 } 192 193 // Builds the graph + StratifiedSets for a function. 194 CFLSteensAAResult::FunctionInfo CFLSteensAAResult::buildSetsFrom(Function *Fn) { 195 CFLGraphBuilder<CFLSteensAAResult> GraphBuilder(*this, TLI, *Fn); 196 StratifiedSetsBuilder<InstantiatedValue> SetBuilder; 197 198 // Add all CFLGraph nodes and all Dereference edges to StratifiedSets 199 auto &Graph = GraphBuilder.getCFLGraph(); 200 for (const auto &Mapping : Graph.value_mappings()) { 201 auto Val = Mapping.first; 202 if (canSkipAddingToSets(Val)) 203 continue; 204 auto &ValueInfo = Mapping.second; 205 206 assert(ValueInfo.getNumLevels() > 0); 207 SetBuilder.add(InstantiatedValue{Val, 0}); 208 SetBuilder.noteAttributes(InstantiatedValue{Val, 0}, 209 ValueInfo.getNodeInfoAtLevel(0).Attr); 210 for (unsigned I = 0, E = ValueInfo.getNumLevels() - 1; I < E; ++I) { 211 SetBuilder.add(InstantiatedValue{Val, I + 1}); 212 SetBuilder.noteAttributes(InstantiatedValue{Val, I + 1}, 213 ValueInfo.getNodeInfoAtLevel(I + 1).Attr); 214 SetBuilder.addBelow(InstantiatedValue{Val, I}, 215 InstantiatedValue{Val, I + 1}); 216 } 217 } 218 219 // Add all assign edges to StratifiedSets 220 for (const auto &Mapping : Graph.value_mappings()) { 221 auto Val = Mapping.first; 222 if (canSkipAddingToSets(Val)) 223 continue; 224 auto &ValueInfo = Mapping.second; 225 226 for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) { 227 auto Src = InstantiatedValue{Val, I}; 228 for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges) 229 SetBuilder.addWith(Src, Edge.Other); 230 } 231 } 232 233 return FunctionInfo(*Fn, GraphBuilder.getReturnValues(), SetBuilder.build()); 234 } 235 236 void CFLSteensAAResult::scan(Function *Fn) { 237 auto InsertPair = Cache.insert(std::make_pair(Fn, Optional<FunctionInfo>())); 238 (void)InsertPair; 239 assert(InsertPair.second && 240 "Trying to scan a function that has already been cached"); 241 242 // Note that we can't do Cache[Fn] = buildSetsFrom(Fn) here: the function call 243 // may get evaluated after operator[], potentially triggering a DenseMap 244 // resize and invalidating the reference returned by operator[] 245 auto FunInfo = buildSetsFrom(Fn); 246 Cache[Fn] = std::move(FunInfo); 247 248 Handles.push_front(FunctionHandle(Fn, this)); 249 } 250 251 void CFLSteensAAResult::evict(Function *Fn) { Cache.erase(Fn); } 252 253 /// Ensures that the given function is available in the cache, and returns the 254 /// entry. 255 const Optional<CFLSteensAAResult::FunctionInfo> & 256 CFLSteensAAResult::ensureCached(Function *Fn) { 257 auto Iter = Cache.find(Fn); 258 if (Iter == Cache.end()) { 259 scan(Fn); 260 Iter = Cache.find(Fn); 261 assert(Iter != Cache.end()); 262 assert(Iter->second.hasValue()); 263 } 264 return Iter->second; 265 } 266 267 const AliasSummary *CFLSteensAAResult::getAliasSummary(Function &Fn) { 268 auto &FunInfo = ensureCached(&Fn); 269 if (FunInfo.hasValue()) 270 return &FunInfo->getAliasSummary(); 271 else 272 return nullptr; 273 } 274 275 AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA, 276 const MemoryLocation &LocB) { 277 auto *ValA = const_cast<Value *>(LocA.Ptr); 278 auto *ValB = const_cast<Value *>(LocB.Ptr); 279 280 if (!ValA->getType()->isPointerTy() || !ValB->getType()->isPointerTy()) 281 return NoAlias; 282 283 Function *Fn = nullptr; 284 auto MaybeFnA = parentFunctionOfValue(ValA); 285 auto MaybeFnB = parentFunctionOfValue(ValB); 286 if (!MaybeFnA.hasValue() && !MaybeFnB.hasValue()) { 287 // The only times this is known to happen are when globals + InlineAsm are 288 // involved 289 DEBUG(dbgs() 290 << "CFLSteensAA: could not extract parent function information.\n"); 291 return MayAlias; 292 } 293 294 if (MaybeFnA.hasValue()) { 295 Fn = *MaybeFnA; 296 assert((!MaybeFnB.hasValue() || *MaybeFnB == *MaybeFnA) && 297 "Interprocedural queries not supported"); 298 } else { 299 Fn = *MaybeFnB; 300 } 301 302 assert(Fn != nullptr); 303 auto &MaybeInfo = ensureCached(Fn); 304 assert(MaybeInfo.hasValue()); 305 306 auto &Sets = MaybeInfo->getStratifiedSets(); 307 auto MaybeA = Sets.find(InstantiatedValue{ValA, 0}); 308 if (!MaybeA.hasValue()) 309 return MayAlias; 310 311 auto MaybeB = Sets.find(InstantiatedValue{ValB, 0}); 312 if (!MaybeB.hasValue()) 313 return MayAlias; 314 315 auto SetA = *MaybeA; 316 auto SetB = *MaybeB; 317 auto AttrsA = Sets.getLink(SetA.Index).Attrs; 318 auto AttrsB = Sets.getLink(SetB.Index).Attrs; 319 320 // If both values are local (meaning the corresponding set has attribute 321 // AttrNone or AttrEscaped), then we know that CFLSteensAA fully models them: 322 // they may-alias each other if and only if they are in the same set. 323 // If at least one value is non-local (meaning it either is global/argument or 324 // it comes from unknown sources like integer cast), the situation becomes a 325 // bit more interesting. We follow three general rules described below: 326 // - Non-local values may alias each other 327 // - AttrNone values do not alias any non-local values 328 // - AttrEscaped do not alias globals/arguments, but they may alias 329 // AttrUnknown values 330 if (SetA.Index == SetB.Index) 331 return MayAlias; 332 if (AttrsA.none() || AttrsB.none()) 333 return NoAlias; 334 if (hasUnknownOrCallerAttr(AttrsA) || hasUnknownOrCallerAttr(AttrsB)) 335 return MayAlias; 336 if (isGlobalOrArgAttr(AttrsA) && isGlobalOrArgAttr(AttrsB)) 337 return MayAlias; 338 return NoAlias; 339 } 340 341 AnalysisKey CFLSteensAA::Key; 342 343 CFLSteensAAResult CFLSteensAA::run(Function &F, FunctionAnalysisManager &AM) { 344 return CFLSteensAAResult(AM.getResult<TargetLibraryAnalysis>(F)); 345 } 346 347 char CFLSteensAAWrapperPass::ID = 0; 348 INITIALIZE_PASS(CFLSteensAAWrapperPass, "cfl-steens-aa", 349 "Unification-Based CFL Alias Analysis", false, true) 350 351 ImmutablePass *llvm::createCFLSteensAAWrapperPass() { 352 return new CFLSteensAAWrapperPass(); 353 } 354 355 CFLSteensAAWrapperPass::CFLSteensAAWrapperPass() : ImmutablePass(ID) { 356 initializeCFLSteensAAWrapperPassPass(*PassRegistry::getPassRegistry()); 357 } 358 359 void CFLSteensAAWrapperPass::initializePass() { 360 auto &TLIWP = getAnalysis<TargetLibraryInfoWrapperPass>(); 361 Result.reset(new CFLSteensAAResult(TLIWP.getTLI())); 362 } 363 364 void CFLSteensAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 365 AU.setPreservesAll(); 366 AU.addRequired<TargetLibraryInfoWrapperPass>(); 367 } 368