1 //======- CFLGraph.h - Abstract stratified sets implementation. --------======// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// \file 10 /// This file defines CFLGraph, an auxiliary data structure used by CFL-based 11 /// alias analysis. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_ANALYSIS_CFLGRAPH_H 16 #define LLVM_ANALYSIS_CFLGRAPH_H 17 18 #include "AliasAnalysisSummary.h" 19 #include "llvm/ADT/SmallPtrSet.h" 20 #include "llvm/Analysis/MemoryBuiltins.h" 21 #include "llvm/IR/InstVisitor.h" 22 #include "llvm/IR/Instructions.h" 23 24 namespace llvm { 25 namespace cflaa { 26 27 /// \brief The Program Expression Graph (PEG) of CFL analysis 28 /// CFLGraph is auxiliary data structure used by CFL-based alias analysis to 29 /// describe flow-insensitive pointer-related behaviors. Given an LLVM function, 30 /// the main purpose of this graph is to abstract away unrelated facts and 31 /// translate the rest into a form that can be easily digested by CFL analyses. 32 /// Each Node in the graph is an InstantiatedValue, and each edge represent a 33 /// pointer assignment between InstantiatedValue. Pointer 34 /// references/dereferences are not explicitly stored in the graph: we 35 /// implicitly assume that for each node (X, I) it has a dereference edge to (X, 36 /// I+1) and a reference edge to (X, I-1). 37 class CFLGraph { 38 public: 39 typedef InstantiatedValue Node; 40 41 struct Edge { 42 Node Other; 43 int64_t Offset; 44 }; 45 46 typedef std::vector<Edge> EdgeList; 47 48 struct NodeInfo { 49 EdgeList Edges, ReverseEdges; 50 AliasAttrs Attr; 51 }; 52 53 class ValueInfo { 54 std::vector<NodeInfo> Levels; 55 56 public: 57 bool addNodeToLevel(unsigned Level) { 58 auto NumLevels = Levels.size(); 59 if (NumLevels > Level) 60 return false; 61 Levels.resize(Level + 1); 62 return true; 63 } 64 65 NodeInfo &getNodeInfoAtLevel(unsigned Level) { 66 assert(Level < Levels.size()); 67 return Levels[Level]; 68 } 69 const NodeInfo &getNodeInfoAtLevel(unsigned Level) const { 70 assert(Level < Levels.size()); 71 return Levels[Level]; 72 } 73 74 unsigned getNumLevels() const { return Levels.size(); } 75 }; 76 77 private: 78 typedef DenseMap<Value *, ValueInfo> ValueMap; 79 ValueMap ValueImpls; 80 81 NodeInfo *getNode(Node N) { 82 auto Itr = ValueImpls.find(N.Val); 83 if (Itr == ValueImpls.end() || Itr->second.getNumLevels() <= N.DerefLevel) 84 return nullptr; 85 return &Itr->second.getNodeInfoAtLevel(N.DerefLevel); 86 } 87 88 public: 89 typedef ValueMap::const_iterator const_value_iterator; 90 91 bool addNode(Node N, AliasAttrs Attr = AliasAttrs()) { 92 assert(N.Val != nullptr); 93 auto &ValInfo = ValueImpls[N.Val]; 94 auto Changed = ValInfo.addNodeToLevel(N.DerefLevel); 95 ValInfo.getNodeInfoAtLevel(N.DerefLevel).Attr |= Attr; 96 return Changed; 97 } 98 99 void addAttr(Node N, AliasAttrs Attr) { 100 auto *Info = getNode(N); 101 assert(Info != nullptr); 102 Info->Attr |= Attr; 103 } 104 105 void addEdge(Node From, Node To, int64_t Offset = 0) { 106 auto *FromInfo = getNode(From); 107 assert(FromInfo != nullptr); 108 auto *ToInfo = getNode(To); 109 assert(ToInfo != nullptr); 110 111 FromInfo->Edges.push_back(Edge{To, Offset}); 112 ToInfo->ReverseEdges.push_back(Edge{From, Offset}); 113 } 114 115 const NodeInfo *getNode(Node N) const { 116 auto Itr = ValueImpls.find(N.Val); 117 if (Itr == ValueImpls.end() || Itr->second.getNumLevels() <= N.DerefLevel) 118 return nullptr; 119 return &Itr->second.getNodeInfoAtLevel(N.DerefLevel); 120 } 121 122 AliasAttrs attrFor(Node N) const { 123 auto *Info = getNode(N); 124 assert(Info != nullptr); 125 return Info->Attr; 126 } 127 128 iterator_range<const_value_iterator> value_mappings() const { 129 return make_range<const_value_iterator>(ValueImpls.begin(), 130 ValueImpls.end()); 131 } 132 }; 133 134 ///\brief A builder class used to create CFLGraph instance from a given function 135 /// The CFL-AA that uses this builder must provide its own type as a template 136 /// argument. This is necessary for interprocedural processing: CFLGraphBuilder 137 /// needs a way of obtaining the summary of other functions when callinsts are 138 /// encountered. 139 /// As a result, we expect the said CFL-AA to expose a getAliasSummary() public 140 /// member function that takes a Function& and returns the corresponding summary 141 /// as a const AliasSummary*. 142 template <typename CFLAA> class CFLGraphBuilder { 143 // Input of the builder 144 CFLAA &Analysis; 145 const TargetLibraryInfo &TLI; 146 147 // Output of the builder 148 CFLGraph Graph; 149 SmallVector<Value *, 4> ReturnedValues; 150 151 // Helper class 152 /// Gets the edges our graph should have, based on an Instruction* 153 class GetEdgesVisitor : public InstVisitor<GetEdgesVisitor, void> { 154 CFLAA &AA; 155 const DataLayout &DL; 156 const TargetLibraryInfo &TLI; 157 158 CFLGraph &Graph; 159 SmallVectorImpl<Value *> &ReturnValues; 160 161 static bool hasUsefulEdges(ConstantExpr *CE) { 162 // ConstantExpr doesn't have terminators, invokes, or fences, so only 163 // needs 164 // to check for compares. 165 return CE->getOpcode() != Instruction::ICmp && 166 CE->getOpcode() != Instruction::FCmp; 167 } 168 169 // Returns possible functions called by CS into the given SmallVectorImpl. 170 // Returns true if targets found, false otherwise. 171 static bool getPossibleTargets(CallSite CS, 172 SmallVectorImpl<Function *> &Output) { 173 if (auto *Fn = CS.getCalledFunction()) { 174 Output.push_back(Fn); 175 return true; 176 } 177 178 // TODO: If the call is indirect, we might be able to enumerate all 179 // potential 180 // targets of the call and return them, rather than just failing. 181 return false; 182 } 183 184 void addNode(Value *Val, AliasAttrs Attr = AliasAttrs()) { 185 assert(Val != nullptr && Val->getType()->isPointerTy()); 186 if (auto GVal = dyn_cast<GlobalValue>(Val)) { 187 if (Graph.addNode(InstantiatedValue{GVal, 0}, 188 getGlobalOrArgAttrFromValue(*GVal))) 189 Graph.addNode(InstantiatedValue{GVal, 1}, getAttrUnknown()); 190 } else if (auto CExpr = dyn_cast<ConstantExpr>(Val)) { 191 if (hasUsefulEdges(CExpr)) { 192 if (Graph.addNode(InstantiatedValue{CExpr, 0})) 193 visitConstantExpr(CExpr); 194 } 195 } else 196 Graph.addNode(InstantiatedValue{Val, 0}, Attr); 197 } 198 199 void addAssignEdge(Value *From, Value *To, int64_t Offset = 0) { 200 assert(From != nullptr && To != nullptr); 201 if (!From->getType()->isPointerTy() || !To->getType()->isPointerTy()) 202 return; 203 addNode(From); 204 if (To != From) { 205 addNode(To); 206 Graph.addEdge(InstantiatedValue{From, 0}, InstantiatedValue{To, 0}, 207 Offset); 208 } 209 } 210 211 void addDerefEdge(Value *From, Value *To, bool IsRead) { 212 assert(From != nullptr && To != nullptr); 213 if (!From->getType()->isPointerTy() || !To->getType()->isPointerTy()) 214 return; 215 addNode(From); 216 addNode(To); 217 if (IsRead) { 218 Graph.addNode(InstantiatedValue{From, 1}); 219 Graph.addEdge(InstantiatedValue{From, 1}, InstantiatedValue{To, 0}); 220 } else { 221 Graph.addNode(InstantiatedValue{To, 1}); 222 Graph.addEdge(InstantiatedValue{From, 0}, InstantiatedValue{To, 1}); 223 } 224 } 225 226 void addLoadEdge(Value *From, Value *To) { addDerefEdge(From, To, true); } 227 void addStoreEdge(Value *From, Value *To) { addDerefEdge(From, To, false); } 228 229 public: 230 GetEdgesVisitor(CFLGraphBuilder &Builder, const DataLayout &DL) 231 : AA(Builder.Analysis), DL(DL), TLI(Builder.TLI), Graph(Builder.Graph), 232 ReturnValues(Builder.ReturnedValues) {} 233 234 void visitInstruction(Instruction &) { 235 llvm_unreachable("Unsupported instruction encountered"); 236 } 237 238 void visitReturnInst(ReturnInst &Inst) { 239 if (auto RetVal = Inst.getReturnValue()) { 240 if (RetVal->getType()->isPointerTy()) { 241 addNode(RetVal); 242 ReturnValues.push_back(RetVal); 243 } 244 } 245 } 246 247 void visitPtrToIntInst(PtrToIntInst &Inst) { 248 auto *Ptr = Inst.getOperand(0); 249 addNode(Ptr, getAttrEscaped()); 250 } 251 252 void visitIntToPtrInst(IntToPtrInst &Inst) { 253 auto *Ptr = &Inst; 254 addNode(Ptr, getAttrUnknown()); 255 } 256 257 void visitCastInst(CastInst &Inst) { 258 auto *Src = Inst.getOperand(0); 259 addAssignEdge(Src, &Inst); 260 } 261 262 void visitBinaryOperator(BinaryOperator &Inst) { 263 auto *Op1 = Inst.getOperand(0); 264 auto *Op2 = Inst.getOperand(1); 265 addAssignEdge(Op1, &Inst); 266 addAssignEdge(Op2, &Inst); 267 } 268 269 void visitAtomicCmpXchgInst(AtomicCmpXchgInst &Inst) { 270 auto *Ptr = Inst.getPointerOperand(); 271 auto *Val = Inst.getNewValOperand(); 272 addStoreEdge(Val, Ptr); 273 } 274 275 void visitAtomicRMWInst(AtomicRMWInst &Inst) { 276 auto *Ptr = Inst.getPointerOperand(); 277 auto *Val = Inst.getValOperand(); 278 addStoreEdge(Val, Ptr); 279 } 280 281 void visitPHINode(PHINode &Inst) { 282 for (Value *Val : Inst.incoming_values()) 283 addAssignEdge(Val, &Inst); 284 } 285 286 void visitGEP(GEPOperator &GEPOp) { 287 uint64_t Offset = UnknownOffset; 288 APInt APOffset(DL.getPointerSizeInBits(GEPOp.getPointerAddressSpace()), 289 0); 290 if (GEPOp.accumulateConstantOffset(DL, APOffset)) 291 Offset = APOffset.getSExtValue(); 292 293 auto *Op = GEPOp.getPointerOperand(); 294 addAssignEdge(Op, &GEPOp, Offset); 295 } 296 297 void visitGetElementPtrInst(GetElementPtrInst &Inst) { 298 auto *GEPOp = cast<GEPOperator>(&Inst); 299 visitGEP(*GEPOp); 300 } 301 302 void visitSelectInst(SelectInst &Inst) { 303 // Condition is not processed here (The actual statement producing 304 // the condition result is processed elsewhere). For select, the 305 // condition is evaluated, but not loaded, stored, or assigned 306 // simply as a result of being the condition of a select. 307 308 auto *TrueVal = Inst.getTrueValue(); 309 auto *FalseVal = Inst.getFalseValue(); 310 addAssignEdge(TrueVal, &Inst); 311 addAssignEdge(FalseVal, &Inst); 312 } 313 314 void visitAllocaInst(AllocaInst &Inst) { addNode(&Inst); } 315 316 void visitLoadInst(LoadInst &Inst) { 317 auto *Ptr = Inst.getPointerOperand(); 318 auto *Val = &Inst; 319 addLoadEdge(Ptr, Val); 320 } 321 322 void visitStoreInst(StoreInst &Inst) { 323 auto *Ptr = Inst.getPointerOperand(); 324 auto *Val = Inst.getValueOperand(); 325 addStoreEdge(Val, Ptr); 326 } 327 328 void visitVAArgInst(VAArgInst &Inst) { 329 // We can't fully model va_arg here. For *Ptr = Inst.getOperand(0), it 330 // does 331 // two things: 332 // 1. Loads a value from *((T*)*Ptr). 333 // 2. Increments (stores to) *Ptr by some target-specific amount. 334 // For now, we'll handle this like a landingpad instruction (by placing 335 // the 336 // result in its own group, and having that group alias externals). 337 if (Inst.getType()->isPointerTy()) 338 addNode(&Inst, getAttrUnknown()); 339 } 340 341 static bool isFunctionExternal(Function *Fn) { 342 return !Fn->hasExactDefinition(); 343 } 344 345 bool tryInterproceduralAnalysis(CallSite CS, 346 const SmallVectorImpl<Function *> &Fns) { 347 assert(Fns.size() > 0); 348 349 if (CS.arg_size() > MaxSupportedArgsInSummary) 350 return false; 351 352 // Exit early if we'll fail anyway 353 for (auto *Fn : Fns) { 354 if (isFunctionExternal(Fn) || Fn->isVarArg()) 355 return false; 356 // Fail if the caller does not provide enough arguments 357 assert(Fn->arg_size() <= CS.arg_size()); 358 if (!AA.getAliasSummary(*Fn)) 359 return false; 360 } 361 362 for (auto *Fn : Fns) { 363 auto Summary = AA.getAliasSummary(*Fn); 364 assert(Summary != nullptr); 365 366 auto &RetParamRelations = Summary->RetParamRelations; 367 for (auto &Relation : RetParamRelations) { 368 auto IRelation = instantiateExternalRelation(Relation, CS); 369 if (IRelation.hasValue()) { 370 Graph.addNode(IRelation->From); 371 Graph.addNode(IRelation->To); 372 Graph.addEdge(IRelation->From, IRelation->To); 373 } 374 } 375 376 auto &RetParamAttributes = Summary->RetParamAttributes; 377 for (auto &Attribute : RetParamAttributes) { 378 auto IAttr = instantiateExternalAttribute(Attribute, CS); 379 if (IAttr.hasValue()) 380 Graph.addNode(IAttr->IValue, IAttr->Attr); 381 } 382 } 383 384 return true; 385 } 386 387 void visitCallSite(CallSite CS) { 388 auto Inst = CS.getInstruction(); 389 390 // Make sure all arguments and return value are added to the graph first 391 for (Value *V : CS.args()) 392 if (V->getType()->isPointerTy()) 393 addNode(V); 394 if (Inst->getType()->isPointerTy()) 395 addNode(Inst); 396 397 // Check if Inst is a call to a library function that 398 // allocates/deallocates 399 // on the heap. Those kinds of functions do not introduce any aliases. 400 // TODO: address other common library functions such as realloc(), 401 // strdup(), 402 // etc. 403 if (isMallocLikeFn(Inst, &TLI) || isCallocLikeFn(Inst, &TLI) || 404 isFreeCall(Inst, &TLI)) 405 return; 406 407 // TODO: Add support for noalias args/all the other fun function 408 // attributes 409 // that we can tack on. 410 SmallVector<Function *, 4> Targets; 411 if (getPossibleTargets(CS, Targets)) 412 if (tryInterproceduralAnalysis(CS, Targets)) 413 return; 414 415 // Because the function is opaque, we need to note that anything 416 // could have happened to the arguments (unless the function is marked 417 // readonly or readnone), and that the result could alias just about 418 // anything, too (unless the result is marked noalias). 419 if (!CS.onlyReadsMemory()) 420 for (Value *V : CS.args()) { 421 if (V->getType()->isPointerTy()) { 422 // The argument itself escapes. 423 Graph.addAttr(InstantiatedValue{V, 0}, getAttrEscaped()); 424 // The fate of argument memory is unknown. Note that since 425 // AliasAttrs is transitive with respect to dereference, we only 426 // need to specify it for the first-level memory. 427 Graph.addNode(InstantiatedValue{V, 1}, getAttrUnknown()); 428 } 429 } 430 431 if (Inst->getType()->isPointerTy()) { 432 auto *Fn = CS.getCalledFunction(); 433 if (Fn == nullptr || !Fn->doesNotAlias(0)) 434 // No need to call addNode() since we've added Inst at the 435 // beginning of this function and we know it is not a global. 436 Graph.addAttr(InstantiatedValue{Inst, 0}, getAttrUnknown()); 437 } 438 } 439 440 /// Because vectors/aggregates are immutable and unaddressable, there's 441 /// nothing we can do to coax a value out of them, other than calling 442 /// Extract{Element,Value}. We can effectively treat them as pointers to 443 /// arbitrary memory locations we can store in and load from. 444 void visitExtractElementInst(ExtractElementInst &Inst) { 445 auto *Ptr = Inst.getVectorOperand(); 446 auto *Val = &Inst; 447 addLoadEdge(Ptr, Val); 448 } 449 450 void visitInsertElementInst(InsertElementInst &Inst) { 451 auto *Vec = Inst.getOperand(0); 452 auto *Val = Inst.getOperand(1); 453 addAssignEdge(Vec, &Inst); 454 addStoreEdge(Val, &Inst); 455 } 456 457 void visitLandingPadInst(LandingPadInst &Inst) { 458 // Exceptions come from "nowhere", from our analysis' perspective. 459 // So we place the instruction its own group, noting that said group may 460 // alias externals 461 if (Inst.getType()->isPointerTy()) 462 addNode(&Inst, getAttrUnknown()); 463 } 464 465 void visitInsertValueInst(InsertValueInst &Inst) { 466 auto *Agg = Inst.getOperand(0); 467 auto *Val = Inst.getOperand(1); 468 addAssignEdge(Agg, &Inst); 469 addStoreEdge(Val, &Inst); 470 } 471 472 void visitExtractValueInst(ExtractValueInst &Inst) { 473 auto *Ptr = Inst.getAggregateOperand(); 474 addLoadEdge(Ptr, &Inst); 475 } 476 477 void visitShuffleVectorInst(ShuffleVectorInst &Inst) { 478 auto *From1 = Inst.getOperand(0); 479 auto *From2 = Inst.getOperand(1); 480 addAssignEdge(From1, &Inst); 481 addAssignEdge(From2, &Inst); 482 } 483 484 void visitConstantExpr(ConstantExpr *CE) { 485 switch (CE->getOpcode()) { 486 case Instruction::GetElementPtr: { 487 auto GEPOp = cast<GEPOperator>(CE); 488 visitGEP(*GEPOp); 489 break; 490 } 491 case Instruction::PtrToInt: { 492 auto *Ptr = CE->getOperand(0); 493 addNode(Ptr, getAttrEscaped()); 494 break; 495 } 496 case Instruction::IntToPtr: { 497 addNode(CE, getAttrUnknown()); 498 break; 499 } 500 case Instruction::BitCast: 501 case Instruction::AddrSpaceCast: 502 case Instruction::Trunc: 503 case Instruction::ZExt: 504 case Instruction::SExt: 505 case Instruction::FPExt: 506 case Instruction::FPTrunc: 507 case Instruction::UIToFP: 508 case Instruction::SIToFP: 509 case Instruction::FPToUI: 510 case Instruction::FPToSI: { 511 auto *Src = CE->getOperand(0); 512 addAssignEdge(Src, CE); 513 break; 514 } 515 case Instruction::Select: { 516 auto *TrueVal = CE->getOperand(0); 517 auto *FalseVal = CE->getOperand(1); 518 addAssignEdge(TrueVal, CE); 519 addAssignEdge(FalseVal, CE); 520 break; 521 } 522 case Instruction::InsertElement: { 523 auto *Vec = CE->getOperand(0); 524 auto *Val = CE->getOperand(1); 525 addAssignEdge(Vec, CE); 526 addStoreEdge(Val, CE); 527 break; 528 } 529 case Instruction::ExtractElement: { 530 auto *Ptr = CE->getOperand(0); 531 addLoadEdge(Ptr, CE); 532 break; 533 } 534 case Instruction::InsertValue: { 535 auto *Agg = CE->getOperand(0); 536 auto *Val = CE->getOperand(1); 537 addAssignEdge(Agg, CE); 538 addStoreEdge(Val, CE); 539 break; 540 } 541 case Instruction::ExtractValue: { 542 auto *Ptr = CE->getOperand(0); 543 addLoadEdge(Ptr, CE); 544 } 545 case Instruction::ShuffleVector: { 546 auto *From1 = CE->getOperand(0); 547 auto *From2 = CE->getOperand(1); 548 addAssignEdge(From1, CE); 549 addAssignEdge(From2, CE); 550 break; 551 } 552 case Instruction::Add: 553 case Instruction::Sub: 554 case Instruction::FSub: 555 case Instruction::Mul: 556 case Instruction::FMul: 557 case Instruction::UDiv: 558 case Instruction::SDiv: 559 case Instruction::FDiv: 560 case Instruction::URem: 561 case Instruction::SRem: 562 case Instruction::FRem: 563 case Instruction::And: 564 case Instruction::Or: 565 case Instruction::Xor: 566 case Instruction::Shl: 567 case Instruction::LShr: 568 case Instruction::AShr: 569 case Instruction::ICmp: 570 case Instruction::FCmp: { 571 addAssignEdge(CE->getOperand(0), CE); 572 addAssignEdge(CE->getOperand(1), CE); 573 break; 574 } 575 default: 576 llvm_unreachable("Unknown instruction type encountered!"); 577 } 578 } 579 }; 580 581 // Helper functions 582 583 // Determines whether or not we an instruction is useless to us (e.g. 584 // FenceInst) 585 static bool hasUsefulEdges(Instruction *Inst) { 586 bool IsNonInvokeRetTerminator = isa<TerminatorInst>(Inst) && 587 !isa<InvokeInst>(Inst) && 588 !isa<ReturnInst>(Inst); 589 return !isa<CmpInst>(Inst) && !isa<FenceInst>(Inst) && 590 !IsNonInvokeRetTerminator; 591 } 592 593 void addArgumentToGraph(Argument &Arg) { 594 if (Arg.getType()->isPointerTy()) { 595 Graph.addNode(InstantiatedValue{&Arg, 0}, 596 getGlobalOrArgAttrFromValue(Arg)); 597 // Pointees of a formal parameter is known to the caller 598 Graph.addNode(InstantiatedValue{&Arg, 1}, getAttrCaller()); 599 } 600 } 601 602 // Given an Instruction, this will add it to the graph, along with any 603 // Instructions that are potentially only available from said Instruction 604 // For example, given the following line: 605 // %0 = load i16* getelementptr ([1 x i16]* @a, 0, 0), align 2 606 // addInstructionToGraph would add both the `load` and `getelementptr` 607 // instructions to the graph appropriately. 608 void addInstructionToGraph(GetEdgesVisitor &Visitor, Instruction &Inst) { 609 if (!hasUsefulEdges(&Inst)) 610 return; 611 612 Visitor.visit(Inst); 613 } 614 615 // Builds the graph needed for constructing the StratifiedSets for the given 616 // function 617 void buildGraphFrom(Function &Fn) { 618 GetEdgesVisitor Visitor(*this, Fn.getParent()->getDataLayout()); 619 620 for (auto &Bb : Fn.getBasicBlockList()) 621 for (auto &Inst : Bb.getInstList()) 622 addInstructionToGraph(Visitor, Inst); 623 624 for (auto &Arg : Fn.args()) 625 addArgumentToGraph(Arg); 626 } 627 628 public: 629 CFLGraphBuilder(CFLAA &Analysis, const TargetLibraryInfo &TLI, Function &Fn) 630 : Analysis(Analysis), TLI(TLI) { 631 buildGraphFrom(Fn); 632 } 633 634 const CFLGraph &getCFLGraph() const { return Graph; } 635 const SmallVector<Value *, 4> &getReturnValues() const { 636 return ReturnedValues; 637 } 638 }; 639 } 640 } 641 642 #endif 643