1 //===- FunctionAttrs.cpp - Pass which marks functions attributes ----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// This file implements interprocedural passes which walk the 12 /// call-graph deducing and/or propagating function attributes. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/Transforms/IPO/FunctionAttrs.h" 17 #include "llvm/Transforms/IPO.h" 18 #include "llvm/ADT/SCCIterator.h" 19 #include "llvm/ADT/SetVector.h" 20 #include "llvm/ADT/SmallSet.h" 21 #include "llvm/ADT/Statistic.h" 22 #include "llvm/ADT/StringSwitch.h" 23 #include "llvm/Analysis/AliasAnalysis.h" 24 #include "llvm/Analysis/AssumptionCache.h" 25 #include "llvm/Analysis/BasicAliasAnalysis.h" 26 #include "llvm/Analysis/CallGraph.h" 27 #include "llvm/Analysis/CallGraphSCCPass.h" 28 #include "llvm/Analysis/CaptureTracking.h" 29 #include "llvm/Analysis/TargetLibraryInfo.h" 30 #include "llvm/Analysis/ValueTracking.h" 31 #include "llvm/IR/GlobalVariable.h" 32 #include "llvm/IR/InstIterator.h" 33 #include "llvm/IR/IntrinsicInst.h" 34 #include "llvm/IR/LLVMContext.h" 35 #include "llvm/Support/Debug.h" 36 #include "llvm/Support/raw_ostream.h" 37 #include "llvm/Analysis/TargetLibraryInfo.h" 38 using namespace llvm; 39 40 #define DEBUG_TYPE "functionattrs" 41 42 STATISTIC(NumReadNone, "Number of functions marked readnone"); 43 STATISTIC(NumReadOnly, "Number of functions marked readonly"); 44 STATISTIC(NumNoCapture, "Number of arguments marked nocapture"); 45 STATISTIC(NumReadNoneArg, "Number of arguments marked readnone"); 46 STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly"); 47 STATISTIC(NumNoAlias, "Number of function returns marked noalias"); 48 STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull"); 49 STATISTIC(NumNoRecurse, "Number of functions marked as norecurse"); 50 51 namespace { 52 typedef SmallSetVector<Function *, 8> SCCNodeSet; 53 } 54 55 namespace { 56 /// The three kinds of memory access relevant to 'readonly' and 57 /// 'readnone' attributes. 58 enum MemoryAccessKind { 59 MAK_ReadNone = 0, 60 MAK_ReadOnly = 1, 61 MAK_MayWrite = 2 62 }; 63 } 64 65 static MemoryAccessKind checkFunctionMemoryAccess(Function &F, AAResults &AAR, 66 const SCCNodeSet &SCCNodes) { 67 FunctionModRefBehavior MRB = AAR.getModRefBehavior(&F); 68 if (MRB == FMRB_DoesNotAccessMemory) 69 // Already perfect! 70 return MAK_ReadNone; 71 72 // Non-exact function definitions may not be selected at link time, and an 73 // alternative version that writes to memory may be selected. See the comment 74 // on GlobalValue::isDefinitionExact for more details. 75 if (!F.hasExactDefinition()) { 76 if (AliasAnalysis::onlyReadsMemory(MRB)) 77 return MAK_ReadOnly; 78 79 // Conservatively assume it writes to memory. 80 return MAK_MayWrite; 81 } 82 83 // Scan the function body for instructions that may read or write memory. 84 bool ReadsMemory = false; 85 for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) { 86 Instruction *I = &*II; 87 88 // Some instructions can be ignored even if they read or write memory. 89 // Detect these now, skipping to the next instruction if one is found. 90 CallSite CS(cast<Value>(I)); 91 if (CS) { 92 // Ignore calls to functions in the same SCC, as long as the call sites 93 // don't have operand bundles. Calls with operand bundles are allowed to 94 // have memory effects not described by the memory effects of the call 95 // target. 96 if (!CS.hasOperandBundles() && CS.getCalledFunction() && 97 SCCNodes.count(CS.getCalledFunction())) 98 continue; 99 FunctionModRefBehavior MRB = AAR.getModRefBehavior(CS); 100 101 // If the call doesn't access memory, we're done. 102 if (!(MRB & MRI_ModRef)) 103 continue; 104 105 if (!AliasAnalysis::onlyAccessesArgPointees(MRB)) { 106 // The call could access any memory. If that includes writes, give up. 107 if (MRB & MRI_Mod) 108 return MAK_MayWrite; 109 // If it reads, note it. 110 if (MRB & MRI_Ref) 111 ReadsMemory = true; 112 continue; 113 } 114 115 // Check whether all pointer arguments point to local memory, and 116 // ignore calls that only access local memory. 117 for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); 118 CI != CE; ++CI) { 119 Value *Arg = *CI; 120 if (!Arg->getType()->isPtrOrPtrVectorTy()) 121 continue; 122 123 AAMDNodes AAInfo; 124 I->getAAMetadata(AAInfo); 125 MemoryLocation Loc(Arg, MemoryLocation::UnknownSize, AAInfo); 126 127 // Skip accesses to local or constant memory as they don't impact the 128 // externally visible mod/ref behavior. 129 if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) 130 continue; 131 132 if (MRB & MRI_Mod) 133 // Writes non-local memory. Give up. 134 return MAK_MayWrite; 135 if (MRB & MRI_Ref) 136 // Ok, it reads non-local memory. 137 ReadsMemory = true; 138 } 139 continue; 140 } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 141 // Ignore non-volatile loads from local memory. (Atomic is okay here.) 142 if (!LI->isVolatile()) { 143 MemoryLocation Loc = MemoryLocation::get(LI); 144 if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) 145 continue; 146 } 147 } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 148 // Ignore non-volatile stores to local memory. (Atomic is okay here.) 149 if (!SI->isVolatile()) { 150 MemoryLocation Loc = MemoryLocation::get(SI); 151 if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) 152 continue; 153 } 154 } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) { 155 // Ignore vaargs on local memory. 156 MemoryLocation Loc = MemoryLocation::get(VI); 157 if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) 158 continue; 159 } 160 161 // Any remaining instructions need to be taken seriously! Check if they 162 // read or write memory. 163 if (I->mayWriteToMemory()) 164 // Writes memory. Just give up. 165 return MAK_MayWrite; 166 167 // If this instruction may read memory, remember that. 168 ReadsMemory |= I->mayReadFromMemory(); 169 } 170 171 return ReadsMemory ? MAK_ReadOnly : MAK_ReadNone; 172 } 173 174 /// Deduce readonly/readnone attributes for the SCC. 175 template <typename AARGetterT> 176 static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT AARGetter) { 177 // Check if any of the functions in the SCC read or write memory. If they 178 // write memory then they can't be marked readnone or readonly. 179 bool ReadsMemory = false; 180 for (Function *F : SCCNodes) { 181 // Call the callable parameter to look up AA results for this function. 182 AAResults &AAR = AARGetter(*F); 183 184 switch (checkFunctionMemoryAccess(*F, AAR, SCCNodes)) { 185 case MAK_MayWrite: 186 return false; 187 case MAK_ReadOnly: 188 ReadsMemory = true; 189 break; 190 case MAK_ReadNone: 191 // Nothing to do! 192 break; 193 } 194 } 195 196 // Success! Functions in this SCC do not access memory, or only read memory. 197 // Give them the appropriate attribute. 198 bool MadeChange = false; 199 for (Function *F : SCCNodes) { 200 if (F->doesNotAccessMemory()) 201 // Already perfect! 202 continue; 203 204 if (F->onlyReadsMemory() && ReadsMemory) 205 // No change. 206 continue; 207 208 MadeChange = true; 209 210 // Clear out any existing attributes. 211 AttrBuilder B; 212 B.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone); 213 F->removeAttributes( 214 AttributeSet::FunctionIndex, 215 AttributeSet::get(F->getContext(), AttributeSet::FunctionIndex, B)); 216 217 // Add in the new attribute. 218 F->addAttribute(AttributeSet::FunctionIndex, 219 ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone); 220 221 if (ReadsMemory) 222 ++NumReadOnly; 223 else 224 ++NumReadNone; 225 } 226 227 return MadeChange; 228 } 229 230 namespace { 231 /// For a given pointer Argument, this retains a list of Arguments of functions 232 /// in the same SCC that the pointer data flows into. We use this to build an 233 /// SCC of the arguments. 234 struct ArgumentGraphNode { 235 Argument *Definition; 236 SmallVector<ArgumentGraphNode *, 4> Uses; 237 }; 238 239 class ArgumentGraph { 240 // We store pointers to ArgumentGraphNode objects, so it's important that 241 // that they not move around upon insert. 242 typedef std::map<Argument *, ArgumentGraphNode> ArgumentMapTy; 243 244 ArgumentMapTy ArgumentMap; 245 246 // There is no root node for the argument graph, in fact: 247 // void f(int *x, int *y) { if (...) f(x, y); } 248 // is an example where the graph is disconnected. The SCCIterator requires a 249 // single entry point, so we maintain a fake ("synthetic") root node that 250 // uses every node. Because the graph is directed and nothing points into 251 // the root, it will not participate in any SCCs (except for its own). 252 ArgumentGraphNode SyntheticRoot; 253 254 public: 255 ArgumentGraph() { SyntheticRoot.Definition = nullptr; } 256 257 typedef SmallVectorImpl<ArgumentGraphNode *>::iterator iterator; 258 259 iterator begin() { return SyntheticRoot.Uses.begin(); } 260 iterator end() { return SyntheticRoot.Uses.end(); } 261 ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; } 262 263 ArgumentGraphNode *operator[](Argument *A) { 264 ArgumentGraphNode &Node = ArgumentMap[A]; 265 Node.Definition = A; 266 SyntheticRoot.Uses.push_back(&Node); 267 return &Node; 268 } 269 }; 270 271 /// This tracker checks whether callees are in the SCC, and if so it does not 272 /// consider that a capture, instead adding it to the "Uses" list and 273 /// continuing with the analysis. 274 struct ArgumentUsesTracker : public CaptureTracker { 275 ArgumentUsesTracker(const SCCNodeSet &SCCNodes) 276 : Captured(false), SCCNodes(SCCNodes) {} 277 278 void tooManyUses() override { Captured = true; } 279 280 bool captured(const Use *U) override { 281 CallSite CS(U->getUser()); 282 if (!CS.getInstruction()) { 283 Captured = true; 284 return true; 285 } 286 287 Function *F = CS.getCalledFunction(); 288 if (!F || !F->hasExactDefinition() || !SCCNodes.count(F)) { 289 Captured = true; 290 return true; 291 } 292 293 // Note: the callee and the two successor blocks *follow* the argument 294 // operands. This means there is no need to adjust UseIndex to account for 295 // these. 296 297 unsigned UseIndex = 298 std::distance(const_cast<const Use *>(CS.arg_begin()), U); 299 300 assert(UseIndex < CS.data_operands_size() && 301 "Indirect function calls should have been filtered above!"); 302 303 if (UseIndex >= CS.getNumArgOperands()) { 304 // Data operand, but not a argument operand -- must be a bundle operand 305 assert(CS.hasOperandBundles() && "Must be!"); 306 307 // CaptureTracking told us that we're being captured by an operand bundle 308 // use. In this case it does not matter if the callee is within our SCC 309 // or not -- we've been captured in some unknown way, and we have to be 310 // conservative. 311 Captured = true; 312 return true; 313 } 314 315 if (UseIndex >= F->arg_size()) { 316 assert(F->isVarArg() && "More params than args in non-varargs call"); 317 Captured = true; 318 return true; 319 } 320 321 Uses.push_back(&*std::next(F->arg_begin(), UseIndex)); 322 return false; 323 } 324 325 bool Captured; // True only if certainly captured (used outside our SCC). 326 SmallVector<Argument *, 4> Uses; // Uses within our SCC. 327 328 const SCCNodeSet &SCCNodes; 329 }; 330 } 331 332 namespace llvm { 333 template <> struct GraphTraits<ArgumentGraphNode *> { 334 typedef ArgumentGraphNode NodeType; 335 typedef SmallVectorImpl<ArgumentGraphNode *>::iterator ChildIteratorType; 336 337 static inline NodeType *getEntryNode(NodeType *A) { return A; } 338 static inline ChildIteratorType child_begin(NodeType *N) { 339 return N->Uses.begin(); 340 } 341 static inline ChildIteratorType child_end(NodeType *N) { 342 return N->Uses.end(); 343 } 344 }; 345 template <> 346 struct GraphTraits<ArgumentGraph *> : public GraphTraits<ArgumentGraphNode *> { 347 static NodeType *getEntryNode(ArgumentGraph *AG) { 348 return AG->getEntryNode(); 349 } 350 static ChildIteratorType nodes_begin(ArgumentGraph *AG) { 351 return AG->begin(); 352 } 353 static ChildIteratorType nodes_end(ArgumentGraph *AG) { return AG->end(); } 354 }; 355 } 356 357 /// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone. 358 static Attribute::AttrKind 359 determinePointerReadAttrs(Argument *A, 360 const SmallPtrSet<Argument *, 8> &SCCNodes) { 361 362 SmallVector<Use *, 32> Worklist; 363 SmallSet<Use *, 32> Visited; 364 365 // inalloca arguments are always clobbered by the call. 366 if (A->hasInAllocaAttr()) 367 return Attribute::None; 368 369 bool IsRead = false; 370 // We don't need to track IsWritten. If A is written to, return immediately. 371 372 for (Use &U : A->uses()) { 373 Visited.insert(&U); 374 Worklist.push_back(&U); 375 } 376 377 while (!Worklist.empty()) { 378 Use *U = Worklist.pop_back_val(); 379 Instruction *I = cast<Instruction>(U->getUser()); 380 381 switch (I->getOpcode()) { 382 case Instruction::BitCast: 383 case Instruction::GetElementPtr: 384 case Instruction::PHI: 385 case Instruction::Select: 386 case Instruction::AddrSpaceCast: 387 // The original value is not read/written via this if the new value isn't. 388 for (Use &UU : I->uses()) 389 if (Visited.insert(&UU).second) 390 Worklist.push_back(&UU); 391 break; 392 393 case Instruction::Call: 394 case Instruction::Invoke: { 395 bool Captures = true; 396 397 if (I->getType()->isVoidTy()) 398 Captures = false; 399 400 auto AddUsersToWorklistIfCapturing = [&] { 401 if (Captures) 402 for (Use &UU : I->uses()) 403 if (Visited.insert(&UU).second) 404 Worklist.push_back(&UU); 405 }; 406 407 CallSite CS(I); 408 if (CS.doesNotAccessMemory()) { 409 AddUsersToWorklistIfCapturing(); 410 continue; 411 } 412 413 Function *F = CS.getCalledFunction(); 414 if (!F) { 415 if (CS.onlyReadsMemory()) { 416 IsRead = true; 417 AddUsersToWorklistIfCapturing(); 418 continue; 419 } 420 return Attribute::None; 421 } 422 423 // Note: the callee and the two successor blocks *follow* the argument 424 // operands. This means there is no need to adjust UseIndex to account 425 // for these. 426 427 unsigned UseIndex = std::distance(CS.arg_begin(), U); 428 429 // U cannot be the callee operand use: since we're exploring the 430 // transitive uses of an Argument, having such a use be a callee would 431 // imply the CallSite is an indirect call or invoke; and we'd take the 432 // early exit above. 433 assert(UseIndex < CS.data_operands_size() && 434 "Data operand use expected!"); 435 436 bool IsOperandBundleUse = UseIndex >= CS.getNumArgOperands(); 437 438 if (UseIndex >= F->arg_size() && !IsOperandBundleUse) { 439 assert(F->isVarArg() && "More params than args in non-varargs call"); 440 return Attribute::None; 441 } 442 443 Captures &= !CS.doesNotCapture(UseIndex); 444 445 // Since the optimizer (by design) cannot see the data flow corresponding 446 // to a operand bundle use, these cannot participate in the optimistic SCC 447 // analysis. Instead, we model the operand bundle uses as arguments in 448 // call to a function external to the SCC. 449 if (!SCCNodes.count(&*std::next(F->arg_begin(), UseIndex)) || 450 IsOperandBundleUse) { 451 452 // The accessors used on CallSite here do the right thing for calls and 453 // invokes with operand bundles. 454 455 if (!CS.onlyReadsMemory() && !CS.onlyReadsMemory(UseIndex)) 456 return Attribute::None; 457 if (!CS.doesNotAccessMemory(UseIndex)) 458 IsRead = true; 459 } 460 461 AddUsersToWorklistIfCapturing(); 462 break; 463 } 464 465 case Instruction::Load: 466 // A volatile load has side effects beyond what readonly can be relied 467 // upon. 468 if (cast<LoadInst>(I)->isVolatile()) 469 return Attribute::None; 470 471 IsRead = true; 472 break; 473 474 case Instruction::ICmp: 475 case Instruction::Ret: 476 break; 477 478 default: 479 return Attribute::None; 480 } 481 } 482 483 return IsRead ? Attribute::ReadOnly : Attribute::ReadNone; 484 } 485 486 /// Deduce nocapture attributes for the SCC. 487 static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { 488 bool Changed = false; 489 490 ArgumentGraph AG; 491 492 AttrBuilder B; 493 B.addAttribute(Attribute::NoCapture); 494 495 // Check each function in turn, determining which pointer arguments are not 496 // captured. 497 for (Function *F : SCCNodes) { 498 // We can infer and propagate function attributes only when we know that the 499 // definition we'll get at link time is *exactly* the definition we see now. 500 // For more details, see GlobalValue::mayBeDerefined. 501 if (!F->hasExactDefinition()) 502 continue; 503 504 // Functions that are readonly (or readnone) and nounwind and don't return 505 // a value can't capture arguments. Don't analyze them. 506 if (F->onlyReadsMemory() && F->doesNotThrow() && 507 F->getReturnType()->isVoidTy()) { 508 for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E; 509 ++A) { 510 if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) { 511 A->addAttr(AttributeSet::get(F->getContext(), A->getArgNo() + 1, B)); 512 ++NumNoCapture; 513 Changed = true; 514 } 515 } 516 continue; 517 } 518 519 for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E; 520 ++A) { 521 if (!A->getType()->isPointerTy()) 522 continue; 523 bool HasNonLocalUses = false; 524 if (!A->hasNoCaptureAttr()) { 525 ArgumentUsesTracker Tracker(SCCNodes); 526 PointerMayBeCaptured(&*A, &Tracker); 527 if (!Tracker.Captured) { 528 if (Tracker.Uses.empty()) { 529 // If it's trivially not captured, mark it nocapture now. 530 A->addAttr( 531 AttributeSet::get(F->getContext(), A->getArgNo() + 1, B)); 532 ++NumNoCapture; 533 Changed = true; 534 } else { 535 // If it's not trivially captured and not trivially not captured, 536 // then it must be calling into another function in our SCC. Save 537 // its particulars for Argument-SCC analysis later. 538 ArgumentGraphNode *Node = AG[&*A]; 539 for (SmallVectorImpl<Argument *>::iterator 540 UI = Tracker.Uses.begin(), 541 UE = Tracker.Uses.end(); 542 UI != UE; ++UI) { 543 Node->Uses.push_back(AG[*UI]); 544 if (*UI != &*A) 545 HasNonLocalUses = true; 546 } 547 } 548 } 549 // Otherwise, it's captured. Don't bother doing SCC analysis on it. 550 } 551 if (!HasNonLocalUses && !A->onlyReadsMemory()) { 552 // Can we determine that it's readonly/readnone without doing an SCC? 553 // Note that we don't allow any calls at all here, or else our result 554 // will be dependent on the iteration order through the functions in the 555 // SCC. 556 SmallPtrSet<Argument *, 8> Self; 557 Self.insert(&*A); 558 Attribute::AttrKind R = determinePointerReadAttrs(&*A, Self); 559 if (R != Attribute::None) { 560 AttrBuilder B; 561 B.addAttribute(R); 562 A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B)); 563 Changed = true; 564 R == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg; 565 } 566 } 567 } 568 } 569 570 // The graph we've collected is partial because we stopped scanning for 571 // argument uses once we solved the argument trivially. These partial nodes 572 // show up as ArgumentGraphNode objects with an empty Uses list, and for 573 // these nodes the final decision about whether they capture has already been 574 // made. If the definition doesn't have a 'nocapture' attribute by now, it 575 // captures. 576 577 for (scc_iterator<ArgumentGraph *> I = scc_begin(&AG); !I.isAtEnd(); ++I) { 578 const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I; 579 if (ArgumentSCC.size() == 1) { 580 if (!ArgumentSCC[0]->Definition) 581 continue; // synthetic root node 582 583 // eg. "void f(int* x) { if (...) f(x); }" 584 if (ArgumentSCC[0]->Uses.size() == 1 && 585 ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) { 586 Argument *A = ArgumentSCC[0]->Definition; 587 A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B)); 588 ++NumNoCapture; 589 Changed = true; 590 } 591 continue; 592 } 593 594 bool SCCCaptured = false; 595 for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); 596 I != E && !SCCCaptured; ++I) { 597 ArgumentGraphNode *Node = *I; 598 if (Node->Uses.empty()) { 599 if (!Node->Definition->hasNoCaptureAttr()) 600 SCCCaptured = true; 601 } 602 } 603 if (SCCCaptured) 604 continue; 605 606 SmallPtrSet<Argument *, 8> ArgumentSCCNodes; 607 // Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for 608 // quickly looking up whether a given Argument is in this ArgumentSCC. 609 for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); I != E; ++I) { 610 ArgumentSCCNodes.insert((*I)->Definition); 611 } 612 613 for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); 614 I != E && !SCCCaptured; ++I) { 615 ArgumentGraphNode *N = *I; 616 for (SmallVectorImpl<ArgumentGraphNode *>::iterator UI = N->Uses.begin(), 617 UE = N->Uses.end(); 618 UI != UE; ++UI) { 619 Argument *A = (*UI)->Definition; 620 if (A->hasNoCaptureAttr() || ArgumentSCCNodes.count(A)) 621 continue; 622 SCCCaptured = true; 623 break; 624 } 625 } 626 if (SCCCaptured) 627 continue; 628 629 for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { 630 Argument *A = ArgumentSCC[i]->Definition; 631 A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B)); 632 ++NumNoCapture; 633 Changed = true; 634 } 635 636 // We also want to compute readonly/readnone. With a small number of false 637 // negatives, we can assume that any pointer which is captured isn't going 638 // to be provably readonly or readnone, since by definition we can't 639 // analyze all uses of a captured pointer. 640 // 641 // The false negatives happen when the pointer is captured by a function 642 // that promises readonly/readnone behaviour on the pointer, then the 643 // pointer's lifetime ends before anything that writes to arbitrary memory. 644 // Also, a readonly/readnone pointer may be returned, but returning a 645 // pointer is capturing it. 646 647 Attribute::AttrKind ReadAttr = Attribute::ReadNone; 648 for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { 649 Argument *A = ArgumentSCC[i]->Definition; 650 Attribute::AttrKind K = determinePointerReadAttrs(A, ArgumentSCCNodes); 651 if (K == Attribute::ReadNone) 652 continue; 653 if (K == Attribute::ReadOnly) { 654 ReadAttr = Attribute::ReadOnly; 655 continue; 656 } 657 ReadAttr = K; 658 break; 659 } 660 661 if (ReadAttr != Attribute::None) { 662 AttrBuilder B, R; 663 B.addAttribute(ReadAttr); 664 R.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone); 665 for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { 666 Argument *A = ArgumentSCC[i]->Definition; 667 // Clear out existing readonly/readnone attributes 668 A->removeAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, R)); 669 A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B)); 670 ReadAttr == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg; 671 Changed = true; 672 } 673 } 674 } 675 676 return Changed; 677 } 678 679 /// Tests whether a function is "malloc-like". 680 /// 681 /// A function is "malloc-like" if it returns either null or a pointer that 682 /// doesn't alias any other pointer visible to the caller. 683 static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) { 684 SmallSetVector<Value *, 8> FlowsToReturn; 685 for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) 686 if (ReturnInst *Ret = dyn_cast<ReturnInst>(I->getTerminator())) 687 FlowsToReturn.insert(Ret->getReturnValue()); 688 689 for (unsigned i = 0; i != FlowsToReturn.size(); ++i) { 690 Value *RetVal = FlowsToReturn[i]; 691 692 if (Constant *C = dyn_cast<Constant>(RetVal)) { 693 if (!C->isNullValue() && !isa<UndefValue>(C)) 694 return false; 695 696 continue; 697 } 698 699 if (isa<Argument>(RetVal)) 700 return false; 701 702 if (Instruction *RVI = dyn_cast<Instruction>(RetVal)) 703 switch (RVI->getOpcode()) { 704 // Extend the analysis by looking upwards. 705 case Instruction::BitCast: 706 case Instruction::GetElementPtr: 707 case Instruction::AddrSpaceCast: 708 FlowsToReturn.insert(RVI->getOperand(0)); 709 continue; 710 case Instruction::Select: { 711 SelectInst *SI = cast<SelectInst>(RVI); 712 FlowsToReturn.insert(SI->getTrueValue()); 713 FlowsToReturn.insert(SI->getFalseValue()); 714 continue; 715 } 716 case Instruction::PHI: { 717 PHINode *PN = cast<PHINode>(RVI); 718 for (Value *IncValue : PN->incoming_values()) 719 FlowsToReturn.insert(IncValue); 720 continue; 721 } 722 723 // Check whether the pointer came from an allocation. 724 case Instruction::Alloca: 725 break; 726 case Instruction::Call: 727 case Instruction::Invoke: { 728 CallSite CS(RVI); 729 if (CS.paramHasAttr(0, Attribute::NoAlias)) 730 break; 731 if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction())) 732 break; 733 } // fall-through 734 default: 735 return false; // Did not come from an allocation. 736 } 737 738 if (PointerMayBeCaptured(RetVal, false, /*StoreCaptures=*/false)) 739 return false; 740 } 741 742 return true; 743 } 744 745 /// Deduce noalias attributes for the SCC. 746 static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) { 747 // Check each function in turn, determining which functions return noalias 748 // pointers. 749 for (Function *F : SCCNodes) { 750 // Already noalias. 751 if (F->doesNotAlias(0)) 752 continue; 753 754 // We can infer and propagate function attributes only when we know that the 755 // definition we'll get at link time is *exactly* the definition we see now. 756 // For more details, see GlobalValue::mayBeDerefined. 757 if (!F->hasExactDefinition()) 758 return false; 759 760 // We annotate noalias return values, which are only applicable to 761 // pointer types. 762 if (!F->getReturnType()->isPointerTy()) 763 continue; 764 765 if (!isFunctionMallocLike(F, SCCNodes)) 766 return false; 767 } 768 769 bool MadeChange = false; 770 for (Function *F : SCCNodes) { 771 if (F->doesNotAlias(0) || !F->getReturnType()->isPointerTy()) 772 continue; 773 774 F->setDoesNotAlias(0); 775 ++NumNoAlias; 776 MadeChange = true; 777 } 778 779 return MadeChange; 780 } 781 782 /// Tests whether this function is known to not return null. 783 /// 784 /// Requires that the function returns a pointer. 785 /// 786 /// Returns true if it believes the function will not return a null, and sets 787 /// \p Speculative based on whether the returned conclusion is a speculative 788 /// conclusion due to SCC calls. 789 static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes, 790 const TargetLibraryInfo &TLI, bool &Speculative) { 791 assert(F->getReturnType()->isPointerTy() && 792 "nonnull only meaningful on pointer types"); 793 Speculative = false; 794 795 SmallSetVector<Value *, 8> FlowsToReturn; 796 for (BasicBlock &BB : *F) 797 if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) 798 FlowsToReturn.insert(Ret->getReturnValue()); 799 800 for (unsigned i = 0; i != FlowsToReturn.size(); ++i) { 801 Value *RetVal = FlowsToReturn[i]; 802 803 // If this value is locally known to be non-null, we're good 804 if (isKnownNonNull(RetVal, &TLI)) 805 continue; 806 807 // Otherwise, we need to look upwards since we can't make any local 808 // conclusions. 809 Instruction *RVI = dyn_cast<Instruction>(RetVal); 810 if (!RVI) 811 return false; 812 switch (RVI->getOpcode()) { 813 // Extend the analysis by looking upwards. 814 case Instruction::BitCast: 815 case Instruction::GetElementPtr: 816 case Instruction::AddrSpaceCast: 817 FlowsToReturn.insert(RVI->getOperand(0)); 818 continue; 819 case Instruction::Select: { 820 SelectInst *SI = cast<SelectInst>(RVI); 821 FlowsToReturn.insert(SI->getTrueValue()); 822 FlowsToReturn.insert(SI->getFalseValue()); 823 continue; 824 } 825 case Instruction::PHI: { 826 PHINode *PN = cast<PHINode>(RVI); 827 for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 828 FlowsToReturn.insert(PN->getIncomingValue(i)); 829 continue; 830 } 831 case Instruction::Call: 832 case Instruction::Invoke: { 833 CallSite CS(RVI); 834 Function *Callee = CS.getCalledFunction(); 835 // A call to a node within the SCC is assumed to return null until 836 // proven otherwise 837 if (Callee && SCCNodes.count(Callee)) { 838 Speculative = true; 839 continue; 840 } 841 return false; 842 } 843 default: 844 return false; // Unknown source, may be null 845 }; 846 llvm_unreachable("should have either continued or returned"); 847 } 848 849 return true; 850 } 851 852 /// Deduce nonnull attributes for the SCC. 853 static bool addNonNullAttrs(const SCCNodeSet &SCCNodes, 854 const TargetLibraryInfo &TLI) { 855 // Speculative that all functions in the SCC return only nonnull 856 // pointers. We may refute this as we analyze functions. 857 bool SCCReturnsNonNull = true; 858 859 bool MadeChange = false; 860 861 // Check each function in turn, determining which functions return nonnull 862 // pointers. 863 for (Function *F : SCCNodes) { 864 // Already nonnull. 865 if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, 866 Attribute::NonNull)) 867 continue; 868 869 // We can infer and propagate function attributes only when we know that the 870 // definition we'll get at link time is *exactly* the definition we see now. 871 // For more details, see GlobalValue::mayBeDerefined. 872 if (!F->hasExactDefinition()) 873 return false; 874 875 // We annotate nonnull return values, which are only applicable to 876 // pointer types. 877 if (!F->getReturnType()->isPointerTy()) 878 continue; 879 880 bool Speculative = false; 881 if (isReturnNonNull(F, SCCNodes, TLI, Speculative)) { 882 if (!Speculative) { 883 // Mark the function eagerly since we may discover a function 884 // which prevents us from speculating about the entire SCC 885 DEBUG(dbgs() << "Eagerly marking " << F->getName() << " as nonnull\n"); 886 F->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull); 887 ++NumNonNullReturn; 888 MadeChange = true; 889 } 890 continue; 891 } 892 // At least one function returns something which could be null, can't 893 // speculate any more. 894 SCCReturnsNonNull = false; 895 } 896 897 if (SCCReturnsNonNull) { 898 for (Function *F : SCCNodes) { 899 if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, 900 Attribute::NonNull) || 901 !F->getReturnType()->isPointerTy()) 902 continue; 903 904 DEBUG(dbgs() << "SCC marking " << F->getName() << " as nonnull\n"); 905 F->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull); 906 ++NumNonNullReturn; 907 MadeChange = true; 908 } 909 } 910 911 return MadeChange; 912 } 913 914 /// Remove the convergent attribute from all functions in the SCC if every 915 /// callsite within the SCC is not convergent (except for calls to functions 916 /// within the SCC). Returns true if changes were made. 917 static bool removeConvergentAttrs(const SCCNodeSet &SCCNodes) { 918 // For every function in SCC, ensure that either 919 // * it is not convergent, or 920 // * we can remove its convergent attribute. 921 bool HasConvergentFn = false; 922 for (Function *F : SCCNodes) { 923 if (!F->isConvergent()) continue; 924 HasConvergentFn = true; 925 926 // Can't remove convergent from function declarations. 927 if (F->isDeclaration()) return false; 928 929 // Can't remove convergent if any of our functions has a convergent call to a 930 // function not in the SCC. 931 for (Instruction &I : instructions(*F)) { 932 CallSite CS(&I); 933 // Bail if CS is a convergent call to a function not in the SCC. 934 if (CS && CS.isConvergent() && 935 SCCNodes.count(CS.getCalledFunction()) == 0) 936 return false; 937 } 938 } 939 940 // If the SCC doesn't have any convergent functions, we have nothing to do. 941 if (!HasConvergentFn) return false; 942 943 // If we got here, all of the calls the SCC makes to functions not in the SCC 944 // are non-convergent. Therefore all of the SCC's functions can also be made 945 // non-convergent. We'll remove the attr from the callsites in 946 // InstCombineCalls. 947 for (Function *F : SCCNodes) { 948 if (!F->isConvergent()) continue; 949 950 DEBUG(dbgs() << "Removing convergent attr from fn " << F->getName() 951 << "\n"); 952 F->setNotConvergent(); 953 } 954 return true; 955 } 956 957 static bool setDoesNotRecurse(Function &F) { 958 if (F.doesNotRecurse()) 959 return false; 960 F.setDoesNotRecurse(); 961 ++NumNoRecurse; 962 return true; 963 } 964 965 static bool addNoRecurseAttrs(const SCCNodeSet &SCCNodes) { 966 // Try and identify functions that do not recurse. 967 968 // If the SCC contains multiple nodes we know for sure there is recursion. 969 if (SCCNodes.size() != 1) 970 return false; 971 972 Function *F = *SCCNodes.begin(); 973 if (!F || F->isDeclaration() || F->doesNotRecurse()) 974 return false; 975 976 // If all of the calls in F are identifiable and are to norecurse functions, F 977 // is norecurse. This check also detects self-recursion as F is not currently 978 // marked norecurse, so any called from F to F will not be marked norecurse. 979 for (Instruction &I : instructions(*F)) 980 if (auto CS = CallSite(&I)) { 981 Function *Callee = CS.getCalledFunction(); 982 if (!Callee || Callee == F || !Callee->doesNotRecurse()) 983 // Function calls a potentially recursive function. 984 return false; 985 } 986 987 // Every call was to a non-recursive function other than this function, and 988 // we have no indirect recursion as the SCC size is one. This function cannot 989 // recurse. 990 return setDoesNotRecurse(*F); 991 } 992 993 PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C, 994 CGSCCAnalysisManager &AM) { 995 Module &M = *C.begin()->getFunction().getParent(); 996 const ModuleAnalysisManager &MAM = 997 AM.getResult<ModuleAnalysisManagerCGSCCProxy>(C).getManager(); 998 FunctionAnalysisManager &FAM = 999 AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C).getManager(); 1000 1001 // FIXME: Need some way to make it more reasonable to assume that this is 1002 // always cached. 1003 TargetLibraryInfo &TLI = *MAM.getCachedResult<TargetLibraryAnalysis>(M); 1004 1005 // We pass a lambda into functions to wire them up to the analysis manager 1006 // for getting function analyses. 1007 auto AARGetter = [&](Function &F) -> AAResults & { 1008 return FAM.getResult<AAManager>(F); 1009 }; 1010 1011 // Fill SCCNodes with the elements of the SCC. Also track whether there are 1012 // any external or opt-none nodes that will prevent us from optimizing any 1013 // part of the SCC. 1014 SCCNodeSet SCCNodes; 1015 bool HasUnknownCall = false; 1016 for (LazyCallGraph::Node &N : C) { 1017 Function &F = N.getFunction(); 1018 if (F.hasFnAttribute(Attribute::OptimizeNone)) { 1019 // Treat any function we're trying not to optimize as if it were an 1020 // indirect call and omit it from the node set used below. 1021 HasUnknownCall = true; 1022 continue; 1023 } 1024 // Track whether any functions in this SCC have an unknown call edge. 1025 // Note: if this is ever a performance hit, we can common it with 1026 // subsequent routines which also do scans over the instructions of the 1027 // function. 1028 if (!HasUnknownCall) 1029 for (Instruction &I : instructions(F)) 1030 if (auto CS = CallSite(&I)) 1031 if (!CS.getCalledFunction()) { 1032 HasUnknownCall = true; 1033 break; 1034 } 1035 1036 SCCNodes.insert(&F); 1037 } 1038 1039 bool Changed = false; 1040 Changed |= addReadAttrs(SCCNodes, AARGetter); 1041 Changed |= addArgumentAttrs(SCCNodes); 1042 1043 // If we have no external nodes participating in the SCC, we can deduce some 1044 // more precise attributes as well. 1045 if (!HasUnknownCall) { 1046 Changed |= addNoAliasAttrs(SCCNodes); 1047 Changed |= addNonNullAttrs(SCCNodes, TLI); 1048 Changed |= removeConvergentAttrs(SCCNodes); 1049 Changed |= addNoRecurseAttrs(SCCNodes); 1050 } 1051 1052 return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); 1053 } 1054 1055 namespace { 1056 struct PostOrderFunctionAttrsLegacyPass : public CallGraphSCCPass { 1057 static char ID; // Pass identification, replacement for typeid 1058 PostOrderFunctionAttrsLegacyPass() : CallGraphSCCPass(ID) { 1059 initializePostOrderFunctionAttrsLegacyPassPass(*PassRegistry::getPassRegistry()); 1060 } 1061 1062 bool runOnSCC(CallGraphSCC &SCC) override; 1063 1064 void getAnalysisUsage(AnalysisUsage &AU) const override { 1065 AU.setPreservesCFG(); 1066 AU.addRequired<AssumptionCacheTracker>(); 1067 AU.addRequired<TargetLibraryInfoWrapperPass>(); 1068 getAAResultsAnalysisUsage(AU); 1069 CallGraphSCCPass::getAnalysisUsage(AU); 1070 } 1071 1072 private: 1073 TargetLibraryInfo *TLI; 1074 }; 1075 } 1076 1077 char PostOrderFunctionAttrsLegacyPass::ID = 0; 1078 INITIALIZE_PASS_BEGIN(PostOrderFunctionAttrsLegacyPass, "functionattrs", 1079 "Deduce function attributes", false, false) 1080 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 1081 INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) 1082 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 1083 INITIALIZE_PASS_END(PostOrderFunctionAttrsLegacyPass, "functionattrs", 1084 "Deduce function attributes", false, false) 1085 1086 Pass *llvm::createPostOrderFunctionAttrsLegacyPass() { return new PostOrderFunctionAttrsLegacyPass(); } 1087 1088 bool PostOrderFunctionAttrsLegacyPass::runOnSCC(CallGraphSCC &SCC) { 1089 if (skipSCC(SCC)) 1090 return false; 1091 1092 TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); 1093 bool Changed = false; 1094 1095 // We compute dedicated AA results for each function in the SCC as needed. We 1096 // use a lambda referencing external objects so that they live long enough to 1097 // be queried, but we re-use them each time. 1098 Optional<BasicAAResult> BAR; 1099 Optional<AAResults> AAR; 1100 auto AARGetter = [&](Function &F) -> AAResults & { 1101 BAR.emplace(createLegacyPMBasicAAResult(*this, F)); 1102 AAR.emplace(createLegacyPMAAResults(*this, F, *BAR)); 1103 return *AAR; 1104 }; 1105 1106 // Fill SCCNodes with the elements of the SCC. Used for quickly looking up 1107 // whether a given CallGraphNode is in this SCC. Also track whether there are 1108 // any external or opt-none nodes that will prevent us from optimizing any 1109 // part of the SCC. 1110 SCCNodeSet SCCNodes; 1111 bool ExternalNode = false; 1112 for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { 1113 Function *F = (*I)->getFunction(); 1114 if (!F || F->hasFnAttribute(Attribute::OptimizeNone)) { 1115 // External node or function we're trying not to optimize - we both avoid 1116 // transform them and avoid leveraging information they provide. 1117 ExternalNode = true; 1118 continue; 1119 } 1120 1121 SCCNodes.insert(F); 1122 } 1123 1124 Changed |= addReadAttrs(SCCNodes, AARGetter); 1125 Changed |= addArgumentAttrs(SCCNodes); 1126 1127 // If we have no external nodes participating in the SCC, we can deduce some 1128 // more precise attributes as well. 1129 if (!ExternalNode) { 1130 Changed |= addNoAliasAttrs(SCCNodes); 1131 Changed |= addNonNullAttrs(SCCNodes, *TLI); 1132 Changed |= removeConvergentAttrs(SCCNodes); 1133 Changed |= addNoRecurseAttrs(SCCNodes); 1134 } 1135 1136 return Changed; 1137 } 1138 1139 namespace { 1140 /// A pass to do RPO deduction and propagation of function attributes. 1141 /// 1142 /// This pass provides a general RPO or "top down" propagation of 1143 /// function attributes. For a few (rare) cases, we can deduce significantly 1144 /// more about function attributes by working in RPO, so this pass 1145 /// provides the compliment to the post-order pass above where the majority of 1146 /// deduction is performed. 1147 // FIXME: Currently there is no RPO CGSCC pass structure to slide into and so 1148 // this is a boring module pass, but eventually it should be an RPO CGSCC pass 1149 // when such infrastructure is available. 1150 struct ReversePostOrderFunctionAttrs : public ModulePass { 1151 static char ID; // Pass identification, replacement for typeid 1152 ReversePostOrderFunctionAttrs() : ModulePass(ID) { 1153 initializeReversePostOrderFunctionAttrsPass(*PassRegistry::getPassRegistry()); 1154 } 1155 1156 bool runOnModule(Module &M) override; 1157 1158 void getAnalysisUsage(AnalysisUsage &AU) const override { 1159 AU.setPreservesCFG(); 1160 AU.addRequired<CallGraphWrapperPass>(); 1161 AU.addPreserved<CallGraphWrapperPass>(); 1162 } 1163 }; 1164 } 1165 1166 char ReversePostOrderFunctionAttrs::ID = 0; 1167 INITIALIZE_PASS_BEGIN(ReversePostOrderFunctionAttrs, "rpo-functionattrs", 1168 "Deduce function attributes in RPO", false, false) 1169 INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) 1170 INITIALIZE_PASS_END(ReversePostOrderFunctionAttrs, "rpo-functionattrs", 1171 "Deduce function attributes in RPO", false, false) 1172 1173 Pass *llvm::createReversePostOrderFunctionAttrsPass() { 1174 return new ReversePostOrderFunctionAttrs(); 1175 } 1176 1177 static bool addNoRecurseAttrsTopDown(Function &F) { 1178 // We check the preconditions for the function prior to calling this to avoid 1179 // the cost of building up a reversible post-order list. We assert them here 1180 // to make sure none of the invariants this relies on were violated. 1181 assert(!F.isDeclaration() && "Cannot deduce norecurse without a definition!"); 1182 assert(!F.doesNotRecurse() && 1183 "This function has already been deduced as norecurs!"); 1184 assert(F.hasInternalLinkage() && 1185 "Can only do top-down deduction for internal linkage functions!"); 1186 1187 // If F is internal and all of its uses are calls from a non-recursive 1188 // functions, then none of its calls could in fact recurse without going 1189 // through a function marked norecurse, and so we can mark this function too 1190 // as norecurse. Note that the uses must actually be calls -- otherwise 1191 // a pointer to this function could be returned from a norecurse function but 1192 // this function could be recursively (indirectly) called. Note that this 1193 // also detects if F is directly recursive as F is not yet marked as 1194 // a norecurse function. 1195 for (auto *U : F.users()) { 1196 auto *I = dyn_cast<Instruction>(U); 1197 if (!I) 1198 return false; 1199 CallSite CS(I); 1200 if (!CS || !CS.getParent()->getParent()->doesNotRecurse()) 1201 return false; 1202 } 1203 return setDoesNotRecurse(F); 1204 } 1205 1206 bool ReversePostOrderFunctionAttrs::runOnModule(Module &M) { 1207 if (skipModule(M)) 1208 return false; 1209 1210 // We only have a post-order SCC traversal (because SCCs are inherently 1211 // discovered in post-order), so we accumulate them in a vector and then walk 1212 // it in reverse. This is simpler than using the RPO iterator infrastructure 1213 // because we need to combine SCC detection and the PO walk of the call 1214 // graph. We can also cheat egregiously because we're primarily interested in 1215 // synthesizing norecurse and so we can only save the singular SCCs as SCCs 1216 // with multiple functions in them will clearly be recursive. 1217 auto &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); 1218 SmallVector<Function *, 16> Worklist; 1219 for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) { 1220 if (I->size() != 1) 1221 continue; 1222 1223 Function *F = I->front()->getFunction(); 1224 if (F && !F->isDeclaration() && !F->doesNotRecurse() && 1225 F->hasInternalLinkage()) 1226 Worklist.push_back(F); 1227 } 1228 1229 bool Changed = false; 1230 for (auto *F : reverse(Worklist)) 1231 Changed |= addNoRecurseAttrsTopDown(*F); 1232 1233 return Changed; 1234 } 1235