1 //===- FunctionAttrs.cpp - Pass which marks functions attributes ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This file implements interprocedural passes which walk the 11 /// call-graph deducing and/or propagating function attributes. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Transforms/IPO/FunctionAttrs.h" 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/SCCIterator.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/ADT/SetVector.h" 20 #include "llvm/ADT/SmallPtrSet.h" 21 #include "llvm/ADT/SmallVector.h" 22 #include "llvm/ADT/Statistic.h" 23 #include "llvm/Analysis/AssumptionCache.h" 24 #include "llvm/Analysis/BasicAliasAnalysis.h" 25 #include "llvm/Analysis/CFG.h" 26 #include "llvm/Analysis/CGSCCPassManager.h" 27 #include "llvm/Analysis/CallGraph.h" 28 #include "llvm/Analysis/CallGraphSCCPass.h" 29 #include "llvm/Analysis/CaptureTracking.h" 30 #include "llvm/Analysis/LazyCallGraph.h" 31 #include "llvm/Analysis/MemoryBuiltins.h" 32 #include "llvm/Analysis/MemoryLocation.h" 33 #include "llvm/Analysis/ValueTracking.h" 34 #include "llvm/IR/Argument.h" 35 #include "llvm/IR/Attributes.h" 36 #include "llvm/IR/BasicBlock.h" 37 #include "llvm/IR/Constant.h" 38 #include "llvm/IR/Constants.h" 39 #include "llvm/IR/Function.h" 40 #include "llvm/IR/InstIterator.h" 41 #include "llvm/IR/InstrTypes.h" 42 #include "llvm/IR/Instruction.h" 43 #include "llvm/IR/Instructions.h" 44 #include "llvm/IR/IntrinsicInst.h" 45 #include "llvm/IR/Metadata.h" 46 #include "llvm/IR/PassManager.h" 47 #include "llvm/IR/Type.h" 48 #include "llvm/IR/Use.h" 49 #include "llvm/IR/User.h" 50 #include "llvm/IR/Value.h" 51 #include "llvm/InitializePasses.h" 52 #include "llvm/Pass.h" 53 #include "llvm/Support/Casting.h" 54 #include "llvm/Support/CommandLine.h" 55 #include "llvm/Support/Compiler.h" 56 #include "llvm/Support/Debug.h" 57 #include "llvm/Support/ErrorHandling.h" 58 #include "llvm/Support/raw_ostream.h" 59 #include "llvm/Transforms/IPO.h" 60 #include <cassert> 61 #include <iterator> 62 #include <map> 63 #include <vector> 64 65 using namespace llvm; 66 67 #define DEBUG_TYPE "function-attrs" 68 69 STATISTIC(NumReadNone, "Number of functions marked readnone"); 70 STATISTIC(NumReadOnly, "Number of functions marked readonly"); 71 STATISTIC(NumWriteOnly, "Number of functions marked writeonly"); 72 STATISTIC(NumNoCapture, "Number of arguments marked nocapture"); 73 STATISTIC(NumReturned, "Number of arguments marked returned"); 74 STATISTIC(NumReadNoneArg, "Number of arguments marked readnone"); 75 STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly"); 76 STATISTIC(NumNoAlias, "Number of function returns marked noalias"); 77 STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull"); 78 STATISTIC(NumNoRecurse, "Number of functions marked as norecurse"); 79 STATISTIC(NumNoUnwind, "Number of functions marked as nounwind"); 80 STATISTIC(NumNoFree, "Number of functions marked as nofree"); 81 STATISTIC(NumWillReturn, "Number of functions marked as willreturn"); 82 STATISTIC(NumNoSync, "Number of functions marked as nosync"); 83 84 static cl::opt<bool> EnableNonnullArgPropagation( 85 "enable-nonnull-arg-prop", cl::init(true), cl::Hidden, 86 cl::desc("Try to propagate nonnull argument attributes from callsites to " 87 "caller functions.")); 88 89 static cl::opt<bool> DisableNoUnwindInference( 90 "disable-nounwind-inference", cl::Hidden, 91 cl::desc("Stop inferring nounwind attribute during function-attrs pass")); 92 93 static cl::opt<bool> DisableNoFreeInference( 94 "disable-nofree-inference", cl::Hidden, 95 cl::desc("Stop inferring nofree attribute during function-attrs pass")); 96 97 namespace { 98 99 using SCCNodeSet = SmallSetVector<Function *, 8>; 100 101 } // end anonymous namespace 102 103 /// Returns the memory access attribute for function F using AAR for AA results, 104 /// where SCCNodes is the current SCC. 105 /// 106 /// If ThisBody is true, this function may examine the function body and will 107 /// return a result pertaining to this copy of the function. If it is false, the 108 /// result will be based only on AA results for the function declaration; it 109 /// will be assumed that some other (perhaps less optimized) version of the 110 /// function may be selected at link time. 111 static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody, 112 AAResults &AAR, 113 const SCCNodeSet &SCCNodes) { 114 FunctionModRefBehavior MRB = AAR.getModRefBehavior(&F); 115 if (MRB == FMRB_DoesNotAccessMemory) 116 // Already perfect! 117 return MAK_ReadNone; 118 119 if (!ThisBody) { 120 if (AliasAnalysis::onlyReadsMemory(MRB)) 121 return MAK_ReadOnly; 122 123 if (AliasAnalysis::doesNotReadMemory(MRB)) 124 return MAK_WriteOnly; 125 126 // Conservatively assume it reads and writes to memory. 127 return MAK_MayWrite; 128 } 129 130 // Scan the function body for instructions that may read or write memory. 131 bool ReadsMemory = false; 132 bool WritesMemory = false; 133 for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) { 134 Instruction *I = &*II; 135 136 // Some instructions can be ignored even if they read or write memory. 137 // Detect these now, skipping to the next instruction if one is found. 138 if (auto *Call = dyn_cast<CallBase>(I)) { 139 // Ignore calls to functions in the same SCC, as long as the call sites 140 // don't have operand bundles. Calls with operand bundles are allowed to 141 // have memory effects not described by the memory effects of the call 142 // target. 143 if (!Call->hasOperandBundles() && Call->getCalledFunction() && 144 SCCNodes.count(Call->getCalledFunction())) 145 continue; 146 FunctionModRefBehavior MRB = AAR.getModRefBehavior(Call); 147 ModRefInfo MRI = createModRefInfo(MRB); 148 149 // If the call doesn't access memory, we're done. 150 if (isNoModRef(MRI)) 151 continue; 152 153 // A pseudo probe call shouldn't change any function attribute since it 154 // doesn't translate to a real instruction. It comes with a memory access 155 // tag to prevent itself being removed by optimizations and not block 156 // other instructions being optimized. 157 if (isa<PseudoProbeInst>(I)) 158 continue; 159 160 if (!AliasAnalysis::onlyAccessesArgPointees(MRB)) { 161 // The call could access any memory. If that includes writes, note it. 162 if (isModSet(MRI)) 163 WritesMemory = true; 164 // If it reads, note it. 165 if (isRefSet(MRI)) 166 ReadsMemory = true; 167 continue; 168 } 169 170 // Check whether all pointer arguments point to local memory, and 171 // ignore calls that only access local memory. 172 for (auto CI = Call->arg_begin(), CE = Call->arg_end(); CI != CE; ++CI) { 173 Value *Arg = *CI; 174 if (!Arg->getType()->isPtrOrPtrVectorTy()) 175 continue; 176 177 AAMDNodes AAInfo; 178 I->getAAMetadata(AAInfo); 179 MemoryLocation Loc = MemoryLocation::getBeforeOrAfter(Arg, AAInfo); 180 181 // Skip accesses to local or constant memory as they don't impact the 182 // externally visible mod/ref behavior. 183 if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) 184 continue; 185 186 if (isModSet(MRI)) 187 // Writes non-local memory. 188 WritesMemory = true; 189 if (isRefSet(MRI)) 190 // Ok, it reads non-local memory. 191 ReadsMemory = true; 192 } 193 continue; 194 } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 195 // Ignore non-volatile loads from local memory. (Atomic is okay here.) 196 if (!LI->isVolatile()) { 197 MemoryLocation Loc = MemoryLocation::get(LI); 198 if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) 199 continue; 200 } 201 } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 202 // Ignore non-volatile stores to local memory. (Atomic is okay here.) 203 if (!SI->isVolatile()) { 204 MemoryLocation Loc = MemoryLocation::get(SI); 205 if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) 206 continue; 207 } 208 } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) { 209 // Ignore vaargs on local memory. 210 MemoryLocation Loc = MemoryLocation::get(VI); 211 if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) 212 continue; 213 } 214 215 // Any remaining instructions need to be taken seriously! Check if they 216 // read or write memory. 217 // 218 // Writes memory, remember that. 219 WritesMemory |= I->mayWriteToMemory(); 220 221 // If this instruction may read memory, remember that. 222 ReadsMemory |= I->mayReadFromMemory(); 223 } 224 225 if (WritesMemory) { 226 if (!ReadsMemory) 227 return MAK_WriteOnly; 228 else 229 return MAK_MayWrite; 230 } 231 232 return ReadsMemory ? MAK_ReadOnly : MAK_ReadNone; 233 } 234 235 MemoryAccessKind llvm::computeFunctionBodyMemoryAccess(Function &F, 236 AAResults &AAR) { 237 return checkFunctionMemoryAccess(F, /*ThisBody=*/true, AAR, {}); 238 } 239 240 /// Deduce readonly/readnone attributes for the SCC. 241 template <typename AARGetterT> 242 static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) { 243 // Check if any of the functions in the SCC read or write memory. If they 244 // write memory then they can't be marked readnone or readonly. 245 bool ReadsMemory = false; 246 bool WritesMemory = false; 247 for (Function *F : SCCNodes) { 248 // Call the callable parameter to look up AA results for this function. 249 AAResults &AAR = AARGetter(*F); 250 251 // Non-exact function definitions may not be selected at link time, and an 252 // alternative version that writes to memory may be selected. See the 253 // comment on GlobalValue::isDefinitionExact for more details. 254 switch (checkFunctionMemoryAccess(*F, F->hasExactDefinition(), 255 AAR, SCCNodes)) { 256 case MAK_MayWrite: 257 return false; 258 case MAK_ReadOnly: 259 ReadsMemory = true; 260 break; 261 case MAK_WriteOnly: 262 WritesMemory = true; 263 break; 264 case MAK_ReadNone: 265 // Nothing to do! 266 break; 267 } 268 } 269 270 // If the SCC contains both functions that read and functions that write, then 271 // we cannot add readonly attributes. 272 if (ReadsMemory && WritesMemory) 273 return false; 274 275 // Success! Functions in this SCC do not access memory, or only read memory. 276 // Give them the appropriate attribute. 277 bool MadeChange = false; 278 279 for (Function *F : SCCNodes) { 280 if (F->doesNotAccessMemory()) 281 // Already perfect! 282 continue; 283 284 if (F->onlyReadsMemory() && ReadsMemory) 285 // No change. 286 continue; 287 288 if (F->doesNotReadMemory() && WritesMemory) 289 continue; 290 291 MadeChange = true; 292 293 // Clear out any existing attributes. 294 AttrBuilder AttrsToRemove; 295 AttrsToRemove.addAttribute(Attribute::ReadOnly); 296 AttrsToRemove.addAttribute(Attribute::ReadNone); 297 AttrsToRemove.addAttribute(Attribute::WriteOnly); 298 299 if (!WritesMemory && !ReadsMemory) { 300 // Clear out any "access range attributes" if readnone was deduced. 301 AttrsToRemove.addAttribute(Attribute::ArgMemOnly); 302 AttrsToRemove.addAttribute(Attribute::InaccessibleMemOnly); 303 AttrsToRemove.addAttribute(Attribute::InaccessibleMemOrArgMemOnly); 304 } 305 F->removeAttributes(AttributeList::FunctionIndex, AttrsToRemove); 306 307 // Add in the new attribute. 308 if (WritesMemory && !ReadsMemory) 309 F->addFnAttr(Attribute::WriteOnly); 310 else 311 F->addFnAttr(ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone); 312 313 if (WritesMemory && !ReadsMemory) 314 ++NumWriteOnly; 315 else if (ReadsMemory) 316 ++NumReadOnly; 317 else 318 ++NumReadNone; 319 } 320 321 return MadeChange; 322 } 323 324 namespace { 325 326 /// For a given pointer Argument, this retains a list of Arguments of functions 327 /// in the same SCC that the pointer data flows into. We use this to build an 328 /// SCC of the arguments. 329 struct ArgumentGraphNode { 330 Argument *Definition; 331 SmallVector<ArgumentGraphNode *, 4> Uses; 332 }; 333 334 class ArgumentGraph { 335 // We store pointers to ArgumentGraphNode objects, so it's important that 336 // that they not move around upon insert. 337 using ArgumentMapTy = std::map<Argument *, ArgumentGraphNode>; 338 339 ArgumentMapTy ArgumentMap; 340 341 // There is no root node for the argument graph, in fact: 342 // void f(int *x, int *y) { if (...) f(x, y); } 343 // is an example where the graph is disconnected. The SCCIterator requires a 344 // single entry point, so we maintain a fake ("synthetic") root node that 345 // uses every node. Because the graph is directed and nothing points into 346 // the root, it will not participate in any SCCs (except for its own). 347 ArgumentGraphNode SyntheticRoot; 348 349 public: 350 ArgumentGraph() { SyntheticRoot.Definition = nullptr; } 351 352 using iterator = SmallVectorImpl<ArgumentGraphNode *>::iterator; 353 354 iterator begin() { return SyntheticRoot.Uses.begin(); } 355 iterator end() { return SyntheticRoot.Uses.end(); } 356 ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; } 357 358 ArgumentGraphNode *operator[](Argument *A) { 359 ArgumentGraphNode &Node = ArgumentMap[A]; 360 Node.Definition = A; 361 SyntheticRoot.Uses.push_back(&Node); 362 return &Node; 363 } 364 }; 365 366 /// This tracker checks whether callees are in the SCC, and if so it does not 367 /// consider that a capture, instead adding it to the "Uses" list and 368 /// continuing with the analysis. 369 struct ArgumentUsesTracker : public CaptureTracker { 370 ArgumentUsesTracker(const SCCNodeSet &SCCNodes) : SCCNodes(SCCNodes) {} 371 372 void tooManyUses() override { Captured = true; } 373 374 bool captured(const Use *U) override { 375 CallBase *CB = dyn_cast<CallBase>(U->getUser()); 376 if (!CB) { 377 Captured = true; 378 return true; 379 } 380 381 Function *F = CB->getCalledFunction(); 382 if (!F || !F->hasExactDefinition() || !SCCNodes.count(F)) { 383 Captured = true; 384 return true; 385 } 386 387 // Note: the callee and the two successor blocks *follow* the argument 388 // operands. This means there is no need to adjust UseIndex to account for 389 // these. 390 391 unsigned UseIndex = 392 std::distance(const_cast<const Use *>(CB->arg_begin()), U); 393 394 assert(UseIndex < CB->data_operands_size() && 395 "Indirect function calls should have been filtered above!"); 396 397 if (UseIndex >= CB->getNumArgOperands()) { 398 // Data operand, but not a argument operand -- must be a bundle operand 399 assert(CB->hasOperandBundles() && "Must be!"); 400 401 // CaptureTracking told us that we're being captured by an operand bundle 402 // use. In this case it does not matter if the callee is within our SCC 403 // or not -- we've been captured in some unknown way, and we have to be 404 // conservative. 405 Captured = true; 406 return true; 407 } 408 409 if (UseIndex >= F->arg_size()) { 410 assert(F->isVarArg() && "More params than args in non-varargs call"); 411 Captured = true; 412 return true; 413 } 414 415 Uses.push_back(&*std::next(F->arg_begin(), UseIndex)); 416 return false; 417 } 418 419 // True only if certainly captured (used outside our SCC). 420 bool Captured = false; 421 422 // Uses within our SCC. 423 SmallVector<Argument *, 4> Uses; 424 425 const SCCNodeSet &SCCNodes; 426 }; 427 428 } // end anonymous namespace 429 430 namespace llvm { 431 432 template <> struct GraphTraits<ArgumentGraphNode *> { 433 using NodeRef = ArgumentGraphNode *; 434 using ChildIteratorType = SmallVectorImpl<ArgumentGraphNode *>::iterator; 435 436 static NodeRef getEntryNode(NodeRef A) { return A; } 437 static ChildIteratorType child_begin(NodeRef N) { return N->Uses.begin(); } 438 static ChildIteratorType child_end(NodeRef N) { return N->Uses.end(); } 439 }; 440 441 template <> 442 struct GraphTraits<ArgumentGraph *> : public GraphTraits<ArgumentGraphNode *> { 443 static NodeRef getEntryNode(ArgumentGraph *AG) { return AG->getEntryNode(); } 444 445 static ChildIteratorType nodes_begin(ArgumentGraph *AG) { 446 return AG->begin(); 447 } 448 449 static ChildIteratorType nodes_end(ArgumentGraph *AG) { return AG->end(); } 450 }; 451 452 } // end namespace llvm 453 454 /// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone. 455 static Attribute::AttrKind 456 determinePointerReadAttrs(Argument *A, 457 const SmallPtrSet<Argument *, 8> &SCCNodes) { 458 SmallVector<Use *, 32> Worklist; 459 SmallPtrSet<Use *, 32> Visited; 460 461 // inalloca arguments are always clobbered by the call. 462 if (A->hasInAllocaAttr() || A->hasPreallocatedAttr()) 463 return Attribute::None; 464 465 bool IsRead = false; 466 // We don't need to track IsWritten. If A is written to, return immediately. 467 468 for (Use &U : A->uses()) { 469 Visited.insert(&U); 470 Worklist.push_back(&U); 471 } 472 473 while (!Worklist.empty()) { 474 Use *U = Worklist.pop_back_val(); 475 Instruction *I = cast<Instruction>(U->getUser()); 476 477 switch (I->getOpcode()) { 478 case Instruction::BitCast: 479 case Instruction::GetElementPtr: 480 case Instruction::PHI: 481 case Instruction::Select: 482 case Instruction::AddrSpaceCast: 483 // The original value is not read/written via this if the new value isn't. 484 for (Use &UU : I->uses()) 485 if (Visited.insert(&UU).second) 486 Worklist.push_back(&UU); 487 break; 488 489 case Instruction::Call: 490 case Instruction::Invoke: { 491 bool Captures = true; 492 493 if (I->getType()->isVoidTy()) 494 Captures = false; 495 496 auto AddUsersToWorklistIfCapturing = [&] { 497 if (Captures) 498 for (Use &UU : I->uses()) 499 if (Visited.insert(&UU).second) 500 Worklist.push_back(&UU); 501 }; 502 503 CallBase &CB = cast<CallBase>(*I); 504 if (CB.doesNotAccessMemory()) { 505 AddUsersToWorklistIfCapturing(); 506 continue; 507 } 508 509 Function *F = CB.getCalledFunction(); 510 if (!F) { 511 if (CB.onlyReadsMemory()) { 512 IsRead = true; 513 AddUsersToWorklistIfCapturing(); 514 continue; 515 } 516 return Attribute::None; 517 } 518 519 // Note: the callee and the two successor blocks *follow* the argument 520 // operands. This means there is no need to adjust UseIndex to account 521 // for these. 522 523 unsigned UseIndex = std::distance(CB.arg_begin(), U); 524 525 // U cannot be the callee operand use: since we're exploring the 526 // transitive uses of an Argument, having such a use be a callee would 527 // imply the call site is an indirect call or invoke; and we'd take the 528 // early exit above. 529 assert(UseIndex < CB.data_operands_size() && 530 "Data operand use expected!"); 531 532 bool IsOperandBundleUse = UseIndex >= CB.getNumArgOperands(); 533 534 if (UseIndex >= F->arg_size() && !IsOperandBundleUse) { 535 assert(F->isVarArg() && "More params than args in non-varargs call"); 536 return Attribute::None; 537 } 538 539 Captures &= !CB.doesNotCapture(UseIndex); 540 541 // Since the optimizer (by design) cannot see the data flow corresponding 542 // to a operand bundle use, these cannot participate in the optimistic SCC 543 // analysis. Instead, we model the operand bundle uses as arguments in 544 // call to a function external to the SCC. 545 if (IsOperandBundleUse || 546 !SCCNodes.count(&*std::next(F->arg_begin(), UseIndex))) { 547 548 // The accessors used on call site here do the right thing for calls and 549 // invokes with operand bundles. 550 551 if (!CB.onlyReadsMemory() && !CB.onlyReadsMemory(UseIndex)) 552 return Attribute::None; 553 if (!CB.doesNotAccessMemory(UseIndex)) 554 IsRead = true; 555 } 556 557 AddUsersToWorklistIfCapturing(); 558 break; 559 } 560 561 case Instruction::Load: 562 // A volatile load has side effects beyond what readonly can be relied 563 // upon. 564 if (cast<LoadInst>(I)->isVolatile()) 565 return Attribute::None; 566 567 IsRead = true; 568 break; 569 570 case Instruction::ICmp: 571 case Instruction::Ret: 572 break; 573 574 default: 575 return Attribute::None; 576 } 577 } 578 579 return IsRead ? Attribute::ReadOnly : Attribute::ReadNone; 580 } 581 582 /// Deduce returned attributes for the SCC. 583 static bool addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes) { 584 bool Changed = false; 585 586 // Check each function in turn, determining if an argument is always returned. 587 for (Function *F : SCCNodes) { 588 // We can infer and propagate function attributes only when we know that the 589 // definition we'll get at link time is *exactly* the definition we see now. 590 // For more details, see GlobalValue::mayBeDerefined. 591 if (!F->hasExactDefinition()) 592 continue; 593 594 if (F->getReturnType()->isVoidTy()) 595 continue; 596 597 // There is nothing to do if an argument is already marked as 'returned'. 598 if (llvm::any_of(F->args(), 599 [](const Argument &Arg) { return Arg.hasReturnedAttr(); })) 600 continue; 601 602 auto FindRetArg = [&]() -> Value * { 603 Value *RetArg = nullptr; 604 for (BasicBlock &BB : *F) 605 if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) { 606 // Note that stripPointerCasts should look through functions with 607 // returned arguments. 608 Value *RetVal = Ret->getReturnValue()->stripPointerCasts(); 609 if (!isa<Argument>(RetVal) || RetVal->getType() != F->getReturnType()) 610 return nullptr; 611 612 if (!RetArg) 613 RetArg = RetVal; 614 else if (RetArg != RetVal) 615 return nullptr; 616 } 617 618 return RetArg; 619 }; 620 621 if (Value *RetArg = FindRetArg()) { 622 auto *A = cast<Argument>(RetArg); 623 A->addAttr(Attribute::Returned); 624 ++NumReturned; 625 Changed = true; 626 } 627 } 628 629 return Changed; 630 } 631 632 /// If a callsite has arguments that are also arguments to the parent function, 633 /// try to propagate attributes from the callsite's arguments to the parent's 634 /// arguments. This may be important because inlining can cause information loss 635 /// when attribute knowledge disappears with the inlined call. 636 static bool addArgumentAttrsFromCallsites(Function &F) { 637 if (!EnableNonnullArgPropagation) 638 return false; 639 640 bool Changed = false; 641 642 // For an argument attribute to transfer from a callsite to the parent, the 643 // call must be guaranteed to execute every time the parent is called. 644 // Conservatively, just check for calls in the entry block that are guaranteed 645 // to execute. 646 // TODO: This could be enhanced by testing if the callsite post-dominates the 647 // entry block or by doing simple forward walks or backward walks to the 648 // callsite. 649 BasicBlock &Entry = F.getEntryBlock(); 650 for (Instruction &I : Entry) { 651 if (auto *CB = dyn_cast<CallBase>(&I)) { 652 if (auto *CalledFunc = CB->getCalledFunction()) { 653 for (auto &CSArg : CalledFunc->args()) { 654 if (!CSArg.hasNonNullAttr(/* AllowUndefOrPoison */ false)) 655 continue; 656 657 // If the non-null callsite argument operand is an argument to 'F' 658 // (the caller) and the call is guaranteed to execute, then the value 659 // must be non-null throughout 'F'. 660 auto *FArg = dyn_cast<Argument>(CB->getArgOperand(CSArg.getArgNo())); 661 if (FArg && !FArg->hasNonNullAttr()) { 662 FArg->addAttr(Attribute::NonNull); 663 Changed = true; 664 } 665 } 666 } 667 } 668 if (!isGuaranteedToTransferExecutionToSuccessor(&I)) 669 break; 670 } 671 672 return Changed; 673 } 674 675 static bool addReadAttr(Argument *A, Attribute::AttrKind R) { 676 assert((R == Attribute::ReadOnly || R == Attribute::ReadNone) 677 && "Must be a Read attribute."); 678 assert(A && "Argument must not be null."); 679 680 // If the argument already has the attribute, nothing needs to be done. 681 if (A->hasAttribute(R)) 682 return false; 683 684 // Otherwise, remove potentially conflicting attribute, add the new one, 685 // and update statistics. 686 A->removeAttr(Attribute::WriteOnly); 687 A->removeAttr(Attribute::ReadOnly); 688 A->removeAttr(Attribute::ReadNone); 689 A->addAttr(R); 690 R == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg; 691 return true; 692 } 693 694 /// Deduce nocapture attributes for the SCC. 695 static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { 696 bool Changed = false; 697 698 ArgumentGraph AG; 699 700 // Check each function in turn, determining which pointer arguments are not 701 // captured. 702 for (Function *F : SCCNodes) { 703 // We can infer and propagate function attributes only when we know that the 704 // definition we'll get at link time is *exactly* the definition we see now. 705 // For more details, see GlobalValue::mayBeDerefined. 706 if (!F->hasExactDefinition()) 707 continue; 708 709 Changed |= addArgumentAttrsFromCallsites(*F); 710 711 // Functions that are readonly (or readnone) and nounwind and don't return 712 // a value can't capture arguments. Don't analyze them. 713 if (F->onlyReadsMemory() && F->doesNotThrow() && 714 F->getReturnType()->isVoidTy()) { 715 for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E; 716 ++A) { 717 if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) { 718 A->addAttr(Attribute::NoCapture); 719 ++NumNoCapture; 720 Changed = true; 721 } 722 } 723 continue; 724 } 725 726 for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E; 727 ++A) { 728 if (!A->getType()->isPointerTy()) 729 continue; 730 bool HasNonLocalUses = false; 731 if (!A->hasNoCaptureAttr()) { 732 ArgumentUsesTracker Tracker(SCCNodes); 733 PointerMayBeCaptured(&*A, &Tracker); 734 if (!Tracker.Captured) { 735 if (Tracker.Uses.empty()) { 736 // If it's trivially not captured, mark it nocapture now. 737 A->addAttr(Attribute::NoCapture); 738 ++NumNoCapture; 739 Changed = true; 740 } else { 741 // If it's not trivially captured and not trivially not captured, 742 // then it must be calling into another function in our SCC. Save 743 // its particulars for Argument-SCC analysis later. 744 ArgumentGraphNode *Node = AG[&*A]; 745 for (Argument *Use : Tracker.Uses) { 746 Node->Uses.push_back(AG[Use]); 747 if (Use != &*A) 748 HasNonLocalUses = true; 749 } 750 } 751 } 752 // Otherwise, it's captured. Don't bother doing SCC analysis on it. 753 } 754 if (!HasNonLocalUses && !A->onlyReadsMemory()) { 755 // Can we determine that it's readonly/readnone without doing an SCC? 756 // Note that we don't allow any calls at all here, or else our result 757 // will be dependent on the iteration order through the functions in the 758 // SCC. 759 SmallPtrSet<Argument *, 8> Self; 760 Self.insert(&*A); 761 Attribute::AttrKind R = determinePointerReadAttrs(&*A, Self); 762 if (R != Attribute::None) 763 Changed = addReadAttr(A, R); 764 } 765 } 766 } 767 768 // The graph we've collected is partial because we stopped scanning for 769 // argument uses once we solved the argument trivially. These partial nodes 770 // show up as ArgumentGraphNode objects with an empty Uses list, and for 771 // these nodes the final decision about whether they capture has already been 772 // made. If the definition doesn't have a 'nocapture' attribute by now, it 773 // captures. 774 775 for (scc_iterator<ArgumentGraph *> I = scc_begin(&AG); !I.isAtEnd(); ++I) { 776 const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I; 777 if (ArgumentSCC.size() == 1) { 778 if (!ArgumentSCC[0]->Definition) 779 continue; // synthetic root node 780 781 // eg. "void f(int* x) { if (...) f(x); }" 782 if (ArgumentSCC[0]->Uses.size() == 1 && 783 ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) { 784 Argument *A = ArgumentSCC[0]->Definition; 785 A->addAttr(Attribute::NoCapture); 786 ++NumNoCapture; 787 Changed = true; 788 } 789 continue; 790 } 791 792 bool SCCCaptured = false; 793 for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); 794 I != E && !SCCCaptured; ++I) { 795 ArgumentGraphNode *Node = *I; 796 if (Node->Uses.empty()) { 797 if (!Node->Definition->hasNoCaptureAttr()) 798 SCCCaptured = true; 799 } 800 } 801 if (SCCCaptured) 802 continue; 803 804 SmallPtrSet<Argument *, 8> ArgumentSCCNodes; 805 // Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for 806 // quickly looking up whether a given Argument is in this ArgumentSCC. 807 for (ArgumentGraphNode *I : ArgumentSCC) { 808 ArgumentSCCNodes.insert(I->Definition); 809 } 810 811 for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); 812 I != E && !SCCCaptured; ++I) { 813 ArgumentGraphNode *N = *I; 814 for (ArgumentGraphNode *Use : N->Uses) { 815 Argument *A = Use->Definition; 816 if (A->hasNoCaptureAttr() || ArgumentSCCNodes.count(A)) 817 continue; 818 SCCCaptured = true; 819 break; 820 } 821 } 822 if (SCCCaptured) 823 continue; 824 825 for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { 826 Argument *A = ArgumentSCC[i]->Definition; 827 A->addAttr(Attribute::NoCapture); 828 ++NumNoCapture; 829 Changed = true; 830 } 831 832 // We also want to compute readonly/readnone. With a small number of false 833 // negatives, we can assume that any pointer which is captured isn't going 834 // to be provably readonly or readnone, since by definition we can't 835 // analyze all uses of a captured pointer. 836 // 837 // The false negatives happen when the pointer is captured by a function 838 // that promises readonly/readnone behaviour on the pointer, then the 839 // pointer's lifetime ends before anything that writes to arbitrary memory. 840 // Also, a readonly/readnone pointer may be returned, but returning a 841 // pointer is capturing it. 842 843 Attribute::AttrKind ReadAttr = Attribute::ReadNone; 844 for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { 845 Argument *A = ArgumentSCC[i]->Definition; 846 Attribute::AttrKind K = determinePointerReadAttrs(A, ArgumentSCCNodes); 847 if (K == Attribute::ReadNone) 848 continue; 849 if (K == Attribute::ReadOnly) { 850 ReadAttr = Attribute::ReadOnly; 851 continue; 852 } 853 ReadAttr = K; 854 break; 855 } 856 857 if (ReadAttr != Attribute::None) { 858 for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { 859 Argument *A = ArgumentSCC[i]->Definition; 860 Changed = addReadAttr(A, ReadAttr); 861 } 862 } 863 } 864 865 return Changed; 866 } 867 868 /// Tests whether a function is "malloc-like". 869 /// 870 /// A function is "malloc-like" if it returns either null or a pointer that 871 /// doesn't alias any other pointer visible to the caller. 872 static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) { 873 SmallSetVector<Value *, 8> FlowsToReturn; 874 for (BasicBlock &BB : *F) 875 if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) 876 FlowsToReturn.insert(Ret->getReturnValue()); 877 878 for (unsigned i = 0; i != FlowsToReturn.size(); ++i) { 879 Value *RetVal = FlowsToReturn[i]; 880 881 if (Constant *C = dyn_cast<Constant>(RetVal)) { 882 if (!C->isNullValue() && !isa<UndefValue>(C)) 883 return false; 884 885 continue; 886 } 887 888 if (isa<Argument>(RetVal)) 889 return false; 890 891 if (Instruction *RVI = dyn_cast<Instruction>(RetVal)) 892 switch (RVI->getOpcode()) { 893 // Extend the analysis by looking upwards. 894 case Instruction::BitCast: 895 case Instruction::GetElementPtr: 896 case Instruction::AddrSpaceCast: 897 FlowsToReturn.insert(RVI->getOperand(0)); 898 continue; 899 case Instruction::Select: { 900 SelectInst *SI = cast<SelectInst>(RVI); 901 FlowsToReturn.insert(SI->getTrueValue()); 902 FlowsToReturn.insert(SI->getFalseValue()); 903 continue; 904 } 905 case Instruction::PHI: { 906 PHINode *PN = cast<PHINode>(RVI); 907 for (Value *IncValue : PN->incoming_values()) 908 FlowsToReturn.insert(IncValue); 909 continue; 910 } 911 912 // Check whether the pointer came from an allocation. 913 case Instruction::Alloca: 914 break; 915 case Instruction::Call: 916 case Instruction::Invoke: { 917 CallBase &CB = cast<CallBase>(*RVI); 918 if (CB.hasRetAttr(Attribute::NoAlias)) 919 break; 920 if (CB.getCalledFunction() && SCCNodes.count(CB.getCalledFunction())) 921 break; 922 LLVM_FALLTHROUGH; 923 } 924 default: 925 return false; // Did not come from an allocation. 926 } 927 928 if (PointerMayBeCaptured(RetVal, false, /*StoreCaptures=*/false)) 929 return false; 930 } 931 932 return true; 933 } 934 935 /// Deduce noalias attributes for the SCC. 936 static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) { 937 // Check each function in turn, determining which functions return noalias 938 // pointers. 939 for (Function *F : SCCNodes) { 940 // Already noalias. 941 if (F->returnDoesNotAlias()) 942 continue; 943 944 // We can infer and propagate function attributes only when we know that the 945 // definition we'll get at link time is *exactly* the definition we see now. 946 // For more details, see GlobalValue::mayBeDerefined. 947 if (!F->hasExactDefinition()) 948 return false; 949 950 // We annotate noalias return values, which are only applicable to 951 // pointer types. 952 if (!F->getReturnType()->isPointerTy()) 953 continue; 954 955 if (!isFunctionMallocLike(F, SCCNodes)) 956 return false; 957 } 958 959 bool MadeChange = false; 960 for (Function *F : SCCNodes) { 961 if (F->returnDoesNotAlias() || 962 !F->getReturnType()->isPointerTy()) 963 continue; 964 965 F->setReturnDoesNotAlias(); 966 ++NumNoAlias; 967 MadeChange = true; 968 } 969 970 return MadeChange; 971 } 972 973 /// Tests whether this function is known to not return null. 974 /// 975 /// Requires that the function returns a pointer. 976 /// 977 /// Returns true if it believes the function will not return a null, and sets 978 /// \p Speculative based on whether the returned conclusion is a speculative 979 /// conclusion due to SCC calls. 980 static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes, 981 bool &Speculative) { 982 assert(F->getReturnType()->isPointerTy() && 983 "nonnull only meaningful on pointer types"); 984 Speculative = false; 985 986 SmallSetVector<Value *, 8> FlowsToReturn; 987 for (BasicBlock &BB : *F) 988 if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) 989 FlowsToReturn.insert(Ret->getReturnValue()); 990 991 auto &DL = F->getParent()->getDataLayout(); 992 993 for (unsigned i = 0; i != FlowsToReturn.size(); ++i) { 994 Value *RetVal = FlowsToReturn[i]; 995 996 // If this value is locally known to be non-null, we're good 997 if (isKnownNonZero(RetVal, DL)) 998 continue; 999 1000 // Otherwise, we need to look upwards since we can't make any local 1001 // conclusions. 1002 Instruction *RVI = dyn_cast<Instruction>(RetVal); 1003 if (!RVI) 1004 return false; 1005 switch (RVI->getOpcode()) { 1006 // Extend the analysis by looking upwards. 1007 case Instruction::BitCast: 1008 case Instruction::GetElementPtr: 1009 case Instruction::AddrSpaceCast: 1010 FlowsToReturn.insert(RVI->getOperand(0)); 1011 continue; 1012 case Instruction::Select: { 1013 SelectInst *SI = cast<SelectInst>(RVI); 1014 FlowsToReturn.insert(SI->getTrueValue()); 1015 FlowsToReturn.insert(SI->getFalseValue()); 1016 continue; 1017 } 1018 case Instruction::PHI: { 1019 PHINode *PN = cast<PHINode>(RVI); 1020 for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 1021 FlowsToReturn.insert(PN->getIncomingValue(i)); 1022 continue; 1023 } 1024 case Instruction::Call: 1025 case Instruction::Invoke: { 1026 CallBase &CB = cast<CallBase>(*RVI); 1027 Function *Callee = CB.getCalledFunction(); 1028 // A call to a node within the SCC is assumed to return null until 1029 // proven otherwise 1030 if (Callee && SCCNodes.count(Callee)) { 1031 Speculative = true; 1032 continue; 1033 } 1034 return false; 1035 } 1036 default: 1037 return false; // Unknown source, may be null 1038 }; 1039 llvm_unreachable("should have either continued or returned"); 1040 } 1041 1042 return true; 1043 } 1044 1045 /// Deduce nonnull attributes for the SCC. 1046 static bool addNonNullAttrs(const SCCNodeSet &SCCNodes) { 1047 // Speculative that all functions in the SCC return only nonnull 1048 // pointers. We may refute this as we analyze functions. 1049 bool SCCReturnsNonNull = true; 1050 1051 bool MadeChange = false; 1052 1053 // Check each function in turn, determining which functions return nonnull 1054 // pointers. 1055 for (Function *F : SCCNodes) { 1056 // Already nonnull. 1057 if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex, 1058 Attribute::NonNull)) 1059 continue; 1060 1061 // We can infer and propagate function attributes only when we know that the 1062 // definition we'll get at link time is *exactly* the definition we see now. 1063 // For more details, see GlobalValue::mayBeDerefined. 1064 if (!F->hasExactDefinition()) 1065 return false; 1066 1067 // We annotate nonnull return values, which are only applicable to 1068 // pointer types. 1069 if (!F->getReturnType()->isPointerTy()) 1070 continue; 1071 1072 bool Speculative = false; 1073 if (isReturnNonNull(F, SCCNodes, Speculative)) { 1074 if (!Speculative) { 1075 // Mark the function eagerly since we may discover a function 1076 // which prevents us from speculating about the entire SCC 1077 LLVM_DEBUG(dbgs() << "Eagerly marking " << F->getName() 1078 << " as nonnull\n"); 1079 F->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull); 1080 ++NumNonNullReturn; 1081 MadeChange = true; 1082 } 1083 continue; 1084 } 1085 // At least one function returns something which could be null, can't 1086 // speculate any more. 1087 SCCReturnsNonNull = false; 1088 } 1089 1090 if (SCCReturnsNonNull) { 1091 for (Function *F : SCCNodes) { 1092 if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex, 1093 Attribute::NonNull) || 1094 !F->getReturnType()->isPointerTy()) 1095 continue; 1096 1097 LLVM_DEBUG(dbgs() << "SCC marking " << F->getName() << " as nonnull\n"); 1098 F->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull); 1099 ++NumNonNullReturn; 1100 MadeChange = true; 1101 } 1102 } 1103 1104 return MadeChange; 1105 } 1106 1107 namespace { 1108 1109 /// Collects a set of attribute inference requests and performs them all in one 1110 /// go on a single SCC Node. Inference involves scanning function bodies 1111 /// looking for instructions that violate attribute assumptions. 1112 /// As soon as all the bodies are fine we are free to set the attribute. 1113 /// Customization of inference for individual attributes is performed by 1114 /// providing a handful of predicates for each attribute. 1115 class AttributeInferer { 1116 public: 1117 /// Describes a request for inference of a single attribute. 1118 struct InferenceDescriptor { 1119 1120 /// Returns true if this function does not have to be handled. 1121 /// General intent for this predicate is to provide an optimization 1122 /// for functions that do not need this attribute inference at all 1123 /// (say, for functions that already have the attribute). 1124 std::function<bool(const Function &)> SkipFunction; 1125 1126 /// Returns true if this instruction violates attribute assumptions. 1127 std::function<bool(Instruction &)> InstrBreaksAttribute; 1128 1129 /// Sets the inferred attribute for this function. 1130 std::function<void(Function &)> SetAttribute; 1131 1132 /// Attribute we derive. 1133 Attribute::AttrKind AKind; 1134 1135 /// If true, only "exact" definitions can be used to infer this attribute. 1136 /// See GlobalValue::isDefinitionExact. 1137 bool RequiresExactDefinition; 1138 1139 InferenceDescriptor(Attribute::AttrKind AK, 1140 std::function<bool(const Function &)> SkipFunc, 1141 std::function<bool(Instruction &)> InstrScan, 1142 std::function<void(Function &)> SetAttr, 1143 bool ReqExactDef) 1144 : SkipFunction(SkipFunc), InstrBreaksAttribute(InstrScan), 1145 SetAttribute(SetAttr), AKind(AK), 1146 RequiresExactDefinition(ReqExactDef) {} 1147 }; 1148 1149 private: 1150 SmallVector<InferenceDescriptor, 4> InferenceDescriptors; 1151 1152 public: 1153 void registerAttrInference(InferenceDescriptor AttrInference) { 1154 InferenceDescriptors.push_back(AttrInference); 1155 } 1156 1157 bool run(const SCCNodeSet &SCCNodes); 1158 }; 1159 1160 /// Perform all the requested attribute inference actions according to the 1161 /// attribute predicates stored before. 1162 bool AttributeInferer::run(const SCCNodeSet &SCCNodes) { 1163 SmallVector<InferenceDescriptor, 4> InferInSCC = InferenceDescriptors; 1164 // Go through all the functions in SCC and check corresponding attribute 1165 // assumptions for each of them. Attributes that are invalid for this SCC 1166 // will be removed from InferInSCC. 1167 for (Function *F : SCCNodes) { 1168 1169 // No attributes whose assumptions are still valid - done. 1170 if (InferInSCC.empty()) 1171 return false; 1172 1173 // Check if our attributes ever need scanning/can be scanned. 1174 llvm::erase_if(InferInSCC, [F](const InferenceDescriptor &ID) { 1175 if (ID.SkipFunction(*F)) 1176 return false; 1177 1178 // Remove from further inference (invalidate) when visiting a function 1179 // that has no instructions to scan/has an unsuitable definition. 1180 return F->isDeclaration() || 1181 (ID.RequiresExactDefinition && !F->hasExactDefinition()); 1182 }); 1183 1184 // For each attribute still in InferInSCC that doesn't explicitly skip F, 1185 // set up the F instructions scan to verify assumptions of the attribute. 1186 SmallVector<InferenceDescriptor, 4> InferInThisFunc; 1187 llvm::copy_if( 1188 InferInSCC, std::back_inserter(InferInThisFunc), 1189 [F](const InferenceDescriptor &ID) { return !ID.SkipFunction(*F); }); 1190 1191 if (InferInThisFunc.empty()) 1192 continue; 1193 1194 // Start instruction scan. 1195 for (Instruction &I : instructions(*F)) { 1196 llvm::erase_if(InferInThisFunc, [&](const InferenceDescriptor &ID) { 1197 if (!ID.InstrBreaksAttribute(I)) 1198 return false; 1199 // Remove attribute from further inference on any other functions 1200 // because attribute assumptions have just been violated. 1201 llvm::erase_if(InferInSCC, [&ID](const InferenceDescriptor &D) { 1202 return D.AKind == ID.AKind; 1203 }); 1204 // Remove attribute from the rest of current instruction scan. 1205 return true; 1206 }); 1207 1208 if (InferInThisFunc.empty()) 1209 break; 1210 } 1211 } 1212 1213 if (InferInSCC.empty()) 1214 return false; 1215 1216 bool Changed = false; 1217 for (Function *F : SCCNodes) 1218 // At this point InferInSCC contains only functions that were either: 1219 // - explicitly skipped from scan/inference, or 1220 // - verified to have no instructions that break attribute assumptions. 1221 // Hence we just go and force the attribute for all non-skipped functions. 1222 for (auto &ID : InferInSCC) { 1223 if (ID.SkipFunction(*F)) 1224 continue; 1225 Changed = true; 1226 ID.SetAttribute(*F); 1227 } 1228 return Changed; 1229 } 1230 1231 struct SCCNodesResult { 1232 SCCNodeSet SCCNodes; 1233 bool HasUnknownCall; 1234 }; 1235 1236 } // end anonymous namespace 1237 1238 /// Helper for non-Convergent inference predicate InstrBreaksAttribute. 1239 static bool InstrBreaksNonConvergent(Instruction &I, 1240 const SCCNodeSet &SCCNodes) { 1241 const CallBase *CB = dyn_cast<CallBase>(&I); 1242 // Breaks non-convergent assumption if CS is a convergent call to a function 1243 // not in the SCC. 1244 return CB && CB->isConvergent() && 1245 SCCNodes.count(CB->getCalledFunction()) == 0; 1246 } 1247 1248 /// Helper for NoUnwind inference predicate InstrBreaksAttribute. 1249 static bool InstrBreaksNonThrowing(Instruction &I, const SCCNodeSet &SCCNodes) { 1250 if (!I.mayThrow()) 1251 return false; 1252 if (const auto *CI = dyn_cast<CallInst>(&I)) { 1253 if (Function *Callee = CI->getCalledFunction()) { 1254 // I is a may-throw call to a function inside our SCC. This doesn't 1255 // invalidate our current working assumption that the SCC is no-throw; we 1256 // just have to scan that other function. 1257 if (SCCNodes.contains(Callee)) 1258 return false; 1259 } 1260 } 1261 return true; 1262 } 1263 1264 /// Helper for NoFree inference predicate InstrBreaksAttribute. 1265 static bool InstrBreaksNoFree(Instruction &I, const SCCNodeSet &SCCNodes) { 1266 CallBase *CB = dyn_cast<CallBase>(&I); 1267 if (!CB) 1268 return false; 1269 1270 if (CB->hasFnAttr(Attribute::NoFree)) 1271 return false; 1272 1273 Function *Callee = CB->getCalledFunction(); 1274 if (!Callee) 1275 return true; 1276 1277 if (Callee->doesNotFreeMemory()) 1278 return false; 1279 1280 if (SCCNodes.contains(Callee)) 1281 return false; 1282 1283 return true; 1284 } 1285 1286 /// Attempt to remove convergent function attribute when possible. 1287 /// 1288 /// Returns true if any changes to function attributes were made. 1289 static bool inferConvergent(const SCCNodeSet &SCCNodes) { 1290 AttributeInferer AI; 1291 1292 // Request to remove the convergent attribute from all functions in the SCC 1293 // if every callsite within the SCC is not convergent (except for calls 1294 // to functions within the SCC). 1295 // Note: Removal of the attr from the callsites will happen in 1296 // InstCombineCalls separately. 1297 AI.registerAttrInference(AttributeInferer::InferenceDescriptor{ 1298 Attribute::Convergent, 1299 // Skip non-convergent functions. 1300 [](const Function &F) { return !F.isConvergent(); }, 1301 // Instructions that break non-convergent assumption. 1302 [SCCNodes](Instruction &I) { 1303 return InstrBreaksNonConvergent(I, SCCNodes); 1304 }, 1305 [](Function &F) { 1306 LLVM_DEBUG(dbgs() << "Removing convergent attr from fn " << F.getName() 1307 << "\n"); 1308 F.setNotConvergent(); 1309 }, 1310 /* RequiresExactDefinition= */ false}); 1311 // Perform all the requested attribute inference actions. 1312 return AI.run(SCCNodes); 1313 } 1314 1315 /// Infer attributes from all functions in the SCC by scanning every 1316 /// instruction for compliance to the attribute assumptions. Currently it 1317 /// does: 1318 /// - addition of NoUnwind attribute 1319 /// 1320 /// Returns true if any changes to function attributes were made. 1321 static bool inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes) { 1322 AttributeInferer AI; 1323 1324 if (!DisableNoUnwindInference) 1325 // Request to infer nounwind attribute for all the functions in the SCC if 1326 // every callsite within the SCC is not throwing (except for calls to 1327 // functions within the SCC). Note that nounwind attribute suffers from 1328 // derefinement - results may change depending on how functions are 1329 // optimized. Thus it can be inferred only from exact definitions. 1330 AI.registerAttrInference(AttributeInferer::InferenceDescriptor{ 1331 Attribute::NoUnwind, 1332 // Skip non-throwing functions. 1333 [](const Function &F) { return F.doesNotThrow(); }, 1334 // Instructions that break non-throwing assumption. 1335 [&SCCNodes](Instruction &I) { 1336 return InstrBreaksNonThrowing(I, SCCNodes); 1337 }, 1338 [](Function &F) { 1339 LLVM_DEBUG(dbgs() 1340 << "Adding nounwind attr to fn " << F.getName() << "\n"); 1341 F.setDoesNotThrow(); 1342 ++NumNoUnwind; 1343 }, 1344 /* RequiresExactDefinition= */ true}); 1345 1346 if (!DisableNoFreeInference) 1347 // Request to infer nofree attribute for all the functions in the SCC if 1348 // every callsite within the SCC does not directly or indirectly free 1349 // memory (except for calls to functions within the SCC). Note that nofree 1350 // attribute suffers from derefinement - results may change depending on 1351 // how functions are optimized. Thus it can be inferred only from exact 1352 // definitions. 1353 AI.registerAttrInference(AttributeInferer::InferenceDescriptor{ 1354 Attribute::NoFree, 1355 // Skip functions known not to free memory. 1356 [](const Function &F) { return F.doesNotFreeMemory(); }, 1357 // Instructions that break non-deallocating assumption. 1358 [&SCCNodes](Instruction &I) { 1359 return InstrBreaksNoFree(I, SCCNodes); 1360 }, 1361 [](Function &F) { 1362 LLVM_DEBUG(dbgs() 1363 << "Adding nofree attr to fn " << F.getName() << "\n"); 1364 F.setDoesNotFreeMemory(); 1365 ++NumNoFree; 1366 }, 1367 /* RequiresExactDefinition= */ true}); 1368 1369 // Perform all the requested attribute inference actions. 1370 return AI.run(SCCNodes); 1371 } 1372 1373 static bool addNoRecurseAttrs(const SCCNodeSet &SCCNodes) { 1374 // Try and identify functions that do not recurse. 1375 1376 // If the SCC contains multiple nodes we know for sure there is recursion. 1377 if (SCCNodes.size() != 1) 1378 return false; 1379 1380 Function *F = *SCCNodes.begin(); 1381 if (!F || !F->hasExactDefinition() || F->doesNotRecurse()) 1382 return false; 1383 1384 // If all of the calls in F are identifiable and are to norecurse functions, F 1385 // is norecurse. This check also detects self-recursion as F is not currently 1386 // marked norecurse, so any called from F to F will not be marked norecurse. 1387 for (auto &BB : *F) 1388 for (auto &I : BB.instructionsWithoutDebug()) 1389 if (auto *CB = dyn_cast<CallBase>(&I)) { 1390 Function *Callee = CB->getCalledFunction(); 1391 if (!Callee || Callee == F || !Callee->doesNotRecurse()) 1392 // Function calls a potentially recursive function. 1393 return false; 1394 } 1395 1396 // Every call was to a non-recursive function other than this function, and 1397 // we have no indirect recursion as the SCC size is one. This function cannot 1398 // recurse. 1399 F->setDoesNotRecurse(); 1400 ++NumNoRecurse; 1401 return true; 1402 } 1403 1404 static bool instructionDoesNotReturn(Instruction &I) { 1405 if (auto *CB = dyn_cast<CallBase>(&I)) { 1406 Function *Callee = CB->getCalledFunction(); 1407 return Callee && Callee->doesNotReturn(); 1408 } 1409 return false; 1410 } 1411 1412 // A basic block can only return if it terminates with a ReturnInst and does not 1413 // contain calls to noreturn functions. 1414 static bool basicBlockCanReturn(BasicBlock &BB) { 1415 if (!isa<ReturnInst>(BB.getTerminator())) 1416 return false; 1417 return none_of(BB, instructionDoesNotReturn); 1418 } 1419 1420 // Set the noreturn function attribute if possible. 1421 static bool addNoReturnAttrs(const SCCNodeSet &SCCNodes) { 1422 bool Changed = false; 1423 1424 for (Function *F : SCCNodes) { 1425 if (!F || !F->hasExactDefinition() || F->hasFnAttribute(Attribute::Naked) || 1426 F->doesNotReturn()) 1427 continue; 1428 1429 // The function can return if any basic blocks can return. 1430 // FIXME: this doesn't handle recursion or unreachable blocks. 1431 if (none_of(*F, basicBlockCanReturn)) { 1432 F->setDoesNotReturn(); 1433 Changed = true; 1434 } 1435 } 1436 1437 return Changed; 1438 } 1439 1440 static bool functionWillReturn(const Function &F) { 1441 // Must-progress function without side-effects must return. 1442 if (F.mustProgress() && F.onlyReadsMemory()) 1443 return true; 1444 1445 // Can only analyze functions with a definition. 1446 if (F.isDeclaration()) 1447 return false; 1448 1449 // Functions with loops require more sophisticated analysis, as the loop 1450 // may be infinite. For now, don't try to handle them. 1451 SmallVector<std::pair<const BasicBlock *, const BasicBlock *>> Backedges; 1452 FindFunctionBackedges(F, Backedges); 1453 if (!Backedges.empty()) 1454 return false; 1455 1456 // If there are no loops, then the function is willreturn if all calls in 1457 // it are willreturn. 1458 return all_of(instructions(F), [](const Instruction &I) { 1459 return I.willReturn(); 1460 }); 1461 } 1462 1463 // Set the willreturn function attribute if possible. 1464 static bool addWillReturn(const SCCNodeSet &SCCNodes) { 1465 bool Changed = false; 1466 1467 for (Function *F : SCCNodes) { 1468 if (!F || F->willReturn() || !functionWillReturn(*F)) 1469 continue; 1470 1471 F->setWillReturn(); 1472 NumWillReturn++; 1473 Changed = true; 1474 } 1475 1476 return Changed; 1477 } 1478 1479 // Return true if this is an atomic which has an ordering stronger than 1480 // unordered. Note that this is different than the predicate we use in 1481 // Attributor. Here we chose to be conservative and consider monotonic 1482 // operations potentially synchronizing. We generally don't do much with 1483 // monotonic operations, so this is simply risk reduction. 1484 static bool isOrderedAtomic(Instruction *I) { 1485 if (!I->isAtomic()) 1486 return false; 1487 1488 if (auto *FI = dyn_cast<FenceInst>(I)) 1489 // All legal orderings for fence are stronger than monotonic. 1490 return FI->getSyncScopeID() != SyncScope::SingleThread; 1491 else if (isa<AtomicCmpXchgInst>(I) || isa<AtomicRMWInst>(I)) 1492 return true; 1493 else if (auto *SI = dyn_cast<StoreInst>(I)) 1494 return !SI->isUnordered(); 1495 else if (auto *LI = dyn_cast<LoadInst>(I)) 1496 return !LI->isUnordered(); 1497 else { 1498 llvm_unreachable("unknown atomic instruction?"); 1499 } 1500 } 1501 1502 static bool InstrBreaksNoSync(Instruction &I, const SCCNodeSet &SCCNodes) { 1503 // Volatile may synchronize 1504 if (I.isVolatile()) 1505 return true; 1506 1507 // An ordered atomic may synchronize. (See comment about on monotonic.) 1508 if (isOrderedAtomic(&I)) 1509 return true; 1510 1511 auto *CB = dyn_cast<CallBase>(&I); 1512 if (!CB) 1513 // Non call site cases covered by the two checks above 1514 return false; 1515 1516 if (CB->hasFnAttr(Attribute::NoSync)) 1517 return false; 1518 1519 // readnone + not convergent implies nosync 1520 // (This is needed to initialize inference from declarations which aren't 1521 // explicitly nosync, but are readnone and not convergent.) 1522 if (CB->hasFnAttr(Attribute::ReadNone) && 1523 !CB->hasFnAttr(Attribute::Convergent)) 1524 return false; 1525 1526 // Non volatile memset/memcpy/memmoves are nosync 1527 // NOTE: Only intrinsics with volatile flags should be handled here. All 1528 // others should be marked in Intrinsics.td. 1529 if (auto *MI = dyn_cast<MemIntrinsic>(&I)) 1530 if (!MI->isVolatile()) 1531 return false; 1532 1533 // Speculatively assume in SCC. 1534 if (Function *Callee = CB->getCalledFunction()) 1535 if (SCCNodes.contains(Callee)) 1536 return false; 1537 1538 return true; 1539 } 1540 1541 // Infer the nosync attribute. 1542 static bool addNoSyncAttr(const SCCNodeSet &SCCNodes) { 1543 AttributeInferer AI; 1544 AI.registerAttrInference(AttributeInferer::InferenceDescriptor{ 1545 Attribute::NoSync, 1546 // Skip already marked functions. 1547 [](const Function &F) { return F.hasNoSync(); }, 1548 // Instructions that break nosync assumption. 1549 [&SCCNodes](Instruction &I) { 1550 return InstrBreaksNoSync(I, SCCNodes); 1551 }, 1552 [](Function &F) { 1553 LLVM_DEBUG(dbgs() 1554 << "Adding nosync attr to fn " << F.getName() << "\n"); 1555 F.setNoSync(); 1556 ++NumNoSync; 1557 }, 1558 /* RequiresExactDefinition= */ true}); 1559 bool Changed = AI.run(SCCNodes); 1560 1561 // readnone + not convergent implies nosync 1562 // (This is here so that we don't have to duplicate the function local 1563 // memory reasoning of the readnone analysis.) 1564 for (Function *F : SCCNodes) { 1565 if (!F || F->hasNoSync()) 1566 continue; 1567 if (!F->doesNotAccessMemory() || F->isConvergent()) 1568 continue; 1569 F->setNoSync(); 1570 NumNoSync++; 1571 Changed = true; 1572 } 1573 return Changed; 1574 } 1575 1576 static SCCNodesResult createSCCNodeSet(ArrayRef<Function *> Functions) { 1577 SCCNodesResult Res; 1578 Res.HasUnknownCall = false; 1579 for (Function *F : Functions) { 1580 if (!F || F->hasOptNone() || F->hasFnAttribute(Attribute::Naked)) { 1581 // Treat any function we're trying not to optimize as if it were an 1582 // indirect call and omit it from the node set used below. 1583 Res.HasUnknownCall = true; 1584 continue; 1585 } 1586 // Track whether any functions in this SCC have an unknown call edge. 1587 // Note: if this is ever a performance hit, we can common it with 1588 // subsequent routines which also do scans over the instructions of the 1589 // function. 1590 if (!Res.HasUnknownCall) { 1591 for (Instruction &I : instructions(*F)) { 1592 if (auto *CB = dyn_cast<CallBase>(&I)) { 1593 if (!CB->getCalledFunction()) { 1594 Res.HasUnknownCall = true; 1595 break; 1596 } 1597 } 1598 } 1599 } 1600 Res.SCCNodes.insert(F); 1601 } 1602 return Res; 1603 } 1604 1605 template <typename AARGetterT> 1606 static bool deriveAttrsInPostOrder(ArrayRef<Function *> Functions, 1607 AARGetterT &&AARGetter) { 1608 SCCNodesResult Nodes = createSCCNodeSet(Functions); 1609 bool Changed = false; 1610 1611 // Bail if the SCC only contains optnone functions. 1612 if (Nodes.SCCNodes.empty()) 1613 return Changed; 1614 1615 Changed |= addArgumentReturnedAttrs(Nodes.SCCNodes); 1616 Changed |= addReadAttrs(Nodes.SCCNodes, AARGetter); 1617 Changed |= addArgumentAttrs(Nodes.SCCNodes); 1618 Changed |= inferConvergent(Nodes.SCCNodes); 1619 Changed |= addNoReturnAttrs(Nodes.SCCNodes); 1620 Changed |= addWillReturn(Nodes.SCCNodes); 1621 1622 // If we have no external nodes participating in the SCC, we can deduce some 1623 // more precise attributes as well. 1624 if (!Nodes.HasUnknownCall) { 1625 Changed |= addNoAliasAttrs(Nodes.SCCNodes); 1626 Changed |= addNonNullAttrs(Nodes.SCCNodes); 1627 Changed |= inferAttrsFromFunctionBodies(Nodes.SCCNodes); 1628 Changed |= addNoRecurseAttrs(Nodes.SCCNodes); 1629 } 1630 1631 Changed |= addNoSyncAttr(Nodes.SCCNodes); 1632 1633 return Changed; 1634 } 1635 1636 PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C, 1637 CGSCCAnalysisManager &AM, 1638 LazyCallGraph &CG, 1639 CGSCCUpdateResult &) { 1640 FunctionAnalysisManager &FAM = 1641 AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager(); 1642 1643 // We pass a lambda into functions to wire them up to the analysis manager 1644 // for getting function analyses. 1645 auto AARGetter = [&](Function &F) -> AAResults & { 1646 return FAM.getResult<AAManager>(F); 1647 }; 1648 1649 SmallVector<Function *, 8> Functions; 1650 for (LazyCallGraph::Node &N : C) { 1651 Functions.push_back(&N.getFunction()); 1652 } 1653 1654 if (deriveAttrsInPostOrder(Functions, AARGetter)) 1655 return PreservedAnalyses::none(); 1656 1657 return PreservedAnalyses::all(); 1658 } 1659 1660 namespace { 1661 1662 struct PostOrderFunctionAttrsLegacyPass : public CallGraphSCCPass { 1663 // Pass identification, replacement for typeid 1664 static char ID; 1665 1666 PostOrderFunctionAttrsLegacyPass() : CallGraphSCCPass(ID) { 1667 initializePostOrderFunctionAttrsLegacyPassPass( 1668 *PassRegistry::getPassRegistry()); 1669 } 1670 1671 bool runOnSCC(CallGraphSCC &SCC) override; 1672 1673 void getAnalysisUsage(AnalysisUsage &AU) const override { 1674 AU.setPreservesCFG(); 1675 AU.addRequired<AssumptionCacheTracker>(); 1676 getAAResultsAnalysisUsage(AU); 1677 CallGraphSCCPass::getAnalysisUsage(AU); 1678 } 1679 }; 1680 1681 } // end anonymous namespace 1682 1683 char PostOrderFunctionAttrsLegacyPass::ID = 0; 1684 INITIALIZE_PASS_BEGIN(PostOrderFunctionAttrsLegacyPass, "function-attrs", 1685 "Deduce function attributes", false, false) 1686 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 1687 INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) 1688 INITIALIZE_PASS_END(PostOrderFunctionAttrsLegacyPass, "function-attrs", 1689 "Deduce function attributes", false, false) 1690 1691 Pass *llvm::createPostOrderFunctionAttrsLegacyPass() { 1692 return new PostOrderFunctionAttrsLegacyPass(); 1693 } 1694 1695 template <typename AARGetterT> 1696 static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) { 1697 SmallVector<Function *, 8> Functions; 1698 for (CallGraphNode *I : SCC) { 1699 Functions.push_back(I->getFunction()); 1700 } 1701 1702 return deriveAttrsInPostOrder(Functions, AARGetter); 1703 } 1704 1705 bool PostOrderFunctionAttrsLegacyPass::runOnSCC(CallGraphSCC &SCC) { 1706 if (skipSCC(SCC)) 1707 return false; 1708 return runImpl(SCC, LegacyAARGetter(*this)); 1709 } 1710 1711 namespace { 1712 1713 struct ReversePostOrderFunctionAttrsLegacyPass : public ModulePass { 1714 // Pass identification, replacement for typeid 1715 static char ID; 1716 1717 ReversePostOrderFunctionAttrsLegacyPass() : ModulePass(ID) { 1718 initializeReversePostOrderFunctionAttrsLegacyPassPass( 1719 *PassRegistry::getPassRegistry()); 1720 } 1721 1722 bool runOnModule(Module &M) override; 1723 1724 void getAnalysisUsage(AnalysisUsage &AU) const override { 1725 AU.setPreservesCFG(); 1726 AU.addRequired<CallGraphWrapperPass>(); 1727 AU.addPreserved<CallGraphWrapperPass>(); 1728 } 1729 }; 1730 1731 } // end anonymous namespace 1732 1733 char ReversePostOrderFunctionAttrsLegacyPass::ID = 0; 1734 1735 INITIALIZE_PASS_BEGIN(ReversePostOrderFunctionAttrsLegacyPass, 1736 "rpo-function-attrs", "Deduce function attributes in RPO", 1737 false, false) 1738 INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) 1739 INITIALIZE_PASS_END(ReversePostOrderFunctionAttrsLegacyPass, 1740 "rpo-function-attrs", "Deduce function attributes in RPO", 1741 false, false) 1742 1743 Pass *llvm::createReversePostOrderFunctionAttrsPass() { 1744 return new ReversePostOrderFunctionAttrsLegacyPass(); 1745 } 1746 1747 static bool addNoRecurseAttrsTopDown(Function &F) { 1748 // We check the preconditions for the function prior to calling this to avoid 1749 // the cost of building up a reversible post-order list. We assert them here 1750 // to make sure none of the invariants this relies on were violated. 1751 assert(!F.isDeclaration() && "Cannot deduce norecurse without a definition!"); 1752 assert(!F.doesNotRecurse() && 1753 "This function has already been deduced as norecurs!"); 1754 assert(F.hasInternalLinkage() && 1755 "Can only do top-down deduction for internal linkage functions!"); 1756 1757 // If F is internal and all of its uses are calls from a non-recursive 1758 // functions, then none of its calls could in fact recurse without going 1759 // through a function marked norecurse, and so we can mark this function too 1760 // as norecurse. Note that the uses must actually be calls -- otherwise 1761 // a pointer to this function could be returned from a norecurse function but 1762 // this function could be recursively (indirectly) called. Note that this 1763 // also detects if F is directly recursive as F is not yet marked as 1764 // a norecurse function. 1765 for (auto *U : F.users()) { 1766 auto *I = dyn_cast<Instruction>(U); 1767 if (!I) 1768 return false; 1769 CallBase *CB = dyn_cast<CallBase>(I); 1770 if (!CB || !CB->getParent()->getParent()->doesNotRecurse()) 1771 return false; 1772 } 1773 F.setDoesNotRecurse(); 1774 ++NumNoRecurse; 1775 return true; 1776 } 1777 1778 static bool deduceFunctionAttributeInRPO(Module &M, CallGraph &CG) { 1779 // We only have a post-order SCC traversal (because SCCs are inherently 1780 // discovered in post-order), so we accumulate them in a vector and then walk 1781 // it in reverse. This is simpler than using the RPO iterator infrastructure 1782 // because we need to combine SCC detection and the PO walk of the call 1783 // graph. We can also cheat egregiously because we're primarily interested in 1784 // synthesizing norecurse and so we can only save the singular SCCs as SCCs 1785 // with multiple functions in them will clearly be recursive. 1786 SmallVector<Function *, 16> Worklist; 1787 for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) { 1788 if (I->size() != 1) 1789 continue; 1790 1791 Function *F = I->front()->getFunction(); 1792 if (F && !F->isDeclaration() && !F->doesNotRecurse() && 1793 F->hasInternalLinkage()) 1794 Worklist.push_back(F); 1795 } 1796 1797 bool Changed = false; 1798 for (auto *F : llvm::reverse(Worklist)) 1799 Changed |= addNoRecurseAttrsTopDown(*F); 1800 1801 return Changed; 1802 } 1803 1804 bool ReversePostOrderFunctionAttrsLegacyPass::runOnModule(Module &M) { 1805 if (skipModule(M)) 1806 return false; 1807 1808 auto &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); 1809 1810 return deduceFunctionAttributeInRPO(M, CG); 1811 } 1812 1813 PreservedAnalyses 1814 ReversePostOrderFunctionAttrsPass::run(Module &M, ModuleAnalysisManager &AM) { 1815 auto &CG = AM.getResult<CallGraphAnalysis>(M); 1816 1817 if (!deduceFunctionAttributeInRPO(M, CG)) 1818 return PreservedAnalyses::all(); 1819 1820 PreservedAnalyses PA; 1821 PA.preserve<CallGraphAnalysis>(); 1822 return PA; 1823 } 1824