1 //===- FunctionAttrs.cpp - Pass which marks functions attributes ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This file implements interprocedural passes which walk the 11 /// call-graph deducing and/or propagating function attributes. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Transforms/IPO/FunctionAttrs.h" 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/SCCIterator.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/ADT/SetVector.h" 20 #include "llvm/ADT/SmallPtrSet.h" 21 #include "llvm/ADT/SmallVector.h" 22 #include "llvm/ADT/Statistic.h" 23 #include "llvm/Analysis/AssumptionCache.h" 24 #include "llvm/Analysis/BasicAliasAnalysis.h" 25 #include "llvm/Analysis/CFG.h" 26 #include "llvm/Analysis/CGSCCPassManager.h" 27 #include "llvm/Analysis/CallGraph.h" 28 #include "llvm/Analysis/CallGraphSCCPass.h" 29 #include "llvm/Analysis/CaptureTracking.h" 30 #include "llvm/Analysis/LazyCallGraph.h" 31 #include "llvm/Analysis/MemoryBuiltins.h" 32 #include "llvm/Analysis/MemoryLocation.h" 33 #include "llvm/Analysis/ValueTracking.h" 34 #include "llvm/IR/Argument.h" 35 #include "llvm/IR/Attributes.h" 36 #include "llvm/IR/BasicBlock.h" 37 #include "llvm/IR/Constant.h" 38 #include "llvm/IR/Constants.h" 39 #include "llvm/IR/Function.h" 40 #include "llvm/IR/InstIterator.h" 41 #include "llvm/IR/InstrTypes.h" 42 #include "llvm/IR/Instruction.h" 43 #include "llvm/IR/Instructions.h" 44 #include "llvm/IR/IntrinsicInst.h" 45 #include "llvm/IR/Metadata.h" 46 #include "llvm/IR/PassManager.h" 47 #include "llvm/IR/Type.h" 48 #include "llvm/IR/Use.h" 49 #include "llvm/IR/User.h" 50 #include "llvm/IR/Value.h" 51 #include 
"llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <iterator>
#include <map>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "function-attrs"

STATISTIC(NumReadNone, "Number of functions marked readnone");
STATISTIC(NumReadOnly, "Number of functions marked readonly");
STATISTIC(NumWriteOnly, "Number of functions marked writeonly");
STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
STATISTIC(NumReturned, "Number of arguments marked returned");
STATISTIC(NumReadNoneArg, "Number of arguments marked readnone");
STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly");
STATISTIC(NumNoAlias, "Number of function returns marked noalias");
STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull");
STATISTIC(NumNoRecurse, "Number of functions marked as norecurse");
STATISTIC(NumNoUnwind, "Number of functions marked as nounwind");
STATISTIC(NumNoFree, "Number of functions marked as nofree");
STATISTIC(NumWillReturn, "Number of functions marked as willreturn");
STATISTIC(NumNoSync, "Number of functions marked as nosync");

static cl::opt<bool> EnableNonnullArgPropagation(
    "enable-nonnull-arg-prop", cl::init(true), cl::Hidden,
    cl::desc("Try to propagate nonnull argument attributes from callsites to "
             "caller functions."));

static cl::opt<bool> DisableNoUnwindInference(
    "disable-nounwind-inference", cl::Hidden,
    cl::desc("Stop inferring nounwind attribute during function-attrs pass"));

static cl::opt<bool> DisableNoFreeInference(
    "disable-nofree-inference", cl::Hidden,
    cl::desc("Stop inferring nofree attribute during function-attrs pass"));

namespace {

/// The set of functions in the SCC currently being processed; attribute
/// deduction is optimistic across calls that stay within this set.
using SCCNodeSet = SmallSetVector<Function *, 8>;

} // end anonymous namespace

/// Returns the memory access attribute for function F using AAR for AA results,
/// where SCCNodes is the current SCC.
///
/// If ThisBody is true, this function may examine the function body and will
/// return a result pertaining to this copy of the function. If it is false, the
/// result will be based only on AA results for the function declaration; it
/// will be assumed that some other (perhaps less optimized) version of the
/// function may be selected at link time.
static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
                                                  AAResults &AAR,
                                                  const SCCNodeSet &SCCNodes) {
  FunctionModRefBehavior MRB = AAR.getModRefBehavior(&F);
  if (MRB == FMRB_DoesNotAccessMemory)
    // Already perfect!
    return MAK_ReadNone;

  if (!ThisBody) {
    // Declaration-only view: trust whatever AA can prove about the function
    // as a whole; we may not inspect the body we happen to have.
    if (AliasAnalysis::onlyReadsMemory(MRB))
      return MAK_ReadOnly;

    if (AliasAnalysis::doesNotReadMemory(MRB))
      return MAK_WriteOnly;

    // Conservatively assume it reads and writes to memory.
    return MAK_MayWrite;
  }

  // Scan the function body for instructions that may read or write memory.
  bool ReadsMemory = false;
  bool WritesMemory = false;
  for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
    Instruction *I = &*II;

    // Some instructions can be ignored even if they read or write memory.
    // Detect these now, skipping to the next instruction if one is found.
    if (auto *Call = dyn_cast<CallBase>(I)) {
      // Ignore calls to functions in the same SCC, as long as the call sites
      // don't have operand bundles. Calls with operand bundles are allowed to
      // have memory effects not described by the memory effects of the call
      // target.
      if (!Call->hasOperandBundles() && Call->getCalledFunction() &&
          SCCNodes.count(Call->getCalledFunction()))
        continue;
      FunctionModRefBehavior MRB = AAR.getModRefBehavior(Call);
      ModRefInfo MRI = createModRefInfo(MRB);

      // If the call doesn't access memory, we're done.
      if (isNoModRef(MRI))
        continue;

      // A pseudo probe call shouldn't change any function attribute since it
      // doesn't translate to a real instruction. It comes with a memory access
      // tag to prevent itself being removed by optimizations and not block
      // other instructions being optimized.
      if (isa<PseudoProbeInst>(I))
        continue;

      if (!AliasAnalysis::onlyAccessesArgPointees(MRB)) {
        // The call could access any memory. If that includes writes, note it.
        if (isModSet(MRI))
          WritesMemory = true;
        // If it reads, note it.
        if (isRefSet(MRI))
          ReadsMemory = true;
        continue;
      }

      // Check whether all pointer arguments point to local memory, and
      // ignore calls that only access local memory.
      // Note: the whole-call MRI is applied per pointer argument here, which
      // is conservative (any non-local pointer operand picks up the call's
      // full mod/ref effect).
      for (auto CI = Call->arg_begin(), CE = Call->arg_end(); CI != CE; ++CI) {
        Value *Arg = *CI;
        if (!Arg->getType()->isPtrOrPtrVectorTy())
          continue;

        AAMDNodes AAInfo;
        I->getAAMetadata(AAInfo);
        MemoryLocation Loc = MemoryLocation::getBeforeOrAfter(Arg, AAInfo);

        // Skip accesses to local or constant memory as they don't impact the
        // externally visible mod/ref behavior.
        if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
          continue;

        if (isModSet(MRI))
          // Writes non-local memory.
          WritesMemory = true;
        if (isRefSet(MRI))
          // Ok, it reads non-local memory.
          ReadsMemory = true;
      }
      continue;
    } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      // Ignore non-volatile loads from local memory. (Atomic is okay here.)
      if (!LI->isVolatile()) {
        MemoryLocation Loc = MemoryLocation::get(LI);
        if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
          continue;
      }
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      // Ignore non-volatile stores to local memory. (Atomic is okay here.)
      if (!SI->isVolatile()) {
        MemoryLocation Loc = MemoryLocation::get(SI);
        if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
          continue;
      }
    } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) {
      // Ignore vaargs on local memory.
      MemoryLocation Loc = MemoryLocation::get(VI);
      if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
        continue;
    }

    // Any remaining instructions need to be taken seriously! Check if they
    // read or write memory.
    //
    // Writes memory, remember that.
    WritesMemory |= I->mayWriteToMemory();

    // If this instruction may read memory, remember that.
    ReadsMemory |= I->mayReadFromMemory();
  }

  if (WritesMemory) {
    if (!ReadsMemory)
      return MAK_WriteOnly;
    else
      return MAK_MayWrite;
  }

  return ReadsMemory ? MAK_ReadOnly : MAK_ReadNone;
}

/// Public entry point: classify F's memory access purely from its own body
/// (ThisBody=true), with no SCC context.
MemoryAccessKind llvm::computeFunctionBodyMemoryAccess(Function &F,
                                                       AAResults &AAR) {
  return checkFunctionMemoryAccess(F, /*ThisBody=*/true, AAR, {});
}

/// Deduce readonly/readnone attributes for the SCC.
template <typename AARGetterT>
static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
  // Check if any of the functions in the SCC read or write memory. If they
  // write memory then they can't be marked readnone or readonly.
  bool ReadsMemory = false;
  bool WritesMemory = false;
  for (Function *F : SCCNodes) {
    // Call the callable parameter to look up AA results for this function.
    AAResults &AAR = AARGetter(*F);

    // Non-exact function definitions may not be selected at link time, and an
    // alternative version that writes to memory may be selected. See the
    // comment on GlobalValue::isDefinitionExact for more details.
    switch (checkFunctionMemoryAccess(*F, F->hasExactDefinition(),
                                      AAR, SCCNodes)) {
    case MAK_MayWrite:
      // One may-write function poisons the whole SCC; bail out early.
      return false;
    case MAK_ReadOnly:
      ReadsMemory = true;
      break;
    case MAK_WriteOnly:
      WritesMemory = true;
      break;
    case MAK_ReadNone:
      // Nothing to do!
      break;
    }
  }

  // If the SCC contains both functions that read and functions that write, then
  // we cannot add readonly attributes.
  if (ReadsMemory && WritesMemory)
    return false;

  // Success!  Functions in this SCC do not access memory, or only read memory.
  // Give them the appropriate attribute.
  bool MadeChange = false;

  for (Function *F : SCCNodes) {
    if (F->doesNotAccessMemory())
      // Already perfect!
      continue;

    if (F->onlyReadsMemory() && ReadsMemory)
      // No change.
      continue;

    if (F->doesNotReadMemory() && WritesMemory)
      // Already writeonly and the SCC only writes -- nothing to improve.
      continue;

    MadeChange = true;

    // Clear out any existing attributes.
    AttrBuilder AttrsToRemove;
    AttrsToRemove.addAttribute(Attribute::ReadOnly);
    AttrsToRemove.addAttribute(Attribute::ReadNone);
    AttrsToRemove.addAttribute(Attribute::WriteOnly);

    if (!WritesMemory && !ReadsMemory) {
      // Clear out any "access range attributes" if readnone was deduced.
      AttrsToRemove.addAttribute(Attribute::ArgMemOnly);
      AttrsToRemove.addAttribute(Attribute::InaccessibleMemOnly);
      AttrsToRemove.addAttribute(Attribute::InaccessibleMemOrArgMemOnly);
    }
    F->removeFnAttrs(AttrsToRemove);

    // Add in the new attribute.
    if (WritesMemory && !ReadsMemory)
      F->addFnAttr(Attribute::WriteOnly);
    else
      F->addFnAttr(ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone);

    if (WritesMemory && !ReadsMemory)
      ++NumWriteOnly;
    else if (ReadsMemory)
      ++NumReadOnly;
    else
      ++NumReadNone;
  }

  return MadeChange;
}

namespace {

/// For a given pointer Argument, this retains a list of Arguments of functions
/// in the same SCC that the pointer data flows into. We use this to build an
/// SCC of the arguments.
struct ArgumentGraphNode {
  Argument *Definition;
  SmallVector<ArgumentGraphNode *, 4> Uses;
};

class ArgumentGraph {
  // We store pointers to ArgumentGraphNode objects, so it's important that
  // that they not move around upon insert.
  using ArgumentMapTy = std::map<Argument *, ArgumentGraphNode>;

  ArgumentMapTy ArgumentMap;

  // There is no root node for the argument graph, in fact:
  //   void f(int *x, int *y) { if (...) f(x, y); }
  // is an example where the graph is disconnected. The SCCIterator requires a
  // single entry point, so we maintain a fake ("synthetic") root node that
  // uses every node. Because the graph is directed and nothing points into
  // the root, it will not participate in any SCCs (except for its own).
  ArgumentGraphNode SyntheticRoot;

public:
  ArgumentGraph() { SyntheticRoot.Definition = nullptr; }

  using iterator = SmallVectorImpl<ArgumentGraphNode *>::iterator;

  iterator begin() { return SyntheticRoot.Uses.begin(); }
  iterator end() { return SyntheticRoot.Uses.end(); }
  ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; }

  // Get-or-create the node for A. Note that every lookup appends the node to
  // SyntheticRoot.Uses, so repeated lookups of the same Argument create
  // duplicate edges from the root -- presumably harmless for the SCC
  // traversal, but worth knowing when reading dumps.
  ArgumentGraphNode *operator[](Argument *A) {
    ArgumentGraphNode &Node = ArgumentMap[A];
    Node.Definition = A;
    SyntheticRoot.Uses.push_back(&Node);
    return &Node;
  }
};

/// This tracker checks whether callees are in the SCC, and if so it does not
/// consider that a capture, instead adding it to the "Uses" list and
/// continuing with the analysis.
struct ArgumentUsesTracker : public CaptureTracker {
  ArgumentUsesTracker(const SCCNodeSet &SCCNodes) : SCCNodes(SCCNodes) {}

  void tooManyUses() override { Captured = true; }

  // Called by CaptureTracking for each potentially-capturing use. Returning
  // true stops the walk (a real capture); returning false lets it continue.
  bool captured(const Use *U) override {
    CallBase *CB = dyn_cast<CallBase>(U->getUser());
    if (!CB) {
      // Non-call capturing use: definitely captured.
      Captured = true;
      return true;
    }

    Function *F = CB->getCalledFunction();
    if (!F || !F->hasExactDefinition() || !SCCNodes.count(F)) {
      // Indirect call, replaceable definition, or callee outside the SCC:
      // treat as a capture.
      Captured = true;
      return true;
    }

    // Note: the callee and the two successor blocks *follow* the argument
    // operands. This means there is no need to adjust UseIndex to account for
    // these.

    unsigned UseIndex =
        std::distance(const_cast<const Use *>(CB->arg_begin()), U);

    assert(UseIndex < CB->data_operands_size() &&
           "Indirect function calls should have been filtered above!");

    if (UseIndex >= CB->getNumArgOperands()) {
      // Data operand, but not a argument operand -- must be a bundle operand
      assert(CB->hasOperandBundles() && "Must be!");

      // CaptureTracking told us that we're being captured by an operand bundle
      // use. In this case it does not matter if the callee is within our SCC
      // or not -- we've been captured in some unknown way, and we have to be
      // conservative.
      Captured = true;
      return true;
    }

    if (UseIndex >= F->arg_size()) {
      assert(F->isVarArg() && "More params than args in non-varargs call");
      Captured = true;
      return true;
    }

    // In-SCC call: record the formal argument the pointer flows into instead
    // of treating this as a capture.
    Uses.push_back(&*std::next(F->arg_begin(), UseIndex));
    return false;
  }

  // True only if certainly captured (used outside our SCC).
  bool Captured = false;

  // Uses within our SCC.
  SmallVector<Argument *, 4> Uses;

  const SCCNodeSet &SCCNodes;
};

} // end anonymous namespace

namespace llvm {

template <> struct GraphTraits<ArgumentGraphNode *> {
  using NodeRef = ArgumentGraphNode *;
  using ChildIteratorType = SmallVectorImpl<ArgumentGraphNode *>::iterator;

  static NodeRef getEntryNode(NodeRef A) { return A; }
  static ChildIteratorType child_begin(NodeRef N) { return N->Uses.begin(); }
  static ChildIteratorType child_end(NodeRef N) { return N->Uses.end(); }
};

template <>
struct GraphTraits<ArgumentGraph *> : public GraphTraits<ArgumentGraphNode *> {
  static NodeRef getEntryNode(ArgumentGraph *AG) { return AG->getEntryNode(); }

  static ChildIteratorType nodes_begin(ArgumentGraph *AG) {
    return AG->begin();
  }

  static ChildIteratorType nodes_end(ArgumentGraph *AG) { return AG->end(); }
};

} // end namespace llvm

/// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone.
/// Walks the transitive uses of A; SCCNodes is the set of arguments whose
/// in-SCC data flow may be treated optimistically.
static Attribute::AttrKind
determinePointerReadAttrs(Argument *A,
                          const SmallPtrSet<Argument *, 8> &SCCNodes) {
  SmallVector<Use *, 32> Worklist;
  SmallPtrSet<Use *, 32> Visited;

  // inalloca arguments are always clobbered by the call.
  if (A->hasInAllocaAttr() || A->hasPreallocatedAttr())
    return Attribute::None;

  bool IsRead = false;
  // We don't need to track IsWritten. If A is written to, return immediately.

  for (Use &U : A->uses()) {
    Visited.insert(&U);
    Worklist.push_back(&U);
  }

  while (!Worklist.empty()) {
    Use *U = Worklist.pop_back_val();
    Instruction *I = cast<Instruction>(U->getUser());

    switch (I->getOpcode()) {
    case Instruction::BitCast:
    case Instruction::GetElementPtr:
    case Instruction::PHI:
    case Instruction::Select:
    case Instruction::AddrSpaceCast:
      // The original value is not read/written via this if the new value isn't.
      for (Use &UU : I->uses())
        if (Visited.insert(&UU).second)
          Worklist.push_back(&UU);
      break;

    case Instruction::Call:
    case Instruction::Invoke: {
      bool Captures = true;

      // A void call cannot propagate the pointer through its return value,
      // so there are no result uses to follow in that case.
      if (I->getType()->isVoidTy())
        Captures = false;

      auto AddUsersToWorklistIfCapturing = [&] {
        if (Captures)
          for (Use &UU : I->uses())
            if (Visited.insert(&UU).second)
              Worklist.push_back(&UU);
      };

      CallBase &CB = cast<CallBase>(*I);
      if (CB.doesNotAccessMemory()) {
        AddUsersToWorklistIfCapturing();
        continue;
      }

      Function *F = CB.getCalledFunction();
      if (!F) {
        if (CB.onlyReadsMemory()) {
          IsRead = true;
          AddUsersToWorklistIfCapturing();
          continue;
        }
        return Attribute::None;
      }

      // Note: the callee and the two successor blocks *follow* the argument
      // operands. This means there is no need to adjust UseIndex to account
      // for these.

      unsigned UseIndex = std::distance(CB.arg_begin(), U);

      // U cannot be the callee operand use: since we're exploring the
      // transitive uses of an Argument, having such a use be a callee would
      // imply the call site is an indirect call or invoke; and we'd take the
      // early exit above.
      assert(UseIndex < CB.data_operands_size() &&
             "Data operand use expected!");

      bool IsOperandBundleUse = UseIndex >= CB.getNumArgOperands();

      if (UseIndex >= F->arg_size() && !IsOperandBundleUse) {
        assert(F->isVarArg() && "More params than args in non-varargs call");
        return Attribute::None;
      }

      Captures &= !CB.doesNotCapture(UseIndex);

      // Since the optimizer (by design) cannot see the data flow corresponding
      // to a operand bundle use, these cannot participate in the optimistic SCC
      // analysis. Instead, we model the operand bundle uses as arguments in
      // call to a function external to the SCC.
      if (IsOperandBundleUse ||
          !SCCNodes.count(&*std::next(F->arg_begin(), UseIndex))) {

        // The accessors used on call site here do the right thing for calls and
        // invokes with operand bundles.

        if (!CB.onlyReadsMemory() && !CB.onlyReadsMemory(UseIndex))
          return Attribute::None;
        if (!CB.doesNotAccessMemory(UseIndex))
          IsRead = true;
      }

      AddUsersToWorklistIfCapturing();
      break;
    }

    case Instruction::Load:
      // A volatile load has side effects beyond what readonly can be relied
      // upon.
      if (cast<LoadInst>(I)->isVolatile())
        return Attribute::None;

      IsRead = true;
      break;

    case Instruction::ICmp:
    case Instruction::Ret:
      // Comparing or returning the pointer neither reads nor writes through it.
      break;

    default:
      return Attribute::None;
    }
  }

  return IsRead ? Attribute::ReadOnly : Attribute::ReadNone;
}

/// Deduce returned attributes for the SCC.
static bool addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes) {
  bool Changed = false;

  // Check each function in turn, determining if an argument is always returned.
  for (Function *F : SCCNodes) {
    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      continue;

    if (F->getReturnType()->isVoidTy())
      continue;

    // There is nothing to do if an argument is already marked as 'returned'.
    if (llvm::any_of(F->args(),
                     [](const Argument &Arg) { return Arg.hasReturnedAttr(); }))
      continue;

    // Returns the single argument returned by every return instruction, or
    // nullptr when the returns disagree or return a non-argument.
    auto FindRetArg = [&]() -> Value * {
      Value *RetArg = nullptr;
      for (BasicBlock &BB : *F)
        if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) {
          // Note that stripPointerCasts should look through functions with
          // returned arguments.
          Value *RetVal = Ret->getReturnValue()->stripPointerCasts();
          if (!isa<Argument>(RetVal) || RetVal->getType() != F->getReturnType())
            return nullptr;

          if (!RetArg)
            RetArg = RetVal;
          else if (RetArg != RetVal)
            return nullptr;
        }

      return RetArg;
    };

    if (Value *RetArg = FindRetArg()) {
      auto *A = cast<Argument>(RetArg);
      A->addAttr(Attribute::Returned);
      ++NumReturned;
      Changed = true;
    }
  }

  return Changed;
}

/// If a callsite has arguments that are also arguments to the parent function,
/// try to propagate attributes from the callsite's arguments to the parent's
/// arguments. This may be important because inlining can cause information loss
/// when attribute knowledge disappears with the inlined call.
static bool addArgumentAttrsFromCallsites(Function &F) {
  if (!EnableNonnullArgPropagation)
    return false;

  bool Changed = false;

  // For an argument attribute to transfer from a callsite to the parent, the
  // call must be guaranteed to execute every time the parent is called.
  // Conservatively, just check for calls in the entry block that are guaranteed
  // to execute.
  // TODO: This could be enhanced by testing if the callsite post-dominates the
  // entry block or by doing simple forward walks or backward walks to the
  // callsite.
  BasicBlock &Entry = F.getEntryBlock();
  for (Instruction &I : Entry) {
    if (auto *CB = dyn_cast<CallBase>(&I)) {
      if (auto *CalledFunc = CB->getCalledFunction()) {
        for (auto &CSArg : CalledFunc->args()) {
          if (!CSArg.hasNonNullAttr(/* AllowUndefOrPoison */ false))
            continue;

          // If the non-null callsite argument operand is an argument to 'F'
          // (the caller) and the call is guaranteed to execute, then the value
          // must be non-null throughout 'F'.
          auto *FArg = dyn_cast<Argument>(CB->getArgOperand(CSArg.getArgNo()));
          if (FArg && !FArg->hasNonNullAttr()) {
            FArg->addAttr(Attribute::NonNull);
            Changed = true;
          }
        }
      }
    }
    // Stop at the first instruction that might not transfer execution to its
    // successor: later calls are no longer guaranteed to run.
    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
      break;
  }

  return Changed;
}

/// Set the deduced read attribute R (readonly or readnone) on argument A,
/// dropping any conflicting access attributes first. Returns true if the IR
/// actually changed.
static bool addReadAttr(Argument *A, Attribute::AttrKind R) {
  assert((R == Attribute::ReadOnly || R == Attribute::ReadNone)
         && "Must be a Read attribute.");
  assert(A && "Argument must not be null.");

  // If the argument already has the attribute, nothing needs to be done.
  if (A->hasAttribute(R))
      return false;

  // Otherwise, remove potentially conflicting attribute, add the new one,
  // and update statistics.
  A->removeAttr(Attribute::WriteOnly);
  A->removeAttr(Attribute::ReadOnly);
  A->removeAttr(Attribute::ReadNone);
  A->addAttr(R);
  R == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg;
  return true;
}

/// Deduce nocapture attributes for the SCC.
static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
  bool Changed = false;

  ArgumentGraph AG;

  // Check each function in turn, determining which pointer arguments are not
  // captured.
  for (Function *F : SCCNodes) {
    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      continue;

    Changed |= addArgumentAttrsFromCallsites(*F);

    // Functions that are readonly (or readnone) and nounwind and don't return
    // a value can't capture arguments. Don't analyze them.
    if (F->onlyReadsMemory() && F->doesNotThrow() &&
        F->getReturnType()->isVoidTy()) {
      for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E;
           ++A) {
        if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) {
          A->addAttr(Attribute::NoCapture);
          ++NumNoCapture;
          Changed = true;
        }
      }
      continue;
    }

    for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E;
         ++A) {
      if (!A->getType()->isPointerTy())
        continue;
      bool HasNonLocalUses = false;
      if (!A->hasNoCaptureAttr()) {
        ArgumentUsesTracker Tracker(SCCNodes);
        PointerMayBeCaptured(&*A, &Tracker);
        if (!Tracker.Captured) {
          if (Tracker.Uses.empty()) {
            // If it's trivially not captured, mark it nocapture now.
            A->addAttr(Attribute::NoCapture);
            ++NumNoCapture;
            Changed = true;
          } else {
            // If it's not trivially captured and not trivially not captured,
            // then it must be calling into another function in our SCC. Save
            // its particulars for Argument-SCC analysis later.
            ArgumentGraphNode *Node = AG[&*A];
            for (Argument *Use : Tracker.Uses) {
              Node->Uses.push_back(AG[Use]);
              if (Use != &*A)
                HasNonLocalUses = true;
            }
          }
        }
        // Otherwise, it's captured. Don't bother doing SCC analysis on it.
      }
      if (!HasNonLocalUses && !A->onlyReadsMemory()) {
        // Can we determine that it's readonly/readnone without doing an SCC?
        // Note that we don't allow any calls at all here, or else our result
        // will be dependent on the iteration order through the functions in the
        // SCC.
        SmallPtrSet<Argument *, 8> Self;
        Self.insert(&*A);
        Attribute::AttrKind R = determinePointerReadAttrs(&*A, Self);
        if (R != Attribute::None)
          Changed = addReadAttr(A, R);
      }
    }
  }

  // The graph we've collected is partial because we stopped scanning for
  // argument uses once we solved the argument trivially. These partial nodes
  // show up as ArgumentGraphNode objects with an empty Uses list, and for
  // these nodes the final decision about whether they capture has already been
  // made. If the definition doesn't have a 'nocapture' attribute by now, it
  // captures.

  for (scc_iterator<ArgumentGraph *> I = scc_begin(&AG); !I.isAtEnd(); ++I) {
    const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I;
    if (ArgumentSCC.size() == 1) {
      if (!ArgumentSCC[0]->Definition)
        continue; // synthetic root node

      // eg. "void f(int* x) { if (...) f(x); }"
      if (ArgumentSCC[0]->Uses.size() == 1 &&
          ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) {
        Argument *A = ArgumentSCC[0]->Definition;
        A->addAttr(Attribute::NoCapture);
        ++NumNoCapture;
        Changed = true;
      }
      continue;
    }

    bool SCCCaptured = false;
    for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end();
         I != E && !SCCCaptured; ++I) {
      ArgumentGraphNode *Node = *I;
      if (Node->Uses.empty()) {
        if (!Node->Definition->hasNoCaptureAttr())
          SCCCaptured = true;
      }
    }
    if (SCCCaptured)
      continue;

    SmallPtrSet<Argument *, 8> ArgumentSCCNodes;
    // Fill ArgumentSCCNodes with the elements of the ArgumentSCC.  Used for
    // quickly looking up whether a given Argument is in this ArgumentSCC.
    for (ArgumentGraphNode *I : ArgumentSCC) {
      ArgumentSCCNodes.insert(I->Definition);
    }

    // If any argument in the SCC flows into an argument outside the SCC that
    // isn't already nocapture, the whole SCC may be captured.
    for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end();
         I != E && !SCCCaptured; ++I) {
      ArgumentGraphNode *N = *I;
      for (ArgumentGraphNode *Use : N->Uses) {
        Argument *A = Use->Definition;
        if (A->hasNoCaptureAttr() || ArgumentSCCNodes.count(A))
          continue;
        SCCCaptured = true;
        break;
      }
    }
    if (SCCCaptured)
      continue;

    for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
      Argument *A = ArgumentSCC[i]->Definition;
      A->addAttr(Attribute::NoCapture);
      ++NumNoCapture;
      Changed = true;
    }

    // We also want to compute readonly/readnone. With a small number of false
    // negatives, we can assume that any pointer which is captured isn't going
    // to be provably readonly or readnone, since by definition we can't
    // analyze all uses of a captured pointer.
    //
    // The false negatives happen when the pointer is captured by a function
    // that promises readonly/readnone behaviour on the pointer, then the
    // pointer's lifetime ends before anything that writes to arbitrary memory.
    // Also, a readonly/readnone pointer may be returned, but returning a
    // pointer is capturing it.

    Attribute::AttrKind ReadAttr = Attribute::ReadNone;
    for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
      Argument *A = ArgumentSCC[i]->Definition;
      Attribute::AttrKind K = determinePointerReadAttrs(A, ArgumentSCCNodes);
      if (K == Attribute::ReadNone)
        continue;
      if (K == Attribute::ReadOnly) {
        ReadAttr = Attribute::ReadOnly;
        continue;
      }
      // Attribute::None: one unanalyzable member defeats the whole SCC.
      ReadAttr = K;
      break;
    }

    if (ReadAttr != Attribute::None) {
      for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
        Argument *A = ArgumentSCC[i]->Definition;
        Changed = addReadAttr(A, ReadAttr);
      }
    }
  }

  return Changed;
}

/// Tests whether a function is "malloc-like".
///
/// A function is "malloc-like" if it returns either null or a pointer that
/// doesn't alias any other pointer visible to the caller.
static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) {
  SmallSetVector<Value *, 8> FlowsToReturn;
  for (BasicBlock &BB : *F)
    if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator()))
      FlowsToReturn.insert(Ret->getReturnValue());

  // Index-based loop: FlowsToReturn grows as we walk up the def chains, and
  // SmallSetVector keeps insertion order stable.
  for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
    Value *RetVal = FlowsToReturn[i];

    if (Constant *C = dyn_cast<Constant>(RetVal)) {
      if (!C->isNullValue() && !isa<UndefValue>(C))
        return false;

      continue;
    }

    if (isa<Argument>(RetVal))
      return false;

    if (Instruction *RVI = dyn_cast<Instruction>(RetVal))
      switch (RVI->getOpcode()) {
      // Extend the analysis by looking upwards.
      case Instruction::BitCast:
      case Instruction::GetElementPtr:
      case Instruction::AddrSpaceCast:
        FlowsToReturn.insert(RVI->getOperand(0));
        continue;
      case Instruction::Select: {
        SelectInst *SI = cast<SelectInst>(RVI);
        FlowsToReturn.insert(SI->getTrueValue());
        FlowsToReturn.insert(SI->getFalseValue());
        continue;
      }
      case Instruction::PHI: {
        PHINode *PN = cast<PHINode>(RVI);
        for (Value *IncValue : PN->incoming_values())
          FlowsToReturn.insert(IncValue);
        continue;
      }

      // Check whether the pointer came from an allocation.
      case Instruction::Alloca:
        break;
      case Instruction::Call:
      case Instruction::Invoke: {
        CallBase &CB = cast<CallBase>(*RVI);
        if (CB.hasRetAttr(Attribute::NoAlias))
          break;
        if (CB.getCalledFunction() && SCCNodes.count(CB.getCalledFunction()))
          break;
        LLVM_FALLTHROUGH;
      }
      default:
        return false; // Did not come from an allocation.
      }

    if (PointerMayBeCaptured(RetVal, false, /*StoreCaptures=*/false))
      return false;
  }

  return true;
}

/// Deduce noalias attributes for the SCC.
static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) {
  // Check each function in turn, determining which functions return noalias
  // pointers.
  for (Function *F : SCCNodes) {
    // Already noalias.
    if (F->returnDoesNotAlias())
      continue;

    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    // Note: a single non-exact definition bails out for the entire SCC.
    if (!F->hasExactDefinition())
      return false;

    // We annotate noalias return values, which are only applicable to
    // pointer types.
    if (!F->getReturnType()->isPointerTy())
      continue;

    if (!isFunctionMallocLike(F, SCCNodes))
      return false;
  }

  bool MadeChange = false;
  for (Function *F : SCCNodes) {
    if (F->returnDoesNotAlias() ||
        !F->getReturnType()->isPointerTy())
      continue;

    F->setReturnDoesNotAlias();
    ++NumNoAlias;
    MadeChange = true;
  }

  return MadeChange;
}

/// Tests whether this function is known to not return null.
///
/// Requires that the function returns a pointer.
///
/// Returns true if it believes the function will not return a null, and sets
/// \p Speculative based on whether the returned conclusion is a speculative
/// conclusion due to SCC calls.
static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes,
                            bool &Speculative) {
  assert(F->getReturnType()->isPointerTy() &&
         "nonnull only meaningful on pointer types");
  Speculative = false;

  SmallSetVector<Value *, 8> FlowsToReturn;
  for (BasicBlock &BB : *F)
    if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator()))
      FlowsToReturn.insert(Ret->getReturnValue());

  auto &DL = F->getParent()->getDataLayout();

  for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
    Value *RetVal = FlowsToReturn[i];

    // If this value is locally known to be non-null, we're good
    if (isKnownNonZero(RetVal, DL))
      continue;

    // Otherwise, we need to look upwards since we can't make any local
    // conclusions.
    Instruction *RVI = dyn_cast<Instruction>(RetVal);
    if (!RVI)
      return false;
    switch (RVI->getOpcode()) {
    // Extend the analysis by looking upwards.
    case Instruction::BitCast:
    case Instruction::GetElementPtr:
    case Instruction::AddrSpaceCast:
      FlowsToReturn.insert(RVI->getOperand(0));
      continue;
    case Instruction::Select: {
      SelectInst *SI = cast<SelectInst>(RVI);
      FlowsToReturn.insert(SI->getTrueValue());
      FlowsToReturn.insert(SI->getFalseValue());
      continue;
    }
    case Instruction::PHI: {
      PHINode *PN = cast<PHINode>(RVI);
      for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
        FlowsToReturn.insert(PN->getIncomingValue(i));
      continue;
    }
    case Instruction::Call:
    case Instruction::Invoke: {
      CallBase &CB = cast<CallBase>(*RVI);
      Function *Callee = CB.getCalledFunction();
      // A call to a node within the SCC is assumed to return null until
      // proven otherwise
      if (Callee && SCCNodes.count(Callee)) {
        Speculative = true;
        continue;
      }
      return false;
    }
    default:
      return false; // Unknown source, may be null
    };
    llvm_unreachable("should have either continued or returned");
  }

  return true;
}

/// Deduce nonnull attributes for the SCC.
static bool addNonNullAttrs(const SCCNodeSet &SCCNodes) {
  // Speculative that all functions in the SCC return only nonnull
  // pointers. We may refute this as we analyze functions.
  bool SCCReturnsNonNull = true;

  bool MadeChange = false;

  // Check each function in turn, determining which functions return nonnull
  // pointers.
  for (Function *F : SCCNodes) {
    // Already nonnull.
    if (F->getAttributes().hasRetAttr(Attribute::NonNull))
      continue;

    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
1064 if (!F->hasExactDefinition()) 1065 return false; 1066 1067 // We annotate nonnull return values, which are only applicable to 1068 // pointer types. 1069 if (!F->getReturnType()->isPointerTy()) 1070 continue; 1071 1072 bool Speculative = false; 1073 if (isReturnNonNull(F, SCCNodes, Speculative)) { 1074 if (!Speculative) { 1075 // Mark the function eagerly since we may discover a function 1076 // which prevents us from speculating about the entire SCC 1077 LLVM_DEBUG(dbgs() << "Eagerly marking " << F->getName() 1078 << " as nonnull\n"); 1079 F->addRetAttr(Attribute::NonNull); 1080 ++NumNonNullReturn; 1081 MadeChange = true; 1082 } 1083 continue; 1084 } 1085 // At least one function returns something which could be null, can't 1086 // speculate any more. 1087 SCCReturnsNonNull = false; 1088 } 1089 1090 if (SCCReturnsNonNull) { 1091 for (Function *F : SCCNodes) { 1092 if (F->getAttributes().hasRetAttr(Attribute::NonNull) || 1093 !F->getReturnType()->isPointerTy()) 1094 continue; 1095 1096 LLVM_DEBUG(dbgs() << "SCC marking " << F->getName() << " as nonnull\n"); 1097 F->addRetAttr(Attribute::NonNull); 1098 ++NumNonNullReturn; 1099 MadeChange = true; 1100 } 1101 } 1102 1103 return MadeChange; 1104 } 1105 1106 namespace { 1107 1108 /// Collects a set of attribute inference requests and performs them all in one 1109 /// go on a single SCC Node. Inference involves scanning function bodies 1110 /// looking for instructions that violate attribute assumptions. 1111 /// As soon as all the bodies are fine we are free to set the attribute. 1112 /// Customization of inference for individual attributes is performed by 1113 /// providing a handful of predicates for each attribute. 1114 class AttributeInferer { 1115 public: 1116 /// Describes a request for inference of a single attribute. 1117 struct InferenceDescriptor { 1118 1119 /// Returns true if this function does not have to be handled. 
1120 /// General intent for this predicate is to provide an optimization 1121 /// for functions that do not need this attribute inference at all 1122 /// (say, for functions that already have the attribute). 1123 std::function<bool(const Function &)> SkipFunction; 1124 1125 /// Returns true if this instruction violates attribute assumptions. 1126 std::function<bool(Instruction &)> InstrBreaksAttribute; 1127 1128 /// Sets the inferred attribute for this function. 1129 std::function<void(Function &)> SetAttribute; 1130 1131 /// Attribute we derive. 1132 Attribute::AttrKind AKind; 1133 1134 /// If true, only "exact" definitions can be used to infer this attribute. 1135 /// See GlobalValue::isDefinitionExact. 1136 bool RequiresExactDefinition; 1137 1138 InferenceDescriptor(Attribute::AttrKind AK, 1139 std::function<bool(const Function &)> SkipFunc, 1140 std::function<bool(Instruction &)> InstrScan, 1141 std::function<void(Function &)> SetAttr, 1142 bool ReqExactDef) 1143 : SkipFunction(SkipFunc), InstrBreaksAttribute(InstrScan), 1144 SetAttribute(SetAttr), AKind(AK), 1145 RequiresExactDefinition(ReqExactDef) {} 1146 }; 1147 1148 private: 1149 SmallVector<InferenceDescriptor, 4> InferenceDescriptors; 1150 1151 public: 1152 void registerAttrInference(InferenceDescriptor AttrInference) { 1153 InferenceDescriptors.push_back(AttrInference); 1154 } 1155 1156 bool run(const SCCNodeSet &SCCNodes); 1157 }; 1158 1159 /// Perform all the requested attribute inference actions according to the 1160 /// attribute predicates stored before. 1161 bool AttributeInferer::run(const SCCNodeSet &SCCNodes) { 1162 SmallVector<InferenceDescriptor, 4> InferInSCC = InferenceDescriptors; 1163 // Go through all the functions in SCC and check corresponding attribute 1164 // assumptions for each of them. Attributes that are invalid for this SCC 1165 // will be removed from InferInSCC. 1166 for (Function *F : SCCNodes) { 1167 1168 // No attributes whose assumptions are still valid - done. 
1169 if (InferInSCC.empty()) 1170 return false; 1171 1172 // Check if our attributes ever need scanning/can be scanned. 1173 llvm::erase_if(InferInSCC, [F](const InferenceDescriptor &ID) { 1174 if (ID.SkipFunction(*F)) 1175 return false; 1176 1177 // Remove from further inference (invalidate) when visiting a function 1178 // that has no instructions to scan/has an unsuitable definition. 1179 return F->isDeclaration() || 1180 (ID.RequiresExactDefinition && !F->hasExactDefinition()); 1181 }); 1182 1183 // For each attribute still in InferInSCC that doesn't explicitly skip F, 1184 // set up the F instructions scan to verify assumptions of the attribute. 1185 SmallVector<InferenceDescriptor, 4> InferInThisFunc; 1186 llvm::copy_if( 1187 InferInSCC, std::back_inserter(InferInThisFunc), 1188 [F](const InferenceDescriptor &ID) { return !ID.SkipFunction(*F); }); 1189 1190 if (InferInThisFunc.empty()) 1191 continue; 1192 1193 // Start instruction scan. 1194 for (Instruction &I : instructions(*F)) { 1195 llvm::erase_if(InferInThisFunc, [&](const InferenceDescriptor &ID) { 1196 if (!ID.InstrBreaksAttribute(I)) 1197 return false; 1198 // Remove attribute from further inference on any other functions 1199 // because attribute assumptions have just been violated. 1200 llvm::erase_if(InferInSCC, [&ID](const InferenceDescriptor &D) { 1201 return D.AKind == ID.AKind; 1202 }); 1203 // Remove attribute from the rest of current instruction scan. 1204 return true; 1205 }); 1206 1207 if (InferInThisFunc.empty()) 1208 break; 1209 } 1210 } 1211 1212 if (InferInSCC.empty()) 1213 return false; 1214 1215 bool Changed = false; 1216 for (Function *F : SCCNodes) 1217 // At this point InferInSCC contains only functions that were either: 1218 // - explicitly skipped from scan/inference, or 1219 // - verified to have no instructions that break attribute assumptions. 1220 // Hence we just go and force the attribute for all non-skipped functions. 
1221 for (auto &ID : InferInSCC) { 1222 if (ID.SkipFunction(*F)) 1223 continue; 1224 Changed = true; 1225 ID.SetAttribute(*F); 1226 } 1227 return Changed; 1228 } 1229 1230 struct SCCNodesResult { 1231 SCCNodeSet SCCNodes; 1232 bool HasUnknownCall; 1233 }; 1234 1235 } // end anonymous namespace 1236 1237 /// Helper for non-Convergent inference predicate InstrBreaksAttribute. 1238 static bool InstrBreaksNonConvergent(Instruction &I, 1239 const SCCNodeSet &SCCNodes) { 1240 const CallBase *CB = dyn_cast<CallBase>(&I); 1241 // Breaks non-convergent assumption if CS is a convergent call to a function 1242 // not in the SCC. 1243 return CB && CB->isConvergent() && 1244 SCCNodes.count(CB->getCalledFunction()) == 0; 1245 } 1246 1247 /// Helper for NoUnwind inference predicate InstrBreaksAttribute. 1248 static bool InstrBreaksNonThrowing(Instruction &I, const SCCNodeSet &SCCNodes) { 1249 if (!I.mayThrow()) 1250 return false; 1251 if (const auto *CI = dyn_cast<CallInst>(&I)) { 1252 if (Function *Callee = CI->getCalledFunction()) { 1253 // I is a may-throw call to a function inside our SCC. This doesn't 1254 // invalidate our current working assumption that the SCC is no-throw; we 1255 // just have to scan that other function. 1256 if (SCCNodes.contains(Callee)) 1257 return false; 1258 } 1259 } 1260 return true; 1261 } 1262 1263 /// Helper for NoFree inference predicate InstrBreaksAttribute. 1264 static bool InstrBreaksNoFree(Instruction &I, const SCCNodeSet &SCCNodes) { 1265 CallBase *CB = dyn_cast<CallBase>(&I); 1266 if (!CB) 1267 return false; 1268 1269 if (CB->hasFnAttr(Attribute::NoFree)) 1270 return false; 1271 1272 // Speculatively assume in SCC. 1273 if (Function *Callee = CB->getCalledFunction()) 1274 if (SCCNodes.contains(Callee)) 1275 return false; 1276 1277 return true; 1278 } 1279 1280 /// Attempt to remove convergent function attribute when possible. 1281 /// 1282 /// Returns true if any changes to function attributes were made. 
1283 static bool inferConvergent(const SCCNodeSet &SCCNodes) { 1284 AttributeInferer AI; 1285 1286 // Request to remove the convergent attribute from all functions in the SCC 1287 // if every callsite within the SCC is not convergent (except for calls 1288 // to functions within the SCC). 1289 // Note: Removal of the attr from the callsites will happen in 1290 // InstCombineCalls separately. 1291 AI.registerAttrInference(AttributeInferer::InferenceDescriptor{ 1292 Attribute::Convergent, 1293 // Skip non-convergent functions. 1294 [](const Function &F) { return !F.isConvergent(); }, 1295 // Instructions that break non-convergent assumption. 1296 [SCCNodes](Instruction &I) { 1297 return InstrBreaksNonConvergent(I, SCCNodes); 1298 }, 1299 [](Function &F) { 1300 LLVM_DEBUG(dbgs() << "Removing convergent attr from fn " << F.getName() 1301 << "\n"); 1302 F.setNotConvergent(); 1303 }, 1304 /* RequiresExactDefinition= */ false}); 1305 // Perform all the requested attribute inference actions. 1306 return AI.run(SCCNodes); 1307 } 1308 1309 /// Infer attributes from all functions in the SCC by scanning every 1310 /// instruction for compliance to the attribute assumptions. Currently it 1311 /// does: 1312 /// - addition of NoUnwind attribute 1313 /// 1314 /// Returns true if any changes to function attributes were made. 1315 static bool inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes) { 1316 AttributeInferer AI; 1317 1318 if (!DisableNoUnwindInference) 1319 // Request to infer nounwind attribute for all the functions in the SCC if 1320 // every callsite within the SCC is not throwing (except for calls to 1321 // functions within the SCC). Note that nounwind attribute suffers from 1322 // derefinement - results may change depending on how functions are 1323 // optimized. Thus it can be inferred only from exact definitions. 1324 AI.registerAttrInference(AttributeInferer::InferenceDescriptor{ 1325 Attribute::NoUnwind, 1326 // Skip non-throwing functions. 
1327 [](const Function &F) { return F.doesNotThrow(); }, 1328 // Instructions that break non-throwing assumption. 1329 [&SCCNodes](Instruction &I) { 1330 return InstrBreaksNonThrowing(I, SCCNodes); 1331 }, 1332 [](Function &F) { 1333 LLVM_DEBUG(dbgs() 1334 << "Adding nounwind attr to fn " << F.getName() << "\n"); 1335 F.setDoesNotThrow(); 1336 ++NumNoUnwind; 1337 }, 1338 /* RequiresExactDefinition= */ true}); 1339 1340 if (!DisableNoFreeInference) 1341 // Request to infer nofree attribute for all the functions in the SCC if 1342 // every callsite within the SCC does not directly or indirectly free 1343 // memory (except for calls to functions within the SCC). Note that nofree 1344 // attribute suffers from derefinement - results may change depending on 1345 // how functions are optimized. Thus it can be inferred only from exact 1346 // definitions. 1347 AI.registerAttrInference(AttributeInferer::InferenceDescriptor{ 1348 Attribute::NoFree, 1349 // Skip functions known not to free memory. 1350 [](const Function &F) { return F.doesNotFreeMemory(); }, 1351 // Instructions that break non-deallocating assumption. 1352 [&SCCNodes](Instruction &I) { 1353 return InstrBreaksNoFree(I, SCCNodes); 1354 }, 1355 [](Function &F) { 1356 LLVM_DEBUG(dbgs() 1357 << "Adding nofree attr to fn " << F.getName() << "\n"); 1358 F.setDoesNotFreeMemory(); 1359 ++NumNoFree; 1360 }, 1361 /* RequiresExactDefinition= */ true}); 1362 1363 // Perform all the requested attribute inference actions. 1364 return AI.run(SCCNodes); 1365 } 1366 1367 static bool addNoRecurseAttrs(const SCCNodeSet &SCCNodes) { 1368 // Try and identify functions that do not recurse. 1369 1370 // If the SCC contains multiple nodes we know for sure there is recursion. 
  if (SCCNodes.size() != 1)
    return false;

  Function *F = *SCCNodes.begin();
  if (!F || !F->hasExactDefinition() || F->doesNotRecurse())
    return false;

  // If all of the calls in F are identifiable and are to norecurse functions, F
  // is norecurse. This check also detects self-recursion as F is not currently
  // marked norecurse, so any call from F to F will not be marked norecurse.
  for (auto &BB : *F)
    for (auto &I : BB.instructionsWithoutDebug())
      if (auto *CB = dyn_cast<CallBase>(&I)) {
        Function *Callee = CB->getCalledFunction();
        if (!Callee || Callee == F || !Callee->doesNotRecurse())
          // Function calls a potentially recursive function.
          return false;
      }

  // Every call was to a non-recursive function other than this function, and
  // we have no indirect recursion as the SCC size is one. This function cannot
  // recurse.
  F->setDoesNotRecurse();
  ++NumNoRecurse;
  return true;
}

/// Returns true if \p I is a call site that carries the noreturn attribute,
/// i.e. is known never to return to its caller.
static bool instructionDoesNotReturn(Instruction &I) {
  if (auto *CB = dyn_cast<CallBase>(&I))
    return CB->hasFnAttr(Attribute::NoReturn);
  return false;
}

// A basic block can only return if it terminates with a ReturnInst and does not
// contain calls to noreturn functions.
static bool basicBlockCanReturn(BasicBlock &BB) {
  if (!isa<ReturnInst>(BB.getTerminator()))
    return false;
  return none_of(BB, instructionDoesNotReturn);
}

// Set the noreturn function attribute if possible.
// Returns true if any function was newly marked noreturn.
static bool addNoReturnAttrs(const SCCNodeSet &SCCNodes) {
  bool Changed = false;

  for (Function *F : SCCNodes) {
    // Naked functions are skipped: their bodies are opaque to this scan.
    if (!F || !F->hasExactDefinition() || F->hasFnAttribute(Attribute::Naked) ||
        F->doesNotReturn())
      continue;

    // The function can return if any basic blocks can return.
    // FIXME: this doesn't handle recursion or unreachable blocks.
    if (none_of(*F, basicBlockCanReturn)) {
      F->setDoesNotReturn();
      Changed = true;
    }
  }

  return Changed;
}

/// Conservatively decide whether \p F is guaranteed to return to its caller.
static bool functionWillReturn(const Function &F) {
  // We can infer and propagate function attributes only when we know that the
  // definition we'll get at link time is *exactly* the definition we see now.
  // For more details, see GlobalValue::mayBeDerefined.
  if (!F.hasExactDefinition())
    return false;

  // Must-progress function without side-effects must return.
  if (F.mustProgress() && F.onlyReadsMemory())
    return true;

  // Can only analyze functions with a definition.
  if (F.isDeclaration())
    return false;

  // Functions with loops require more sophisticated analysis, as the loop
  // may be infinite. For now, don't try to handle them.
  SmallVector<std::pair<const BasicBlock *, const BasicBlock *>> Backedges;
  FindFunctionBackedges(F, Backedges);
  if (!Backedges.empty())
    return false;

  // If there are no loops, then the function is willreturn if all calls in
  // it are willreturn.
  return all_of(instructions(F), [](const Instruction &I) {
    return I.willReturn();
  });
}

// Set the willreturn function attribute if possible.
// Returns true if any function was newly marked willreturn.
static bool addWillReturn(const SCCNodeSet &SCCNodes) {
  bool Changed = false;

  for (Function *F : SCCNodes) {
    if (!F || F->willReturn() || !functionWillReturn(*F))
      continue;

    F->setWillReturn();
    NumWillReturn++;
    Changed = true;
  }

  return Changed;
}

// Return true if this is an atomic which has an ordering stronger than
// unordered. Note that this is different than the predicate we use in
// Attributor. Here we chose to be conservative and consider monotonic
// operations potentially synchronizing. We generally don't do much with
// monotonic operations, so this is simply risk reduction.
static bool isOrderedAtomic(Instruction *I) {
  if (!I->isAtomic())
    return false;

  if (auto *FI = dyn_cast<FenceInst>(I))
    // All legal orderings for fence are stronger than monotonic.
    return FI->getSyncScopeID() != SyncScope::SingleThread;
  else if (isa<AtomicCmpXchgInst>(I) || isa<AtomicRMWInst>(I))
    return true;
  else if (auto *SI = dyn_cast<StoreInst>(I))
    return !SI->isUnordered();
  else if (auto *LI = dyn_cast<LoadInst>(I))
    return !LI->isUnordered();
  else {
    llvm_unreachable("unknown atomic instruction?");
  }
}

/// Helper for NoSync inference predicate InstrBreaksAttribute.
/// Returns true if \p I may synchronize with another thread.
static bool InstrBreaksNoSync(Instruction &I, const SCCNodeSet &SCCNodes) {
  // Volatile may synchronize
  if (I.isVolatile())
    return true;

  // An ordered atomic may synchronize.  (See comment about on monotonic.)
  if (isOrderedAtomic(&I))
    return true;

  auto *CB = dyn_cast<CallBase>(&I);
  if (!CB)
    // Non call site cases covered by the two checks above
    return false;

  if (CB->hasFnAttr(Attribute::NoSync))
    return false;

  // Non volatile memset/memcpy/memmoves are nosync
  // NOTE: Only intrinsics with volatile flags should be handled here.  All
  // others should be marked in Intrinsics.td.
  if (auto *MI = dyn_cast<MemIntrinsic>(&I))
    if (!MI->isVolatile())
      return false;

  // Speculatively assume in SCC.
  if (Function *Callee = CB->getCalledFunction())
    if (SCCNodes.contains(Callee))
      return false;

  return true;
}

// Infer the nosync attribute.
// Returns true if any function was newly marked nosync.
static bool addNoSyncAttr(const SCCNodeSet &SCCNodes) {
  AttributeInferer AI;
  AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
      Attribute::NoSync,
      // Skip already marked functions.
      [](const Function &F) { return F.hasNoSync(); },
      // Instructions that break nosync assumption.
      [&SCCNodes](Instruction &I) {
        return InstrBreaksNoSync(I, SCCNodes);
      },
      [](Function &F) {
        LLVM_DEBUG(dbgs()
                   << "Adding nosync attr to fn " << F.getName() << "\n");
        F.setNoSync();
        ++NumNoSync;
      },
      /* RequiresExactDefinition= */ true});
  return AI.run(SCCNodes);
}

/// Partition \p Functions into the analyzable node set, recording whether any
/// unknown call edge (indirect call, or a function we must not optimize) was
/// seen.  Null entries, optnone and naked functions are treated as unknown.
static SCCNodesResult createSCCNodeSet(ArrayRef<Function *> Functions) {
  SCCNodesResult Res;
  Res.HasUnknownCall = false;
  for (Function *F : Functions) {
    if (!F || F->hasOptNone() || F->hasFnAttribute(Attribute::Naked)) {
      // Treat any function we're trying not to optimize as if it were an
      // indirect call and omit it from the node set used below.
      Res.HasUnknownCall = true;
      continue;
    }
    // Track whether any functions in this SCC have an unknown call edge.
    // Note: if this is ever a performance hit, we can common it with
    // subsequent routines which also do scans over the instructions of the
    // function.
    if (!Res.HasUnknownCall) {
      for (Instruction &I : instructions(*F)) {
        if (auto *CB = dyn_cast<CallBase>(&I)) {
          if (!CB->getCalledFunction()) {
            Res.HasUnknownCall = true;
            break;
          }
        }
      }
    }
    Res.SCCNodes.insert(F);
  }
  return Res;
}

/// Driver: run all attribute deductions over the node set built from
/// \p Functions.  Returns true if any attribute was added or removed.
template <typename AARGetterT>
static bool deriveAttrsInPostOrder(ArrayRef<Function *> Functions,
                                   AARGetterT &&AARGetter) {
  SCCNodesResult Nodes = createSCCNodeSet(Functions);
  bool Changed = false;

  // Bail if the SCC only contains optnone functions.
  if (Nodes.SCCNodes.empty())
    return Changed;

  Changed |= addArgumentReturnedAttrs(Nodes.SCCNodes);
  Changed |= addReadAttrs(Nodes.SCCNodes, AARGetter);
  Changed |= addArgumentAttrs(Nodes.SCCNodes);
  Changed |= inferConvergent(Nodes.SCCNodes);
  Changed |= addNoReturnAttrs(Nodes.SCCNodes);
  Changed |= addWillReturn(Nodes.SCCNodes);

  // If we have no external nodes participating in the SCC, we can deduce some
  // more precise attributes as well.
  if (!Nodes.HasUnknownCall) {
    Changed |= addNoAliasAttrs(Nodes.SCCNodes);
    Changed |= addNonNullAttrs(Nodes.SCCNodes);
    Changed |= inferAttrsFromFunctionBodies(Nodes.SCCNodes);
    Changed |= addNoRecurseAttrs(Nodes.SCCNodes);
  }

  Changed |= addNoSyncAttr(Nodes.SCCNodes);

  // Finally, infer the maximal set of attributes from the ones we've inferred
  // above.  This is handling the cases where one attribute on a signature
  // implies another, but for implementation reasons the inference rule for
  // the later is missing (or simply less sophisticated).
  for (Function *F : Nodes.SCCNodes)
    if (F)
      Changed |= inferAttributesFromOthers(*F);

  return Changed;
}

PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
                                                  CGSCCAnalysisManager &AM,
                                                  LazyCallGraph &CG,
                                                  CGSCCUpdateResult &) {
  FunctionAnalysisManager &FAM =
      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();

  // We pass a lambda into functions to wire them up to the analysis manager
  // for getting function analyses.
  auto AARGetter = [&](Function &F) -> AAResults & {
    return FAM.getResult<AAManager>(F);
  };

  SmallVector<Function *, 8> Functions;
  for (LazyCallGraph::Node &N : C) {
    Functions.push_back(&N.getFunction());
  }

  if (deriveAttrsInPostOrder(Functions, AARGetter)) {
    // We have not changed the call graph or removed/added functions.
    PreservedAnalyses PA;
    PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
    return PA;
  }

  return PreservedAnalyses::all();
}

namespace {

/// Legacy-PM wrapper around the post-order function-attrs deduction.
struct PostOrderFunctionAttrsLegacyPass : public CallGraphSCCPass {
  // Pass identification, replacement for typeid
  static char ID;

  PostOrderFunctionAttrsLegacyPass() : CallGraphSCCPass(ID) {
    initializePostOrderFunctionAttrsLegacyPassPass(
        *PassRegistry::getPassRegistry());
  }

  bool runOnSCC(CallGraphSCC &SCC) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<AssumptionCacheTracker>();
    getAAResultsAnalysisUsage(AU);
    CallGraphSCCPass::getAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char PostOrderFunctionAttrsLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(PostOrderFunctionAttrsLegacyPass, "function-attrs",
                      "Deduce function attributes", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_END(PostOrderFunctionAttrsLegacyPass, "function-attrs",
                    "Deduce function attributes", false, false)

Pass *llvm::createPostOrderFunctionAttrsLegacyPass() {
  return new PostOrderFunctionAttrsLegacyPass();
}

template <typename AARGetterT>
static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
  SmallVector<Function *, 8> Functions;
  for (CallGraphNode *I : SCC) {
    // getFunction() may be null for external nodes; createSCCNodeSet
    // tolerates null entries and records them as unknown calls.
    Functions.push_back(I->getFunction());
  }

  return deriveAttrsInPostOrder(Functions, AARGetter);
}

bool PostOrderFunctionAttrsLegacyPass::runOnSCC(CallGraphSCC &SCC) {
  if (skipSCC(SCC))
    return false;
  return runImpl(SCC, LegacyAARGetter(*this));
}

namespace {

/// Legacy-PM wrapper for the reverse-post-order norecurse deduction.
struct ReversePostOrderFunctionAttrsLegacyPass : public ModulePass {
  // Pass identification, replacement for typeid
  static char ID;

  ReversePostOrderFunctionAttrsLegacyPass() : ModulePass(ID) {
    initializeReversePostOrderFunctionAttrsLegacyPassPass(
        *PassRegistry::getPassRegistry());
  }

  bool runOnModule(Module &M) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<CallGraphWrapperPass>();
    AU.addPreserved<CallGraphWrapperPass>();
  }
};

} // end anonymous namespace

char ReversePostOrderFunctionAttrsLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(ReversePostOrderFunctionAttrsLegacyPass,
                      "rpo-function-attrs", "Deduce function attributes in RPO",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_END(ReversePostOrderFunctionAttrsLegacyPass,
                    "rpo-function-attrs", "Deduce function attributes in RPO",
                    false, false)

Pass *llvm::createReversePostOrderFunctionAttrsPass() {
  return new ReversePostOrderFunctionAttrsLegacyPass();
}

static bool addNoRecurseAttrsTopDown(Function &F) {
  // We check the preconditions for the function prior to calling this to avoid
  // the cost of building up a reversible post-order list. We assert them here
  // to make sure none of the invariants this relies on were violated.
1740 assert(!F.isDeclaration() && "Cannot deduce norecurse without a definition!"); 1741 assert(!F.doesNotRecurse() && 1742 "This function has already been deduced as norecurs!"); 1743 assert(F.hasInternalLinkage() && 1744 "Can only do top-down deduction for internal linkage functions!"); 1745 1746 // If F is internal and all of its uses are calls from a non-recursive 1747 // functions, then none of its calls could in fact recurse without going 1748 // through a function marked norecurse, and so we can mark this function too 1749 // as norecurse. Note that the uses must actually be calls -- otherwise 1750 // a pointer to this function could be returned from a norecurse function but 1751 // this function could be recursively (indirectly) called. Note that this 1752 // also detects if F is directly recursive as F is not yet marked as 1753 // a norecurse function. 1754 for (auto *U : F.users()) { 1755 auto *I = dyn_cast<Instruction>(U); 1756 if (!I) 1757 return false; 1758 CallBase *CB = dyn_cast<CallBase>(I); 1759 if (!CB || !CB->getParent()->getParent()->doesNotRecurse()) 1760 return false; 1761 } 1762 F.setDoesNotRecurse(); 1763 ++NumNoRecurse; 1764 return true; 1765 } 1766 1767 static bool deduceFunctionAttributeInRPO(Module &M, CallGraph &CG) { 1768 // We only have a post-order SCC traversal (because SCCs are inherently 1769 // discovered in post-order), so we accumulate them in a vector and then walk 1770 // it in reverse. This is simpler than using the RPO iterator infrastructure 1771 // because we need to combine SCC detection and the PO walk of the call 1772 // graph. We can also cheat egregiously because we're primarily interested in 1773 // synthesizing norecurse and so we can only save the singular SCCs as SCCs 1774 // with multiple functions in them will clearly be recursive. 
1775 SmallVector<Function *, 16> Worklist; 1776 for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) { 1777 if (I->size() != 1) 1778 continue; 1779 1780 Function *F = I->front()->getFunction(); 1781 if (F && !F->isDeclaration() && !F->doesNotRecurse() && 1782 F->hasInternalLinkage()) 1783 Worklist.push_back(F); 1784 } 1785 1786 bool Changed = false; 1787 for (auto *F : llvm::reverse(Worklist)) 1788 Changed |= addNoRecurseAttrsTopDown(*F); 1789 1790 return Changed; 1791 } 1792 1793 bool ReversePostOrderFunctionAttrsLegacyPass::runOnModule(Module &M) { 1794 if (skipModule(M)) 1795 return false; 1796 1797 auto &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); 1798 1799 return deduceFunctionAttributeInRPO(M, CG); 1800 } 1801 1802 PreservedAnalyses 1803 ReversePostOrderFunctionAttrsPass::run(Module &M, ModuleAnalysisManager &AM) { 1804 auto &CG = AM.getResult<CallGraphAnalysis>(M); 1805 1806 if (!deduceFunctionAttributeInRPO(M, CG)) 1807 return PreservedAnalyses::all(); 1808 1809 PreservedAnalyses PA; 1810 PA.preserve<CallGraphAnalysis>(); 1811 return PA; 1812 } 1813