//===- FunctionAttrs.cpp - Pass which marks functions attributes ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements interprocedural passes which walk the
/// call-graph deducing and/or propagating function attributes.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <iterator>
#include <map>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "function-attrs"

// Per-run counters reported with -stats; one per attribute this pass infers.
STATISTIC(NumReadNone, "Number of functions marked readnone");
STATISTIC(NumReadOnly, "Number of functions marked readonly");
STATISTIC(NumWriteOnly, "Number of functions marked writeonly");
STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
STATISTIC(NumReturned, "Number of arguments marked returned");
STATISTIC(NumReadNoneArg, "Number of arguments marked readnone");
STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly");
STATISTIC(NumNoAlias, "Number of function returns marked noalias");
STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull");
STATISTIC(NumNoRecurse, "Number of functions marked as norecurse");
STATISTIC(NumNoUnwind, "Number of functions marked as nounwind");
STATISTIC(NumNoFree, "Number of functions marked as nofree");
STATISTIC(NumWillReturn, "Number of functions marked as willreturn");
STATISTIC(NumNoSync, "Number of functions marked as nosync");

// Command-line switches that gate individual inferences of this pass.
static cl::opt<bool> EnableNonnullArgPropagation(
    "enable-nonnull-arg-prop", cl::init(true), cl::Hidden,
    cl::desc("Try to propagate nonnull argument attributes from callsites to "
             "caller functions."));

static cl::opt<bool> DisableNoUnwindInference(
    "disable-nounwind-inference", cl::Hidden,
    cl::desc("Stop inferring nounwind attribute during function-attrs pass"));

static cl::opt<bool> DisableNoFreeInference(
    "disable-nofree-inference", cl::Hidden,
    cl::desc("Stop 
inferring nofree attribute during function-attrs pass"));

namespace {

// The set of functions in the SCC currently being processed. Small inline
// capacity of 8 covers the common case of small SCCs without allocating.
using SCCNodeSet = SmallSetVector<Function *, 8>;

} // end anonymous namespace

/// Returns the memory access attribute for function F using AAR for AA results,
/// where SCCNodes is the current SCC.
///
/// If ThisBody is true, this function may examine the function body and will
/// return a result pertaining to this copy of the function. If it is false, the
/// result will be based only on AA results for the function declaration; it
/// will be assumed that some other (perhaps less optimized) version of the
/// function may be selected at link time.
static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
                                                  AAResults &AAR,
                                                  const SCCNodeSet &SCCNodes) {
  FunctionModRefBehavior MRB = AAR.getModRefBehavior(&F);
  if (MRB == FMRB_DoesNotAccessMemory)
    // Already perfect!
    return MAK_ReadNone;

  if (!ThisBody) {
    // Declaration only: trust whatever AA can conclude from attributes.
    if (AliasAnalysis::onlyReadsMemory(MRB))
      return MAK_ReadOnly;

    if (AliasAnalysis::doesNotReadMemory(MRB))
      return MAK_WriteOnly;

    // Conservatively assume it reads and writes to memory.
    return MAK_MayWrite;
  }

  // Scan the function body for instructions that may read or write memory.
  // These two flags accumulate over the whole body; the final classification
  // at the bottom of the function is derived from their combination.
  bool ReadsMemory = false;
  bool WritesMemory = false;
  for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
    Instruction *I = &*II;

    // Some instructions can be ignored even if they read or write memory.
    // Detect these now, skipping to the next instruction if one is found.
    if (auto *Call = dyn_cast<CallBase>(I)) {
      // Ignore calls to functions in the same SCC, as long as the call sites
      // don't have operand bundles. Calls with operand bundles are allowed to
      // have memory effects not described by the memory effects of the call
      // target.
      if (!Call->hasOperandBundles() && Call->getCalledFunction() &&
          SCCNodes.count(Call->getCalledFunction()))
        continue;
      FunctionModRefBehavior MRB = AAR.getModRefBehavior(Call);
      ModRefInfo MRI = createModRefInfo(MRB);

      // If the call doesn't access memory, we're done.
      if (isNoModRef(MRI))
        continue;

      // A pseudo probe call shouldn't change any function attribute since it
      // doesn't translate to a real instruction. It comes with a memory access
      // tag to prevent itself being removed by optimizations and not block
      // other instructions being optimized.
      if (isa<PseudoProbeInst>(I))
        continue;

      if (!AliasAnalysis::onlyAccessesArgPointees(MRB)) {
        // The call could access any memory. If that includes writes, note it.
        if (isModSet(MRI))
          WritesMemory = true;
        // If it reads, note it.
        if (isRefSet(MRI))
          ReadsMemory = true;
        continue;
      }

      // Check whether all pointer arguments point to local memory, and
      // ignore calls that only access local memory.
      for (auto CI = Call->arg_begin(), CE = Call->arg_end(); CI != CE; ++CI) {
        Value *Arg = *CI;
        if (!Arg->getType()->isPtrOrPtrVectorTy())
          continue;

        MemoryLocation Loc =
            MemoryLocation::getBeforeOrAfter(Arg, I->getAAMetadata());

        // Skip accesses to local or constant memory as they don't impact the
        // externally visible mod/ref behavior.
        if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
          continue;

        if (isModSet(MRI))
          // Writes non-local memory.
          WritesMemory = true;
        if (isRefSet(MRI))
          // Ok, it reads non-local memory.
          ReadsMemory = true;
      }
      continue;
    } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      // Ignore non-volatile loads from local memory. (Atomic is okay here.)
      if (!LI->isVolatile()) {
        MemoryLocation Loc = MemoryLocation::get(LI);
        if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
          continue;
      }
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      // Ignore non-volatile stores to local memory. (Atomic is okay here.)
      if (!SI->isVolatile()) {
        MemoryLocation Loc = MemoryLocation::get(SI);
        if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
          continue;
      }
    } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) {
      // Ignore vaargs on local memory.
      MemoryLocation Loc = MemoryLocation::get(VI);
      if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
        continue;
    }

    // Any remaining instructions need to be taken seriously!  Check if they
    // read or write memory.
    //
    // Writes memory, remember that.
    WritesMemory |= I->mayWriteToMemory();

    // If this instruction may read memory, remember that.
    ReadsMemory |= I->mayReadFromMemory();
  }

  if (WritesMemory) {
    if (!ReadsMemory)
      return MAK_WriteOnly;
    else
      return MAK_MayWrite;
  }

  return ReadsMemory ? MAK_ReadOnly : MAK_ReadNone;
}

MemoryAccessKind llvm::computeFunctionBodyMemoryAccess(Function &F,
                                                       AAResults &AAR) {
  // Pass an empty SCC set: no callee is treated as SCC-internal, so every
  // call is accounted for by its own mod/ref behavior.
  return checkFunctionMemoryAccess(F, /*ThisBody=*/true, AAR, {});
}

/// Deduce readonly/readnone attributes for the SCC.
template <typename AARGetterT>
static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
  // Check if any of the functions in the SCC read or write memory.  If they
  // write memory then they can't be marked readnone or readonly.
  bool ReadsMemory = false;
  bool WritesMemory = false;
  for (Function *F : SCCNodes) {
    // Call the callable parameter to look up AA results for this function.
    AAResults &AAR = AARGetter(*F);

    // Non-exact function definitions may not be selected at link time, and an
    // alternative version that writes to memory may be selected.  See the
    // comment on GlobalValue::isDefinitionExact for more details.
    switch (checkFunctionMemoryAccess(*F, F->hasExactDefinition(),
                                      AAR, SCCNodes)) {
    case MAK_MayWrite:
      // One may-write function poisons the entire SCC: give up immediately.
      return false;
    case MAK_ReadOnly:
      ReadsMemory = true;
      break;
    case MAK_WriteOnly:
      WritesMemory = true;
      break;
    case MAK_ReadNone:
      // Nothing to do!
      break;
    }
  }

  // If the SCC contains both functions that read and functions that write, then
  // we cannot add readonly attributes.
  if (ReadsMemory && WritesMemory)
    return false;

  // Success!  Functions in this SCC do not access memory, or only read memory.
  // Give them the appropriate attribute.
  bool MadeChange = false;

  for (Function *F : SCCNodes) {
    if (F->doesNotAccessMemory())
      // Already perfect!
      continue;

    if (F->onlyReadsMemory() && ReadsMemory)
      // No change.
      continue;

    if (F->doesNotReadMemory() && WritesMemory)
      continue;

    MadeChange = true;

    // Clear out any existing attributes.
    AttrBuilder AttrsToRemove;
    AttrsToRemove.addAttribute(Attribute::ReadOnly);
    AttrsToRemove.addAttribute(Attribute::ReadNone);
    AttrsToRemove.addAttribute(Attribute::WriteOnly);

    if (!WritesMemory && !ReadsMemory) {
      // Clear out any "access range attributes" if readnone was deduced.
      AttrsToRemove.addAttribute(Attribute::ArgMemOnly);
      AttrsToRemove.addAttribute(Attribute::InaccessibleMemOnly);
      AttrsToRemove.addAttribute(Attribute::InaccessibleMemOrArgMemOnly);
    }
    F->removeFnAttrs(AttrsToRemove);

    // Add in the new attribute.
    if (WritesMemory && !ReadsMemory)
      F->addFnAttr(Attribute::WriteOnly);
    else
      F->addFnAttr(ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone);

    if (WritesMemory && !ReadsMemory)
      ++NumWriteOnly;
    else if (ReadsMemory)
      ++NumReadOnly;
    else
      ++NumReadNone;
  }

  return MadeChange;
}

namespace {

/// For a given pointer Argument, this retains a list of Arguments of functions
/// in the same SCC that the pointer data flows into. We use this to build an
/// SCC of the arguments.
struct ArgumentGraphNode {
  Argument *Definition;
  SmallVector<ArgumentGraphNode *, 4> Uses;
};

class ArgumentGraph {
  // We store pointers to ArgumentGraphNode objects, so it's important that
  // that they not move around upon insert.  (std::map guarantees stable
  // addresses for its mapped values.)
  using ArgumentMapTy = std::map<Argument *, ArgumentGraphNode>;

  ArgumentMapTy ArgumentMap;

  // There is no root node for the argument graph, in fact:
  //   void f(int *x, int *y) { if (...) f(x, y); }
  // is an example where the graph is disconnected. The SCCIterator requires a
  // single entry point, so we maintain a fake ("synthetic") root node that
  // uses every node. Because the graph is directed and nothing points into
  // the root, it will not participate in any SCCs (except for its own).
  ArgumentGraphNode SyntheticRoot;

public:
  ArgumentGraph() { SyntheticRoot.Definition = nullptr; }

  using iterator = SmallVectorImpl<ArgumentGraphNode *>::iterator;

  iterator begin() { return SyntheticRoot.Uses.begin(); }
  iterator end() { return SyntheticRoot.Uses.end(); }
  ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; }

  // Get-or-create the node for argument A, and link it under the synthetic
  // root so the SCC iteration can reach it.
  ArgumentGraphNode *operator[](Argument *A) {
    ArgumentGraphNode &Node = ArgumentMap[A];
    Node.Definition = A;
    SyntheticRoot.Uses.push_back(&Node);
    return &Node;
  }
};

/// This tracker checks whether callees are in the SCC, and if so it does not
/// consider that a capture, instead adding it to the "Uses" list and
/// continuing with the analysis.
struct ArgumentUsesTracker : public CaptureTracker {
  ArgumentUsesTracker(const SCCNodeSet &SCCNodes) : SCCNodes(SCCNodes) {}

  void tooManyUses() override { Captured = true; }

  // Called by CaptureTracking for each potentially-capturing use.  Returning
  // true stops the walk; returning false continues it.
  bool captured(const Use *U) override {
    CallBase *CB = dyn_cast<CallBase>(U->getUser());
    if (!CB) {
      // Captured by a non-call user.
      Captured = true;
      return true;
    }

    Function *F = CB->getCalledFunction();
    if (!F || !F->hasExactDefinition() || !SCCNodes.count(F)) {
      // Indirect call, derefinable callee, or callee outside the SCC: treat
      // as a real capture.
      Captured = true;
      return true;
    }

    // Note: the callee and the two successor blocks *follow* the argument
    // operands. This means there is no need to adjust UseIndex to account for
    // these.

    unsigned UseIndex =
        std::distance(const_cast<const Use *>(CB->arg_begin()), U);

    assert(UseIndex < CB->data_operands_size() &&
           "Indirect function calls should have been filtered above!");

    if (UseIndex >= CB->getNumArgOperands()) {
      // Data operand, but not a argument operand -- must be a bundle operand
      assert(CB->hasOperandBundles() && "Must be!");

      // CaptureTracking told us that we're being captured by an operand bundle
      // use.  In this case it does not matter if the callee is within our SCC
      // or not -- we've been captured in some unknown way, and we have to be
      // conservative.
      Captured = true;
      return true;
    }

    if (UseIndex >= F->arg_size()) {
      assert(F->isVarArg() && "More params than args in non-varargs call");
      Captured = true;
      return true;
    }

    // Record the formal parameter the pointer flows into and keep scanning.
    Uses.push_back(&*std::next(F->arg_begin(), UseIndex));
    return false;
  }

  // True only if certainly captured (used outside our SCC).
  bool Captured = false;

  // Uses within our SCC.
  SmallVector<Argument *, 4> Uses;

  const SCCNodeSet &SCCNodes;
};

} // end anonymous namespace

namespace llvm {

template <> struct GraphTraits<ArgumentGraphNode *> {
  using NodeRef = ArgumentGraphNode *;
  using ChildIteratorType = SmallVectorImpl<ArgumentGraphNode *>::iterator;

  static NodeRef getEntryNode(NodeRef A) { return A; }
  static ChildIteratorType child_begin(NodeRef N) { return N->Uses.begin(); }
  static ChildIteratorType child_end(NodeRef N) { return N->Uses.end(); }
};

template <>
struct GraphTraits<ArgumentGraph *> : public GraphTraits<ArgumentGraphNode *> {
  static NodeRef getEntryNode(ArgumentGraph *AG) { return AG->getEntryNode(); }

  static ChildIteratorType nodes_begin(ArgumentGraph *AG) {
    return AG->begin();
  }

  static ChildIteratorType nodes_end(ArgumentGraph *AG) { return AG->end(); }
};

} // end namespace llvm

/// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone.
static Attribute::AttrKind
determinePointerReadAttrs(Argument *A,
                          const SmallPtrSet<Argument *, 8> &SCCNodes) {
  SmallVector<Use *, 32> Worklist;
  SmallPtrSet<Use *, 32> Visited;

  // inalloca arguments are always clobbered by the call.
  if (A->hasInAllocaAttr() || A->hasPreallocatedAttr())
    return Attribute::None;

  bool IsRead = false;
  // We don't need to track IsWritten. If A is written to, return immediately.

  // Seed the worklist with every direct use of the argument.
  for (Use &U : A->uses()) {
    Visited.insert(&U);
    Worklist.push_back(&U);
  }

  while (!Worklist.empty()) {
    Use *U = Worklist.pop_back_val();
    Instruction *I = cast<Instruction>(U->getUser());

    switch (I->getOpcode()) {
    case Instruction::BitCast:
    case Instruction::GetElementPtr:
    case Instruction::PHI:
    case Instruction::Select:
    case Instruction::AddrSpaceCast:
      // The original value is not read/written via this if the new value isn't.
      for (Use &UU : I->uses())
        if (Visited.insert(&UU).second)
          Worklist.push_back(&UU);
      break;

    case Instruction::Call:
    case Instruction::Invoke: {
      // Assume the call result can carry the pointer onward unless shown
      // otherwise below (void result, or a nocapture parameter).
      bool Captures = true;

      if (I->getType()->isVoidTy())
        Captures = false;

      auto AddUsersToWorklistIfCapturing = [&] {
        if (Captures)
          for (Use &UU : I->uses())
            if (Visited.insert(&UU).second)
              Worklist.push_back(&UU);
      };

      CallBase &CB = cast<CallBase>(*I);
      if (CB.doesNotAccessMemory()) {
        AddUsersToWorklistIfCapturing();
        continue;
      }

      Function *F = CB.getCalledFunction();
      if (!F) {
        // Indirect call: the best we can conclude is readonly.
        if (CB.onlyReadsMemory()) {
          IsRead = true;
          AddUsersToWorklistIfCapturing();
          continue;
        }
        return Attribute::None;
      }

      // Note: the callee and the two successor blocks *follow* the argument
      // operands. This means there is no need to adjust UseIndex to account
      // for these.

      unsigned UseIndex = std::distance(CB.arg_begin(), U);

      // U cannot be the callee operand use: since we're exploring the
      // transitive uses of an Argument, having such a use be a callee would
      // imply the call site is an indirect call or invoke; and we'd take the
      // early exit above.
      assert(UseIndex < CB.data_operands_size() &&
             "Data operand use expected!");

      bool IsOperandBundleUse = UseIndex >= CB.getNumArgOperands();

      if (UseIndex >= F->arg_size() && !IsOperandBundleUse) {
        assert(F->isVarArg() && "More params than args in non-varargs call");
        return Attribute::None;
      }

      Captures &= !CB.doesNotCapture(UseIndex);

      // Since the optimizer (by design) cannot see the data flow corresponding
      // to a operand bundle use, these cannot participate in the optimistic SCC
      // analysis. Instead, we model the operand bundle uses as arguments in
      // call to a function external to the SCC.
      if (IsOperandBundleUse ||
          !SCCNodes.count(&*std::next(F->arg_begin(), UseIndex))) {

        // The accessors used on call site here do the right thing for calls and
        // invokes with operand bundles.

        if (!CB.onlyReadsMemory() && !CB.onlyReadsMemory(UseIndex))
          return Attribute::None;
        if (!CB.doesNotAccessMemory(UseIndex))
          IsRead = true;
      }

      AddUsersToWorklistIfCapturing();
      break;
    }

    case Instruction::Load:
      // A volatile load has side effects beyond what readonly can be relied
      // upon.
      if (cast<LoadInst>(I)->isVolatile())
        return Attribute::None;

      IsRead = true;
      break;

    case Instruction::ICmp:
    case Instruction::Ret:
      // Comparing or returning the pointer neither reads nor writes it.
      break;

    default:
      return Attribute::None;
    }
  }

  return IsRead ? Attribute::ReadOnly : Attribute::ReadNone;
}

/// Deduce returned attributes for the SCC.
static bool addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes) {
  bool Changed = false;

  // Check each function in turn, determining if an argument is always returned.
  for (Function *F : SCCNodes) {
    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      continue;

    if (F->getReturnType()->isVoidTy())
      continue;

    // There is nothing to do if an argument is already marked as 'returned'.
    if (llvm::any_of(F->args(),
                     [](const Argument &Arg) { return Arg.hasReturnedAttr(); }))
      continue;

    // Returns the unique argument returned from every return instruction,
    // or nullptr if there is no such argument.
    auto FindRetArg = [&]() -> Value * {
      Value *RetArg = nullptr;
      for (BasicBlock &BB : *F)
        if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) {
          // Note that stripPointerCasts should look through functions with
          // returned arguments.
          Value *RetVal = Ret->getReturnValue()->stripPointerCasts();
          if (!isa<Argument>(RetVal) || RetVal->getType() != F->getReturnType())
            return nullptr;

          if (!RetArg)
            RetArg = RetVal;
          else if (RetArg != RetVal)
            // Different returns yield different arguments: no unique answer.
            return nullptr;
        }

      return RetArg;
    };

    if (Value *RetArg = FindRetArg()) {
      auto *A = cast<Argument>(RetArg);
      A->addAttr(Attribute::Returned);
      ++NumReturned;
      Changed = true;
    }
  }

  return Changed;
}

/// If a callsite has arguments that are also arguments to the parent function,
/// try to propagate attributes from the callsite's arguments to the parent's
/// arguments. This may be important because inlining can cause information loss
/// when attribute knowledge disappears with the inlined call.
static bool addArgumentAttrsFromCallsites(Function &F) {
  if (!EnableNonnullArgPropagation)
    return false;

  bool Changed = false;

  // For an argument attribute to transfer from a callsite to the parent, the
  // call must be guaranteed to execute every time the parent is called.
  // Conservatively, just check for calls in the entry block that are guaranteed
  // to execute.
  // TODO: This could be enhanced by testing if the callsite post-dominates the
  // entry block or by doing simple forward walks or backward walks to the
  // callsite.
  BasicBlock &Entry = F.getEntryBlock();
  for (Instruction &I : Entry) {
    if (auto *CB = dyn_cast<CallBase>(&I)) {
      if (auto *CalledFunc = CB->getCalledFunction()) {
        for (auto &CSArg : CalledFunc->args()) {
          if (!CSArg.hasNonNullAttr(/* AllowUndefOrPoison */ false))
            continue;

          // If the non-null callsite argument operand is an argument to 'F'
          // (the caller) and the call is guaranteed to execute, then the value
          // must be non-null throughout 'F'.
          auto *FArg = dyn_cast<Argument>(CB->getArgOperand(CSArg.getArgNo()));
          if (FArg && !FArg->hasNonNullAttr()) {
            FArg->addAttr(Attribute::NonNull);
            Changed = true;
          }
        }
      }
    }
    // Stop at the first instruction that may not transfer execution onward
    // (e.g. may throw or not return): later calls are no longer guaranteed.
    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
      break;
  }

  return Changed;
}

/// Set the read attribute R (ReadOnly or ReadNone) on argument A, clearing
/// any conflicting access attribute first.  Returns true if anything changed.
static bool addReadAttr(Argument *A, Attribute::AttrKind R) {
  assert((R == Attribute::ReadOnly || R == Attribute::ReadNone)
         && "Must be a Read attribute.");
  assert(A && "Argument must not be null.");

  // If the argument already has the attribute, nothing needs to be done.
  if (A->hasAttribute(R))
    return false;

  // Otherwise, remove potentially conflicting attribute, add the new one,
  // and update statistics.
  A->removeAttr(Attribute::WriteOnly);
  A->removeAttr(Attribute::ReadOnly);
  A->removeAttr(Attribute::ReadNone);
  A->addAttr(R);
  R == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg;
  return true;
}

/// Deduce nocapture attributes for the SCC.
static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
  bool Changed = false;

  ArgumentGraph AG;

  // Check each function in turn, determining which pointer arguments are not
  // captured.
  for (Function *F : SCCNodes) {
    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      continue;

    Changed |= addArgumentAttrsFromCallsites(*F);

    // Functions that are readonly (or readnone) and nounwind and don't return
    // a value can't capture arguments. Don't analyze them.
    if (F->onlyReadsMemory() && F->doesNotThrow() &&
        F->getReturnType()->isVoidTy()) {
      for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E;
           ++A) {
        if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) {
          A->addAttr(Attribute::NoCapture);
          ++NumNoCapture;
          Changed = true;
        }
      }
      continue;
    }

    for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E;
         ++A) {
      if (!A->getType()->isPointerTy())
        continue;
      bool HasNonLocalUses = false;
      if (!A->hasNoCaptureAttr()) {
        ArgumentUsesTracker Tracker(SCCNodes);
        PointerMayBeCaptured(&*A, &Tracker);
        if (!Tracker.Captured) {
          if (Tracker.Uses.empty()) {
            // If it's trivially not captured, mark it nocapture now.
            A->addAttr(Attribute::NoCapture);
            ++NumNoCapture;
            Changed = true;
          } else {
            // If it's not trivially captured and not trivially not captured,
            // then it must be calling into another function in our SCC. Save
            // its particulars for Argument-SCC analysis later.
            ArgumentGraphNode *Node = AG[&*A];
            for (Argument *Use : Tracker.Uses) {
              Node->Uses.push_back(AG[Use]);
              if (Use != &*A)
                HasNonLocalUses = true;
            }
          }
        }
        // Otherwise, it's captured. Don't bother doing SCC analysis on it.
      }
      if (!HasNonLocalUses && !A->onlyReadsMemory()) {
        // Can we determine that it's readonly/readnone without doing an SCC?
        // Note that we don't allow any calls at all here, or else our result
        // will be dependent on the iteration order through the functions in the
        // SCC.
        SmallPtrSet<Argument *, 8> Self;
        Self.insert(&*A);
        Attribute::AttrKind R = determinePointerReadAttrs(&*A, Self);
        if (R != Attribute::None)
          Changed = addReadAttr(A, R);
      }
    }
  }

  // The graph we've collected is partial because we stopped scanning for
  // argument uses once we solved the argument trivially. These partial nodes
  // show up as ArgumentGraphNode objects with an empty Uses list, and for
  // these nodes the final decision about whether they capture has already been
  // made. If the definition doesn't have a 'nocapture' attribute by now, it
  // captures.

  for (scc_iterator<ArgumentGraph *> I = scc_begin(&AG); !I.isAtEnd(); ++I) {
    const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I;
    if (ArgumentSCC.size() == 1) {
      if (!ArgumentSCC[0]->Definition)
        continue; // synthetic root node

      // eg. "void f(int* x) { if (...) f(x); }"
      if (ArgumentSCC[0]->Uses.size() == 1 &&
          ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) {
        Argument *A = ArgumentSCC[0]->Definition;
        A->addAttr(Attribute::NoCapture);
        ++NumNoCapture;
        Changed = true;
      }
      continue;
    }

    // A node with an empty Uses list was solved trivially above; if it was
    // not already marked nocapture there, it is captured, which taints the
    // whole argument SCC.
    bool SCCCaptured = false;
    for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end();
         I != E && !SCCCaptured; ++I) {
      ArgumentGraphNode *Node = *I;
      if (Node->Uses.empty()) {
        if (!Node->Definition->hasNoCaptureAttr())
          SCCCaptured = true;
      }
    }
    if (SCCCaptured)
      continue;

    SmallPtrSet<Argument *, 8> ArgumentSCCNodes;
    // Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for
    // quickly looking up whether a given Argument is in this ArgumentSCC.
    for (ArgumentGraphNode *I : ArgumentSCC) {
      ArgumentSCCNodes.insert(I->Definition);
    }

    // If any node's data flows to an argument outside the SCC that is not
    // already nocapture, the whole SCC is captured.
    for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end();
         I != E && !SCCCaptured; ++I) {
      ArgumentGraphNode *N = *I;
      for (ArgumentGraphNode *Use : N->Uses) {
        Argument *A = Use->Definition;
        if (A->hasNoCaptureAttr() || ArgumentSCCNodes.count(A))
          continue;
        SCCCaptured = true;
        break;
      }
    }
    if (SCCCaptured)
      continue;

    for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
      Argument *A = ArgumentSCC[i]->Definition;
      A->addAttr(Attribute::NoCapture);
      ++NumNoCapture;
      Changed = true;
    }

    // We also want to compute readonly/readnone. With a small number of false
    // negatives, we can assume that any pointer which is captured isn't going
    // to be provably readonly or readnone, since by definition we can't
    // analyze all uses of a captured pointer.
    //
    // The false negatives happen when the pointer is captured by a function
    // that promises readonly/readnone behaviour on the pointer, then the
    // pointer's lifetime ends before anything that writes to arbitrary memory.
    // Also, a readonly/readnone pointer may be returned, but returning a
    // pointer is capturing it.
    // Meet over the SCC: ReadNone < ReadOnly < None (give up on first None).
    Attribute::AttrKind ReadAttr = Attribute::ReadNone;
    for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
      Argument *A = ArgumentSCC[i]->Definition;
      Attribute::AttrKind K = determinePointerReadAttrs(A, ArgumentSCCNodes);
      if (K == Attribute::ReadNone)
        continue;
      if (K == Attribute::ReadOnly) {
        ReadAttr = Attribute::ReadOnly;
        continue;
      }
      ReadAttr = K;
      break;
    }

    if (ReadAttr != Attribute::None) {
      for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
        Argument *A = ArgumentSCC[i]->Definition;
        Changed = addReadAttr(A, ReadAttr);
      }
    }
  }

  return Changed;
}

/// Tests whether a function is "malloc-like".
///
/// A function is "malloc-like" if it returns either null or a pointer that
/// doesn't alias any other pointer visible to the caller.
static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) {
  // Worklist of values that flow to a return; SetVector allows appending
  // while indexing through it below.
  SmallSetVector<Value *, 8> FlowsToReturn;
  for (BasicBlock &BB : *F)
    if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator()))
      FlowsToReturn.insert(Ret->getReturnValue());

  for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
    Value *RetVal = FlowsToReturn[i];

    if (Constant *C = dyn_cast<Constant>(RetVal)) {
      // Only null (or undef) constants are compatible with "malloc-like".
      if (!C->isNullValue() && !isa<UndefValue>(C))
        return false;

      continue;
    }

    if (isa<Argument>(RetVal))
      return false;

    if (Instruction *RVI = dyn_cast<Instruction>(RetVal))
      switch (RVI->getOpcode()) {
      // Extend the analysis by looking upwards.
      case Instruction::BitCast:
      case Instruction::GetElementPtr:
      case Instruction::AddrSpaceCast:
        FlowsToReturn.insert(RVI->getOperand(0));
        continue;
      case Instruction::Select: {
        SelectInst *SI = cast<SelectInst>(RVI);
        FlowsToReturn.insert(SI->getTrueValue());
        FlowsToReturn.insert(SI->getFalseValue());
        continue;
      }
      case Instruction::PHI: {
        PHINode *PN = cast<PHINode>(RVI);
        for (Value *IncValue : PN->incoming_values())
          FlowsToReturn.insert(IncValue);
        continue;
      }

      // Check whether the pointer came from an allocation.
      case Instruction::Alloca:
        break;
      case Instruction::Call:
      case Instruction::Invoke: {
        CallBase &CB = cast<CallBase>(*RVI);
        if (CB.hasRetAttr(Attribute::NoAlias))
          break;
        // Optimistically assume SCC-internal callees are malloc-like too.
        if (CB.getCalledFunction() && SCCNodes.count(CB.getCalledFunction()))
          break;
        LLVM_FALLTHROUGH;
      }
      default:
        return false; // Did not come from an allocation.
      }

    if (PointerMayBeCaptured(RetVal, /*ReturnCaptures=*/false,
                             /*StoreCaptures=*/false))
      return false;
  }

  return true;
}

/// Deduce noalias attributes for the SCC.
static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) {
  // Check each function in turn, determining which functions return noalias
  // pointers.
  for (Function *F : SCCNodes) {
    // Already noalias.
    if (F->returnDoesNotAlias())
      continue;

    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      return false;

    // We annotate noalias return values, which are only applicable to
    // pointer types.
    if (!F->getReturnType()->isPointerTy())
      continue;

    // One non-malloc-like member refutes the attribute for the whole SCC.
    if (!isFunctionMallocLike(F, SCCNodes))
      return false;
  }

  bool MadeChange = false;
  for (Function *F : SCCNodes) {
    if (F->returnDoesNotAlias() ||
        !F->getReturnType()->isPointerTy())
      continue;

    F->setReturnDoesNotAlias();
    ++NumNoAlias;
    MadeChange = true;
  }

  return MadeChange;
}

/// Tests whether this function is known to not return null.
///
/// Requires that the function returns a pointer.
///
/// Returns true if it believes the function will not return a null, and sets
/// \p Speculative based on whether the returned conclusion is a speculative
/// conclusion due to SCC calls.
static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes,
                            bool &Speculative) {
  assert(F->getReturnType()->isPointerTy() &&
         "nonnull only meaningful on pointer types");
  Speculative = false;

  // Worklist of values that flow to a return; grows while we scan.
  SmallSetVector<Value *, 8> FlowsToReturn;
  for (BasicBlock &BB : *F)
    if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator()))
      FlowsToReturn.insert(Ret->getReturnValue());

  auto &DL = F->getParent()->getDataLayout();

  for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
    Value *RetVal = FlowsToReturn[i];

    // If this value is locally known to be non-null, we're good
    if (isKnownNonZero(RetVal, DL))
      continue;

    // Otherwise, we need to look upwards since we can't make any local
    // conclusions.
    Instruction *RVI = dyn_cast<Instruction>(RetVal);
    if (!RVI)
      return false;
    switch (RVI->getOpcode()) {
    // Extend the analysis by looking upwards.
    case Instruction::BitCast:
    case Instruction::GetElementPtr:
    case Instruction::AddrSpaceCast:
      FlowsToReturn.insert(RVI->getOperand(0));
      continue;
    case Instruction::Select: {
      SelectInst *SI = cast<SelectInst>(RVI);
      FlowsToReturn.insert(SI->getTrueValue());
      FlowsToReturn.insert(SI->getFalseValue());
      continue;
    }
    case Instruction::PHI: {
      PHINode *PN = cast<PHINode>(RVI);
      // NOTE(review): this `int i` shadows the outer `unsigned i`; harmless
      // here, but worth renaming on a future cleanup.
      for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
        FlowsToReturn.insert(PN->getIncomingValue(i));
      continue;
    }
    case Instruction::Call:
    case Instruction::Invoke: {
      CallBase &CB = cast<CallBase>(*RVI);
      Function *Callee = CB.getCalledFunction();
      // A call to a node within the SCC is assumed to return null until
      // proven otherwise
      if (Callee && SCCNodes.count(Callee)) {
        Speculative = true;
        continue;
      }
      return false;
    }
    default:
      return false; // Unknown source, may be null
    };
    // Every switch arm either continues the worklist scan or returns.
    llvm_unreachable("should have either continued or returned");
  }

  return true;
}

/// Deduce nonnull attributes for the SCC.
static bool addNonNullAttrs(const SCCNodeSet &SCCNodes) {
  // Speculative that all functions in the SCC return only nonnull
  // pointers. We may refute this as we analyze functions.
  bool SCCReturnsNonNull = true;

  bool MadeChange = false;

  // Check each function in turn, determining which functions return nonnull
  // pointers.
  for (Function *F : SCCNodes) {
    // Already nonnull.
    if (F->getAttributes().hasRetAttr(Attribute::NonNull))
      continue;

    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      return false;

    // We annotate nonnull return values, which are only applicable to
    // pointer types.
    if (!F->getReturnType()->isPointerTy())
      continue;

    bool Speculative = false;
    if (isReturnNonNull(F, SCCNodes, Speculative)) {
      if (!Speculative) {
        // Mark the function eagerly since we may discover a function
        // which prevents us from speculating about the entire SCC
        LLVM_DEBUG(dbgs() << "Eagerly marking " << F->getName()
                          << " as nonnull\n");
        F->addRetAttr(Attribute::NonNull);
        ++NumNonNullReturn;
        MadeChange = true;
      }
      continue;
    }
    // At least one function returns something which could be null, can't
    // speculate any more.
    SCCReturnsNonNull = false;
  }

  // Second pass: if speculation survived for every member, commit the
  // attribute to the functions that were not marked eagerly above.
  if (SCCReturnsNonNull) {
    for (Function *F : SCCNodes) {
      if (F->getAttributes().hasRetAttr(Attribute::NonNull) ||
          !F->getReturnType()->isPointerTy())
        continue;

      LLVM_DEBUG(dbgs() << "SCC marking " << F->getName() << " as nonnull\n");
      F->addRetAttr(Attribute::NonNull);
      ++NumNonNullReturn;
      MadeChange = true;
    }
  }

  return MadeChange;
}

namespace {

/// Collects a set of attribute inference requests and performs them all in one
/// go on a single SCC Node. Inference involves scanning function bodies
/// looking for instructions that violate attribute assumptions.
/// As soon as all the bodies are fine we are free to set the attribute.
/// Customization of inference for individual attributes is performed by
/// providing a handful of predicates for each attribute.
class AttributeInferer {
public:
  /// Describes a request for inference of a single attribute.
  struct InferenceDescriptor {

    /// Returns true if this function does not have to be handled.
    /// General intent for this predicate is to provide an optimization
    /// for functions that do not need this attribute inference at all
    /// (say, for functions that already have the attribute).
    std::function<bool(const Function &)> SkipFunction;

    /// Returns true if this instruction violates attribute assumptions.
    std::function<bool(Instruction &)> InstrBreaksAttribute;

    /// Sets the inferred attribute for this function.
    std::function<void(Function &)> SetAttribute;

    /// Attribute we derive.
    Attribute::AttrKind AKind;

    /// If true, only "exact" definitions can be used to infer this attribute.
    /// See GlobalValue::isDefinitionExact.
    bool RequiresExactDefinition;

    InferenceDescriptor(Attribute::AttrKind AK,
                        std::function<bool(const Function &)> SkipFunc,
                        std::function<bool(Instruction &)> InstrScan,
                        std::function<void(Function &)> SetAttr,
                        bool ReqExactDef)
        : SkipFunction(SkipFunc), InstrBreaksAttribute(InstrScan),
          SetAttribute(SetAttr), AKind(AK),
          RequiresExactDefinition(ReqExactDef) {}
  };

private:
  /// The set of inference requests registered so far.
  SmallVector<InferenceDescriptor, 4> InferenceDescriptors;

public:
  void registerAttrInference(InferenceDescriptor AttrInference) {
    InferenceDescriptors.push_back(AttrInference);
  }

  bool run(const SCCNodeSet &SCCNodes);
};

/// Perform all the requested attribute inference actions according to the
/// attribute predicates stored before.
bool AttributeInferer::run(const SCCNodeSet &SCCNodes) {
  // Work on a copy so registered descriptors survive for a later run().
  SmallVector<InferenceDescriptor, 4> InferInSCC = InferenceDescriptors;
  // Go through all the functions in SCC and check corresponding attribute
  // assumptions for each of them. Attributes that are invalid for this SCC
  // will be removed from InferInSCC.
  for (Function *F : SCCNodes) {

    // No attributes whose assumptions are still valid - done.
    if (InferInSCC.empty())
      return false;

    // Check if our attributes ever need scanning/can be scanned.
    llvm::erase_if(InferInSCC, [F](const InferenceDescriptor &ID) {
      if (ID.SkipFunction(*F))
        return false;

      // Remove from further inference (invalidate) when visiting a function
      // that has no instructions to scan/has an unsuitable definition.
      return F->isDeclaration() ||
             (ID.RequiresExactDefinition && !F->hasExactDefinition());
    });

    // For each attribute still in InferInSCC that doesn't explicitly skip F,
    // set up the F instructions scan to verify assumptions of the attribute.
    SmallVector<InferenceDescriptor, 4> InferInThisFunc;
    llvm::copy_if(
        InferInSCC, std::back_inserter(InferInThisFunc),
        [F](const InferenceDescriptor &ID) { return !ID.SkipFunction(*F); });

    if (InferInThisFunc.empty())
      continue;

    // Start instruction scan.
    for (Instruction &I : instructions(*F)) {
      llvm::erase_if(InferInThisFunc, [&](const InferenceDescriptor &ID) {
        if (!ID.InstrBreaksAttribute(I))
          return false;
        // Remove attribute from further inference on any other functions
        // because attribute assumptions have just been violated.
        llvm::erase_if(InferInSCC, [&ID](const InferenceDescriptor &D) {
          return D.AKind == ID.AKind;
        });
        // Remove attribute from the rest of current instruction scan.
        return true;
      });

      if (InferInThisFunc.empty())
        break;
    }
  }

  if (InferInSCC.empty())
    return false;

  bool Changed = false;
  for (Function *F : SCCNodes)
    // At this point InferInSCC contains only functions that were either:
    // - explicitly skipped from scan/inference, or
    // - verified to have no instructions that break attribute assumptions.
    // Hence we just go and force the attribute for all non-skipped functions.
    for (auto &ID : InferInSCC) {
      if (ID.SkipFunction(*F))
        continue;
      Changed = true;
      ID.SetAttribute(*F);
    }
  return Changed;
}

/// The SCC node set plus a flag recording whether any member was dropped or
/// contains an indirect call (populated by createSCCNodeSet below).
struct SCCNodesResult {
  SCCNodeSet SCCNodes;
  bool HasUnknownCall;
};

} // end anonymous namespace

/// Helper for non-Convergent inference predicate InstrBreaksAttribute.
static bool InstrBreaksNonConvergent(Instruction &I,
                                     const SCCNodeSet &SCCNodes) {
  const CallBase *CB = dyn_cast<CallBase>(&I);
  // Breaks non-convergent assumption if CS is a convergent call to a function
  // not in the SCC.
  return CB && CB->isConvergent() &&
         SCCNodes.count(CB->getCalledFunction()) == 0;
}

/// Helper for NoUnwind inference predicate InstrBreaksAttribute.
static bool InstrBreaksNonThrowing(Instruction &I, const SCCNodeSet &SCCNodes) {
  if (!I.mayThrow())
    return false;
  if (const auto *CI = dyn_cast<CallInst>(&I)) {
    if (Function *Callee = CI->getCalledFunction()) {
      // I is a may-throw call to a function inside our SCC. This doesn't
      // invalidate our current working assumption that the SCC is no-throw; we
      // just have to scan that other function.
      if (SCCNodes.contains(Callee))
        return false;
    }
  }
  return true;
}

/// Helper for NoFree inference predicate InstrBreaksAttribute.
static bool InstrBreaksNoFree(Instruction &I, const SCCNodeSet &SCCNodes) {
  CallBase *CB = dyn_cast<CallBase>(&I);
  if (!CB)
    return false;

  if (CB->hasFnAttr(Attribute::NoFree))
    return false;

  // Speculatively assume in SCC.
  if (Function *Callee = CB->getCalledFunction())
    if (SCCNodes.contains(Callee))
      return false;

  return true;
}

/// Attempt to remove convergent function attribute when possible.
///
/// Returns true if any changes to function attributes were made.
static bool inferConvergent(const SCCNodeSet &SCCNodes) {
  AttributeInferer AI;

  // Request to remove the convergent attribute from all functions in the SCC
  // if every callsite within the SCC is not convergent (except for calls
  // to functions within the SCC).
  // Note: Removal of the attr from the callsites will happen in
  // InstCombineCalls separately.
  AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
      Attribute::Convergent,
      // Skip non-convergent functions.
      [](const Function &F) { return !F.isConvergent(); },
      // Instructions that break non-convergent assumption.
      [SCCNodes](Instruction &I) {
        return InstrBreaksNonConvergent(I, SCCNodes);
      },
      [](Function &F) {
        LLVM_DEBUG(dbgs() << "Removing convergent attr from fn " << F.getName()
                          << "\n");
        F.setNotConvergent();
      },
      /* RequiresExactDefinition= */ false});
  // Perform all the requested attribute inference actions.
  return AI.run(SCCNodes);
}

/// Infer attributes from all functions in the SCC by scanning every
/// instruction for compliance to the attribute assumptions. Currently it
/// does:
///   - addition of NoUnwind attribute
///
/// Returns true if any changes to function attributes were made.
static bool inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes) {
  AttributeInferer AI;

  if (!DisableNoUnwindInference)
    // Request to infer nounwind attribute for all the functions in the SCC if
    // every callsite within the SCC is not throwing (except for calls to
    // functions within the SCC). Note that nounwind attribute suffers from
    // derefinement - results may change depending on how functions are
    // optimized. Thus it can be inferred only from exact definitions.
    AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
        Attribute::NoUnwind,
        // Skip non-throwing functions.
        [](const Function &F) { return F.doesNotThrow(); },
        // Instructions that break non-throwing assumption.
        [&SCCNodes](Instruction &I) {
          return InstrBreaksNonThrowing(I, SCCNodes);
        },
        [](Function &F) {
          LLVM_DEBUG(dbgs()
                     << "Adding nounwind attr to fn " << F.getName() << "\n");
          F.setDoesNotThrow();
          ++NumNoUnwind;
        },
        /* RequiresExactDefinition= */ true});

  if (!DisableNoFreeInference)
    // Request to infer nofree attribute for all the functions in the SCC if
    // every callsite within the SCC does not directly or indirectly free
    // memory (except for calls to functions within the SCC). Note that nofree
    // attribute suffers from derefinement - results may change depending on
    // how functions are optimized. Thus it can be inferred only from exact
    // definitions.
    AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
        Attribute::NoFree,
        // Skip functions known not to free memory.
        [](const Function &F) { return F.doesNotFreeMemory(); },
        // Instructions that break non-deallocating assumption.
        [&SCCNodes](Instruction &I) {
          return InstrBreaksNoFree(I, SCCNodes);
        },
        [](Function &F) {
          LLVM_DEBUG(dbgs()
                     << "Adding nofree attr to fn " << F.getName() << "\n");
          F.setDoesNotFreeMemory();
          ++NumNoFree;
        },
        /* RequiresExactDefinition= */ true});

  // Perform all the requested attribute inference actions.
  return AI.run(SCCNodes);
}

/// Deduce norecurse for a single-node SCC all of whose calls are to known
/// norecurse functions; multi-node SCCs are recursive by construction.
static bool addNoRecurseAttrs(const SCCNodeSet &SCCNodes) {
  // Try and identify functions that do not recurse.

  // If the SCC contains multiple nodes we know for sure there is recursion.
  if (SCCNodes.size() != 1)
    return false;

  Function *F = *SCCNodes.begin();
  if (!F || !F->hasExactDefinition() || F->doesNotRecurse())
    return false;

  // If all of the calls in F are identifiable and are to norecurse functions, F
  // is norecurse. This check also detects self-recursion as F is not currently
  // marked norecurse, so any called from F to F will not be marked norecurse.
  for (auto &BB : *F)
    for (auto &I : BB.instructionsWithoutDebug())
      if (auto *CB = dyn_cast<CallBase>(&I)) {
        Function *Callee = CB->getCalledFunction();
        if (!Callee || Callee == F || !Callee->doesNotRecurse())
          // Function calls a potentially recursive function.
          return false;
      }

  // Every call was to a non-recursive function other than this function, and
  // we have no indirect recursion as the SCC size is one. This function cannot
  // recurse.
  F->setDoesNotRecurse();
  ++NumNoRecurse;
  return true;
}

/// Returns true if \p I is a call known to never return to its caller.
static bool instructionDoesNotReturn(Instruction &I) {
  if (auto *CB = dyn_cast<CallBase>(&I))
    return CB->hasFnAttr(Attribute::NoReturn);
  return false;
}

// A basic block can only return if it terminates with a ReturnInst and does not
// contain calls to noreturn functions.
static bool basicBlockCanReturn(BasicBlock &BB) {
  if (!isa<ReturnInst>(BB.getTerminator()))
    return false;
  return none_of(BB, instructionDoesNotReturn);
}

// Set the noreturn function attribute if possible.
static bool addNoReturnAttrs(const SCCNodeSet &SCCNodes) {
  bool Changed = false;

  for (Function *F : SCCNodes) {
    if (!F || !F->hasExactDefinition() || F->hasFnAttribute(Attribute::Naked) ||
        F->doesNotReturn())
      continue;

    // The function can return if any basic blocks can return.
    // FIXME: this doesn't handle recursion or unreachable blocks.
    if (none_of(*F, basicBlockCanReturn)) {
      F->setDoesNotReturn();
      Changed = true;
    }
  }

  return Changed;
}

/// Conservatively decides whether \p F is guaranteed to return: either it is
/// mustprogress and side-effect free, or it is loop-free and every call in it
/// is willreturn.
static bool functionWillReturn(const Function &F) {
  // We can infer and propagate function attributes only when we know that the
  // definition we'll get at link time is *exactly* the definition we see now.
  // For more details, see GlobalValue::mayBeDerefined.
  if (!F.hasExactDefinition())
    return false;

  // Must-progress function without side-effects must return.
  if (F.mustProgress() && F.onlyReadsMemory())
    return true;

  // Can only analyze functions with a definition.
  if (F.isDeclaration())
    return false;

  // Functions with loops require more sophisticated analysis, as the loop
  // may be infinite. For now, don't try to handle them.
  SmallVector<std::pair<const BasicBlock *, const BasicBlock *>> Backedges;
  FindFunctionBackedges(F, Backedges);
  if (!Backedges.empty())
    return false;

  // If there are no loops, then the function is willreturn if all calls in
  // it are willreturn.
  return all_of(instructions(F), [](const Instruction &I) {
    return I.willReturn();
  });
}

// Set the willreturn function attribute if possible.
static bool addWillReturn(const SCCNodeSet &SCCNodes) {
  bool Changed = false;

  for (Function *F : SCCNodes) {
    if (!F || F->willReturn() || !functionWillReturn(*F))
      continue;

    F->setWillReturn();
    NumWillReturn++;
    Changed = true;
  }

  return Changed;
}

// Return true if this is an atomic which has an ordering stronger than
// unordered. Note that this is different than the predicate we use in
// Attributor. Here we chose to be conservative and consider monotonic
// operations potentially synchronizing. We generally don't do much with
// monotonic operations, so this is simply risk reduction.
static bool isOrderedAtomic(Instruction *I) {
  if (!I->isAtomic())
    return false;

  if (auto *FI = dyn_cast<FenceInst>(I))
    // All legal orderings for fence are stronger than monotonic.
    return FI->getSyncScopeID() != SyncScope::SingleThread;
  else if (isa<AtomicCmpXchgInst>(I) || isa<AtomicRMWInst>(I))
    return true;
  else if (auto *SI = dyn_cast<StoreInst>(I))
    return !SI->isUnordered();
  else if (auto *LI = dyn_cast<LoadInst>(I))
    return !LI->isUnordered();
  else {
    llvm_unreachable("unknown atomic instruction?");
  }
}

/// Helper for NoSync inference predicate InstrBreaksAttribute.
static bool InstrBreaksNoSync(Instruction &I, const SCCNodeSet &SCCNodes) {
  // Volatile may synchronize
  if (I.isVolatile())
    return true;

  // An ordered atomic may synchronize. (See comment about on monotonic.)
  if (isOrderedAtomic(&I))
    return true;

  auto *CB = dyn_cast<CallBase>(&I);
  if (!CB)
    // Non call site cases covered by the two checks above
    return false;

  if (CB->hasFnAttr(Attribute::NoSync))
    return false;

  // Non volatile memset/memcpy/memmoves are nosync
  // NOTE: Only intrinsics with volatile flags should be handled here. All
  // others should be marked in Intrinsics.td.
  if (auto *MI = dyn_cast<MemIntrinsic>(&I))
    if (!MI->isVolatile())
      return false;

  // Speculatively assume in SCC.
  if (Function *Callee = CB->getCalledFunction())
    if (SCCNodes.contains(Callee))
      return false;

  return true;
}

// Infer the nosync attribute.
static bool addNoSyncAttr(const SCCNodeSet &SCCNodes) {
  AttributeInferer AI;
  AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
      Attribute::NoSync,
      // Skip already marked functions.
      [](const Function &F) { return F.hasNoSync(); },
      // Instructions that break nosync assumption.
      [&SCCNodes](Instruction &I) {
        return InstrBreaksNoSync(I, SCCNodes);
      },
      [](Function &F) {
        LLVM_DEBUG(dbgs()
                   << "Adding nosync attr to fn " << F.getName() << "\n");
        F.setNoSync();
        ++NumNoSync;
      },
      /* RequiresExactDefinition= */ true});
  return AI.run(SCCNodes);
}

/// Builds the node set for a (candidate) SCC, dropping optnone/naked
/// functions and recording whether any member has an unknown call edge.
static SCCNodesResult createSCCNodeSet(ArrayRef<Function *> Functions) {
  SCCNodesResult Res;
  Res.HasUnknownCall = false;
  for (Function *F : Functions) {
    if (!F || F->hasOptNone() || F->hasFnAttribute(Attribute::Naked)) {
      // Treat any function we're trying not to optimize as if it were an
      // indirect call and omit it from the node set used below.
      Res.HasUnknownCall = true;
      continue;
    }
    // Track whether any functions in this SCC have an unknown call edge.
    // Note: if this is ever a performance hit, we can common it with
    // subsequent routines which also do scans over the instructions of the
    // function.
    if (!Res.HasUnknownCall) {
      for (Instruction &I : instructions(*F)) {
        if (auto *CB = dyn_cast<CallBase>(&I)) {
          if (!CB->getCalledFunction()) {
            Res.HasUnknownCall = true;
            break;
          }
        }
      }
    }
    Res.SCCNodes.insert(F);
  }
  return Res;
}

/// Runs all the attribute deductions above over one SCC worth of functions.
/// Returns true if any attribute was added or removed.
template <typename AARGetterT>
static bool deriveAttrsInPostOrder(ArrayRef<Function *> Functions,
                                   AARGetterT &&AARGetter) {
  SCCNodesResult Nodes = createSCCNodeSet(Functions);
  bool Changed = false;

  // Bail if the SCC only contains optnone functions.
  if (Nodes.SCCNodes.empty())
    return Changed;

  Changed |= addArgumentReturnedAttrs(Nodes.SCCNodes);
  Changed |= addReadAttrs(Nodes.SCCNodes, AARGetter);
  Changed |= addArgumentAttrs(Nodes.SCCNodes);
  Changed |= inferConvergent(Nodes.SCCNodes);
  Changed |= addNoReturnAttrs(Nodes.SCCNodes);
  Changed |= addWillReturn(Nodes.SCCNodes);

  // If we have no external nodes participating in the SCC, we can deduce some
  // more precise attributes as well.
  if (!Nodes.HasUnknownCall) {
    Changed |= addNoAliasAttrs(Nodes.SCCNodes);
    Changed |= addNonNullAttrs(Nodes.SCCNodes);
    Changed |= inferAttrsFromFunctionBodies(Nodes.SCCNodes);
    Changed |= addNoRecurseAttrs(Nodes.SCCNodes);
  }

  Changed |= addNoSyncAttr(Nodes.SCCNodes);

  // Finally, infer the maximal set of attributes from the ones we've inferred
  // above. This is handling the cases where one attribute on a signature
  // implies another, but for implementation reasons the inference rule for
  // the later is missing (or simply less sophisticated).
  for (Function *F : Nodes.SCCNodes)
    if (F)
      Changed |= inferAttributesFromOthers(*F);

  return Changed;
}

PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
                                                  CGSCCAnalysisManager &AM,
                                                  LazyCallGraph &CG,
                                                  CGSCCUpdateResult &) {
  FunctionAnalysisManager &FAM =
      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();

  // We pass a lambda into functions to wire them up to the analysis manager
  // for getting function analyses.
  auto AARGetter = [&](Function &F) -> AAResults & {
    return FAM.getResult<AAManager>(F);
  };

  SmallVector<Function *, 8> Functions;
  for (LazyCallGraph::Node &N : C) {
    Functions.push_back(&N.getFunction());
  }

  if (deriveAttrsInPostOrder(Functions, AARGetter)) {
    // We have not changed the call graph or removed/added functions.
    PreservedAnalyses PA;
    PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
    return PA;
  }

  return PreservedAnalyses::all();
}

namespace {

/// Legacy-pass-manager wrapper around deriveAttrsInPostOrder.
struct PostOrderFunctionAttrsLegacyPass : public CallGraphSCCPass {
  // Pass identification, replacement for typeid
  static char ID;

  PostOrderFunctionAttrsLegacyPass() : CallGraphSCCPass(ID) {
    initializePostOrderFunctionAttrsLegacyPassPass(
        *PassRegistry::getPassRegistry());
  }

  bool runOnSCC(CallGraphSCC &SCC) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<AssumptionCacheTracker>();
    getAAResultsAnalysisUsage(AU);
    CallGraphSCCPass::getAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char PostOrderFunctionAttrsLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(PostOrderFunctionAttrsLegacyPass, "function-attrs",
                      "Deduce function attributes", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_END(PostOrderFunctionAttrsLegacyPass, "function-attrs",
                    "Deduce function attributes", false, false)

Pass *llvm::createPostOrderFunctionAttrsLegacyPass() {
  return new PostOrderFunctionAttrsLegacyPass();
}

template <typename AARGetterT>
static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
  SmallVector<Function *, 8> Functions;
  for (CallGraphNode *I : SCC) {
    Functions.push_back(I->getFunction());
  }

  return deriveAttrsInPostOrder(Functions, AARGetter);
}

bool PostOrderFunctionAttrsLegacyPass::runOnSCC(CallGraphSCC &SCC) {
  if (skipSCC(SCC))
    return false;
  return runImpl(SCC, LegacyAARGetter(*this));
}

namespace {

/// Legacy-pass-manager wrapper for the reverse-post-order norecurse pass.
struct ReversePostOrderFunctionAttrsLegacyPass : public ModulePass {
  // Pass identification, replacement for typeid
  static char ID;

  ReversePostOrderFunctionAttrsLegacyPass() : ModulePass(ID) {
    initializeReversePostOrderFunctionAttrsLegacyPassPass(
        *PassRegistry::getPassRegistry());
  }

  bool runOnModule(Module &M) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<CallGraphWrapperPass>();
    AU.addPreserved<CallGraphWrapperPass>();
  }
};

} // end anonymous namespace

char ReversePostOrderFunctionAttrsLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(ReversePostOrderFunctionAttrsLegacyPass,
                      "rpo-function-attrs", "Deduce function attributes in RPO",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_END(ReversePostOrderFunctionAttrsLegacyPass,
                    "rpo-function-attrs", "Deduce function attributes in RPO",
                    false, false)

Pass *llvm::createReversePostOrderFunctionAttrsPass() {
  return new ReversePostOrderFunctionAttrsLegacyPass();
}

static bool addNoRecurseAttrsTopDown(Function &F) {
  // We check the preconditions for the function prior to calling this to avoid
  // the cost of building up a reversible post-order list. We assert them here
  // to make sure none of the invariants this relies on were violated.
  assert(!F.isDeclaration() && "Cannot deduce norecurse without a definition!");
  assert(!F.doesNotRecurse() &&
         "This function has already been deduced as norecurs!");
  assert(F.hasInternalLinkage() &&
         "Can only do top-down deduction for internal linkage functions!");

  // If F is internal and all of its uses are calls from a non-recursive
  // functions, then none of its calls could in fact recurse without going
  // through a function marked norecurse, and so we can mark this function too
  // as norecurse. Note that the uses must actually be calls -- otherwise
  // a pointer to this function could be returned from a norecurse function but
  // this function could be recursively (indirectly) called. Note that this
  // also detects if F is directly recursive as F is not yet marked as
  // a norecurse function.
  for (auto *U : F.users()) {
    auto *I = dyn_cast<Instruction>(U);
    if (!I)
      return false;
    CallBase *CB = dyn_cast<CallBase>(I);
    if (!CB || !CB->getParent()->getParent()->doesNotRecurse())
      return false;
  }
  F.setDoesNotRecurse();
  ++NumNoRecurse;
  return true;
}

static bool deduceFunctionAttributeInRPO(Module &M, CallGraph &CG) {
  // We only have a post-order SCC traversal (because SCCs are inherently
  // discovered in post-order), so we accumulate them in a vector and then walk
  // it in reverse. This is simpler than using the RPO iterator infrastructure
  // because we need to combine SCC detection and the PO walk of the call
  // graph. We can also cheat egregiously because we're primarily interested in
  // synthesizing norecurse and so we can only save the singular SCCs as SCCs
  // with multiple functions in them will clearly be recursive.
  SmallVector<Function *, 16> Worklist;
  for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) {
    if (I->size() != 1)
      continue;

    // Only internal, defined, not-yet-norecurse functions are candidates;
    // these match the preconditions asserted by addNoRecurseAttrsTopDown.
    Function *F = I->front()->getFunction();
    if (F && !F->isDeclaration() && !F->doesNotRecurse() &&
        F->hasInternalLinkage())
      Worklist.push_back(F);
  }

  // Reversing the post-order worklist yields a reverse-post-order (top-down)
  // walk, so callers are processed before their callees.
  bool Changed = false;
  for (auto *F : llvm::reverse(Worklist))
    Changed |= addNoRecurseAttrsTopDown(*F);

  return Changed;
}

bool ReversePostOrderFunctionAttrsLegacyPass::runOnModule(Module &M) {
  if (skipModule(M))
    return false;

  auto &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();

  return deduceFunctionAttributeInRPO(M, CG);
}

PreservedAnalyses
ReversePostOrderFunctionAttrsPass::run(Module &M, ModuleAnalysisManager &AM) {
  auto &CG = AM.getResult<CallGraphAnalysis>(M);

  if (!deduceFunctionAttributeInRPO(M, CG))
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserve<CallGraphAnalysis>();
  return PA;
}