//===- FunctionAttrs.cpp - Pass which marks function attributes ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements interprocedural passes which walk the
/// call-graph deducing and/or propagating function attributes.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <iterator>
#include <map>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "function-attrs"

STATISTIC(NumReadNone, "Number of functions marked readnone");
STATISTIC(NumReadOnly, "Number of functions marked readonly");
STATISTIC(NumWriteOnly, "Number of functions marked writeonly");
STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
STATISTIC(NumReturned, "Number of arguments marked returned");
STATISTIC(NumReadNoneArg, "Number of arguments marked readnone");
STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly");
STATISTIC(NumNoAlias, "Number of function returns marked noalias");
STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull");
STATISTIC(NumNoRecurse, "Number of functions marked as norecurse");
STATISTIC(NumNoUnwind, "Number of functions marked as nounwind");
STATISTIC(NumNoFree, "Number of functions marked as nofree");
STATISTIC(NumWillReturn, "Number of functions marked as willreturn");
STATISTIC(NumNoSync, "Number of functions marked as nosync");

static cl::opt<bool> EnableNonnullArgPropagation(
"enable-nonnull-arg-prop", cl::init(true), cl::Hidden, 87 cl::desc("Try to propagate nonnull argument attributes from callsites to " 88 "caller functions.")); 89 90 static cl::opt<bool> DisableNoUnwindInference( 91 "disable-nounwind-inference", cl::Hidden, 92 cl::desc("Stop inferring nounwind attribute during function-attrs pass")); 93 94 static cl::opt<bool> DisableNoFreeInference( 95 "disable-nofree-inference", cl::Hidden, 96 cl::desc("Stop inferring nofree attribute during function-attrs pass")); 97 98 namespace { 99 100 using SCCNodeSet = SmallSetVector<Function *, 8>; 101 102 } // end anonymous namespace 103 104 /// Returns the memory access attribute for function F using AAR for AA results, 105 /// where SCCNodes is the current SCC. 106 /// 107 /// If ThisBody is true, this function may examine the function body and will 108 /// return a result pertaining to this copy of the function. If it is false, the 109 /// result will be based only on AA results for the function declaration; it 110 /// will be assumed that some other (perhaps less optimized) version of the 111 /// function may be selected at link time. 112 static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody, 113 AAResults &AAR, 114 const SCCNodeSet &SCCNodes) { 115 FunctionModRefBehavior MRB = AAR.getModRefBehavior(&F); 116 if (MRB == FMRB_DoesNotAccessMemory) 117 // Already perfect! 118 return MAK_ReadNone; 119 120 if (!ThisBody) { 121 if (AliasAnalysis::onlyReadsMemory(MRB)) 122 return MAK_ReadOnly; 123 124 if (AliasAnalysis::doesNotReadMemory(MRB)) 125 return MAK_WriteOnly; 126 127 // Conservatively assume it reads and writes to memory. 128 return MAK_MayWrite; 129 } 130 131 // Scan the function body for instructions that may read or write memory. 132 bool ReadsMemory = false; 133 bool WritesMemory = false; 134 for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) { 135 Instruction *I = &*II; 136 137 // Some instructions can be ignored even if they read or write memory. 138 // Detect these now, skipping to the next instruction if one is found. 139 if (auto *Call = dyn_cast<CallBase>(I)) { 140 // Ignore calls to functions in the same SCC, as long as the call sites 141 // don't have operand bundles. Calls with operand bundles are allowed to 142 // have memory effects not described by the memory effects of the call 143 // target. 144 if (!Call->hasOperandBundles() && Call->getCalledFunction() && 145 SCCNodes.count(Call->getCalledFunction())) 146 continue; 147 FunctionModRefBehavior MRB = AAR.getModRefBehavior(Call); 148 ModRefInfo MRI = createModRefInfo(MRB); 149 150 // If the call doesn't access memory, we're done. 151 if (isNoModRef(MRI)) 152 continue; 153 154 // A pseudo probe call shouldn't change any function attribute since it 155 // doesn't translate to a real instruction. It comes with a memory access 156 // tag to prevent itself being removed by optimizations and not block 157 // other instructions being optimized. 158 if (isa<PseudoProbeInst>(I)) 159 continue; 160 161 if (!AliasAnalysis::onlyAccessesArgPointees(MRB)) { 162 // The call could access any memory. If that includes writes, note it. 163 if (isModSet(MRI)) 164 WritesMemory = true; 165 // If it reads, note it. 166 if (isRefSet(MRI)) 167 ReadsMemory = true; 168 continue; 169 } 170 171 // Check whether all pointer arguments point to local memory, and 172 // ignore calls that only access local memory. 
      for (auto CI = Call->arg_begin(), CE = Call->arg_end(); CI != CE; ++CI) {
        Value *Arg = *CI;
        if (!Arg->getType()->isPtrOrPtrVectorTy())
          continue;

        AAMDNodes AAInfo;
        I->getAAMetadata(AAInfo);
        MemoryLocation Loc = MemoryLocation::getBeforeOrAfter(Arg, AAInfo);

        // Skip accesses to local or constant memory as they don't impact the
        // externally visible mod/ref behavior.
        if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
          continue;

        if (isModSet(MRI))
          // Writes non-local memory.
          WritesMemory = true;
        if (isRefSet(MRI))
          // Ok, it reads non-local memory.
          ReadsMemory = true;
      }
      continue;
    } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      // Ignore non-volatile loads from local memory. (Atomic is okay here.)
      if (!LI->isVolatile()) {
        MemoryLocation Loc = MemoryLocation::get(LI);
        if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
          continue;
      }
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      // Ignore non-volatile stores to local memory. (Atomic is okay here.)
      if (!SI->isVolatile()) {
        MemoryLocation Loc = MemoryLocation::get(SI);
        if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
          continue;
      }
    } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) {
      // Ignore vaargs on local memory.
      MemoryLocation Loc = MemoryLocation::get(VI);
      if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
        continue;
    }

    // Any remaining instructions need to be taken seriously! Check if they
    // read or write memory.
    //
    // Writes memory, remember that.
    WritesMemory |= I->mayWriteToMemory();

    // If this instruction may read memory, remember that.
    ReadsMemory |= I->mayReadFromMemory();
  }

  if (WritesMemory) {
    if (!ReadsMemory)
      return MAK_WriteOnly;
    else
      return MAK_MayWrite;
  }

  return ReadsMemory ? MAK_ReadOnly : MAK_ReadNone;
}

MemoryAccessKind llvm::computeFunctionBodyMemoryAccess(Function &F,
                                                       AAResults &AAR) {
  return checkFunctionMemoryAccess(F, /*ThisBody=*/true, AAR, {});
}

/// Deduce readonly/readnone attributes for the SCC.
template <typename AARGetterT>
static void addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter,
                         SmallSetVector<Function *, 8> &Changed) {
  // Check if any of the functions in the SCC read or write memory. If they
  // write memory then they can't be marked readnone or readonly.
  bool ReadsMemory = false;
  bool WritesMemory = false;
  for (Function *F : SCCNodes) {
    // Call the callable parameter to look up AA results for this function.
    AAResults &AAR = AARGetter(*F);

    // Non-exact function definitions may not be selected at link time, and an
    // alternative version that writes to memory may be selected. See the
    // comment on GlobalValue::isDefinitionExact for more details.
    switch (checkFunctionMemoryAccess(*F, F->hasExactDefinition(),
                                      AAR, SCCNodes)) {
    case MAK_MayWrite:
      return;
    case MAK_ReadOnly:
      ReadsMemory = true;
      break;
    case MAK_WriteOnly:
      WritesMemory = true;
      break;
    case MAK_ReadNone:
      // Nothing to do!
      break;
    }
  }

  // If the SCC contains both functions that read and functions that write,
  // then we cannot add readonly attributes.
  if (ReadsMemory && WritesMemory)
    return;

  // Success! Functions in this SCC do not access memory, or only read memory.
  // Give them the appropriate attribute.
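  //
  // For example (illustrative IR): an SCC whose only memory access is
  //   %v = load i32, i32* @g
  // reads but never writes, so every function in it can be marked readonly.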
  for (Function *F : SCCNodes) {
    if (F->doesNotAccessMemory())
      // Already perfect!
      continue;

    if (F->onlyReadsMemory() && ReadsMemory)
      // No change.
      continue;

    if (F->doesNotReadMemory() && WritesMemory)
      continue;

    Changed.insert(F);

    // Clear out any existing attributes.
    AttrBuilder AttrsToRemove;
    AttrsToRemove.addAttribute(Attribute::ReadOnly);
    AttrsToRemove.addAttribute(Attribute::ReadNone);
    AttrsToRemove.addAttribute(Attribute::WriteOnly);

    if (!WritesMemory && !ReadsMemory) {
      // Clear out any "access range attributes" if readnone was deduced.
      AttrsToRemove.addAttribute(Attribute::ArgMemOnly);
      AttrsToRemove.addAttribute(Attribute::InaccessibleMemOnly);
      AttrsToRemove.addAttribute(Attribute::InaccessibleMemOrArgMemOnly);
    }
    F->removeAttributes(AttributeList::FunctionIndex, AttrsToRemove);

    // Add in the new attribute.
    if (WritesMemory && !ReadsMemory)
      F->addFnAttr(Attribute::WriteOnly);
    else
      F->addFnAttr(ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone);

    if (WritesMemory && !ReadsMemory)
      ++NumWriteOnly;
    else if (ReadsMemory)
      ++NumReadOnly;
    else
      ++NumReadNone;
  }
}

namespace {

/// For a given pointer Argument, this retains a list of Arguments of functions
/// in the same SCC that the pointer data flows into. We use this to build an
/// SCC of the arguments.
struct ArgumentGraphNode {
  Argument *Definition;
  SmallVector<ArgumentGraphNode *, 4> Uses;
};

class ArgumentGraph {
  // We store pointers to ArgumentGraphNode objects, so it's important that
  // they not move around upon insert.
  using ArgumentMapTy = std::map<Argument *, ArgumentGraphNode>;

  ArgumentMapTy ArgumentMap;

  // There is no root node for the argument graph, in fact:
  //   void f(int *x, int *y) { if (...) f(x, y); }
  // is an example where the graph is disconnected. The SCCIterator requires a
  // single entry point, so we maintain a fake ("synthetic") root node that
  // uses every node. Because the graph is directed and nothing points into
  // the root, it will not participate in any SCCs (except for its own).
  ArgumentGraphNode SyntheticRoot;

public:
  ArgumentGraph() { SyntheticRoot.Definition = nullptr; }

  using iterator = SmallVectorImpl<ArgumentGraphNode *>::iterator;

  iterator begin() { return SyntheticRoot.Uses.begin(); }
  iterator end() { return SyntheticRoot.Uses.end(); }
  ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; }

  ArgumentGraphNode *operator[](Argument *A) {
    ArgumentGraphNode &Node = ArgumentMap[A];
    Node.Definition = A;
    SyntheticRoot.Uses.push_back(&Node);
    return &Node;
  }
};

/// This tracker checks whether callees are in the SCC, and if so it does not
/// consider that a capture, instead adding it to the "Uses" list and
/// continuing with the analysis.
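///
/// For example (illustrative C-like source): in
///   void f(int *x) { g(x); }
/// where g is in the same SCC, passing x to g is recorded as a use of g's
/// corresponding parameter rather than being treated as a capture.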
struct ArgumentUsesTracker : public CaptureTracker {
  ArgumentUsesTracker(const SCCNodeSet &SCCNodes) : SCCNodes(SCCNodes) {}

  void tooManyUses() override { Captured = true; }

  bool captured(const Use *U) override {
    CallBase *CB = dyn_cast<CallBase>(U->getUser());
    if (!CB) {
      Captured = true;
      return true;
    }

    Function *F = CB->getCalledFunction();
    if (!F || !F->hasExactDefinition() || !SCCNodes.count(F)) {
      Captured = true;
      return true;
    }

    // Note: the callee and the two successor blocks *follow* the argument
    // operands. This means there is no need to adjust UseIndex to account for
    // these.

    unsigned UseIndex =
        std::distance(const_cast<const Use *>(CB->arg_begin()), U);

    assert(UseIndex < CB->data_operands_size() &&
           "Indirect function calls should have been filtered above!");

    if (UseIndex >= CB->getNumArgOperands()) {
      // Data operand, but not an argument operand -- must be a bundle operand
      assert(CB->hasOperandBundles() && "Must be!");

      // CaptureTracking told us that we're being captured by an operand bundle
      // use. In this case it does not matter if the callee is within our SCC
      // or not -- we've been captured in some unknown way, and we have to be
      // conservative.
      Captured = true;
      return true;
    }

    if (UseIndex >= F->arg_size()) {
      assert(F->isVarArg() && "More params than args in non-varargs call");
      Captured = true;
      return true;
    }

    Uses.push_back(&*std::next(F->arg_begin(), UseIndex));
    return false;
  }

  // True only if certainly captured (used outside our SCC).
  bool Captured = false;

  // Uses within our SCC.
  SmallVector<Argument *, 4> Uses;

  const SCCNodeSet &SCCNodes;
};

} // end anonymous namespace

namespace llvm {

template <> struct GraphTraits<ArgumentGraphNode *> {
  using NodeRef = ArgumentGraphNode *;
  using ChildIteratorType = SmallVectorImpl<ArgumentGraphNode *>::iterator;

  static NodeRef getEntryNode(NodeRef A) { return A; }
  static ChildIteratorType child_begin(NodeRef N) { return N->Uses.begin(); }
  static ChildIteratorType child_end(NodeRef N) { return N->Uses.end(); }
};

template <>
struct GraphTraits<ArgumentGraph *> : public GraphTraits<ArgumentGraphNode *> {
  static NodeRef getEntryNode(ArgumentGraph *AG) { return AG->getEntryNode(); }

  static ChildIteratorType nodes_begin(ArgumentGraph *AG) {
    return AG->begin();
  }

  static ChildIteratorType nodes_end(ArgumentGraph *AG) { return AG->end(); }
};

} // end namespace llvm

/// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone.
static Attribute::AttrKind
determinePointerReadAttrs(Argument *A,
                          const SmallPtrSet<Argument *, 8> &SCCNodes) {
  SmallVector<Use *, 32> Worklist;
  SmallPtrSet<Use *, 32> Visited;

  // inalloca arguments are always clobbered by the call.
  if (A->hasInAllocaAttr() || A->hasPreallocatedAttr())
    return Attribute::None;

  bool IsRead = false;
  // We don't need to track IsWritten. If A is written to, return immediately.
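
  // Seed the worklist with the argument's direct uses; the loop below chases
  // transitive uses through casts, GEPs, phis, selects and call sites.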
  for (Use &U : A->uses()) {
    Visited.insert(&U);
    Worklist.push_back(&U);
  }

  while (!Worklist.empty()) {
    Use *U = Worklist.pop_back_val();
    Instruction *I = cast<Instruction>(U->getUser());

    switch (I->getOpcode()) {
    case Instruction::BitCast:
    case Instruction::GetElementPtr:
    case Instruction::PHI:
    case Instruction::Select:
    case Instruction::AddrSpaceCast:
      // The original value is not read/written via this if the new value
      // isn't.
      for (Use &UU : I->uses())
        if (Visited.insert(&UU).second)
          Worklist.push_back(&UU);
      break;

    case Instruction::Call:
    case Instruction::Invoke: {
      bool Captures = true;

      if (I->getType()->isVoidTy())
        Captures = false;

      auto AddUsersToWorklistIfCapturing = [&] {
        if (Captures)
          for (Use &UU : I->uses())
            if (Visited.insert(&UU).second)
              Worklist.push_back(&UU);
      };

      CallBase &CB = cast<CallBase>(*I);
      if (CB.doesNotAccessMemory()) {
        AddUsersToWorklistIfCapturing();
        continue;
      }

      Function *F = CB.getCalledFunction();
      if (!F) {
        if (CB.onlyReadsMemory()) {
          IsRead = true;
          AddUsersToWorklistIfCapturing();
          continue;
        }
        return Attribute::None;
      }

      // Note: the callee and the two successor blocks *follow* the argument
      // operands. This means there is no need to adjust UseIndex to account
      // for these.

      unsigned UseIndex = std::distance(CB.arg_begin(), U);

      // U cannot be the callee operand use: since we're exploring the
      // transitive uses of an Argument, having such a use be a callee would
      // imply the call site is an indirect call or invoke; and we'd take the
      // early exit above.
      assert(UseIndex < CB.data_operands_size() &&
             "Data operand use expected!");

      bool IsOperandBundleUse = UseIndex >= CB.getNumArgOperands();

      if (UseIndex >= F->arg_size() && !IsOperandBundleUse) {
        assert(F->isVarArg() && "More params than args in non-varargs call");
        return Attribute::None;
      }

      Captures &= !CB.doesNotCapture(UseIndex);

      // Since the optimizer (by design) cannot see the data flow corresponding
      // to an operand bundle use, these cannot participate in the optimistic
      // SCC analysis. Instead, we model the operand bundle uses as arguments
      // in a call to a function external to the SCC.
      if (IsOperandBundleUse ||
          !SCCNodes.count(&*std::next(F->arg_begin(), UseIndex))) {

        // The accessors used on call site here do the right thing for calls
        // and invokes with operand bundles.

        if (!CB.onlyReadsMemory() && !CB.onlyReadsMemory(UseIndex))
          return Attribute::None;
        if (!CB.doesNotAccessMemory(UseIndex))
          IsRead = true;
      }

      AddUsersToWorklistIfCapturing();
      break;
    }

    case Instruction::Load:
      // A volatile load has side effects beyond what readonly can be relied
      // upon.
      if (cast<LoadInst>(I)->isVolatile())
        return Attribute::None;

      IsRead = true;
      break;

    case Instruction::ICmp:
    case Instruction::Ret:
      break;

    default:
      return Attribute::None;
    }
  }

  return IsRead ? Attribute::ReadOnly : Attribute::ReadNone;
}

/// Deduce returned attributes for the SCC.
static void addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes,
                                     SmallSetVector<Function *, 8> &Changed) {
  // Check each function in turn, determining if an argument is always
  // returned.
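  //
  // For example (illustrative IR): in
  //   define i8* @f(i8* %p) { ret i8* %p }
  // the argument %p can be marked 'returned'.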
  for (Function *F : SCCNodes) {
    // We can infer and propagate function attributes only when we know that
    // the definition we'll get at link time is *exactly* the definition we
    // see now. For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      continue;

    if (F->getReturnType()->isVoidTy())
      continue;

    // There is nothing to do if an argument is already marked as 'returned'.
    if (llvm::any_of(F->args(),
                     [](const Argument &Arg) { return Arg.hasReturnedAttr(); }))
      continue;

    auto FindRetArg = [&]() -> Value * {
      Value *RetArg = nullptr;
      for (BasicBlock &BB : *F)
        if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) {
          // Note that stripPointerCasts should look through functions with
          // returned arguments.
          Value *RetVal = Ret->getReturnValue()->stripPointerCasts();
          if (!isa<Argument>(RetVal) || RetVal->getType() != F->getReturnType())
            return nullptr;

          if (!RetArg)
            RetArg = RetVal;
          else if (RetArg != RetVal)
            return nullptr;
        }

      return RetArg;
    };

    if (Value *RetArg = FindRetArg()) {
      auto *A = cast<Argument>(RetArg);
      A->addAttr(Attribute::Returned);
      ++NumReturned;
      Changed.insert(F);
    }
  }
}

/// If a callsite has arguments that are also arguments to the parent function,
/// try to propagate attributes from the callsite's arguments to the parent's
/// arguments. This may be important because inlining can cause information
/// loss when attribute knowledge disappears with the inlined call.
static bool addArgumentAttrsFromCallsites(Function &F) {
  if (!EnableNonnullArgPropagation)
    return false;

  bool Changed = false;

  // For an argument attribute to transfer from a callsite to the parent, the
  // call must be guaranteed to execute every time the parent is called.
  // Conservatively, just check for calls in the entry block that are
  // guaranteed to execute.
  // TODO: This could be enhanced by testing if the callsite post-dominates the
  // entry block or by doing simple forward walks or backward walks to the
  // callsite.
  BasicBlock &Entry = F.getEntryBlock();
  for (Instruction &I : Entry) {
    if (auto *CB = dyn_cast<CallBase>(&I)) {
      if (auto *CalledFunc = CB->getCalledFunction()) {
        for (auto &CSArg : CalledFunc->args()) {
          if (!CSArg.hasNonNullAttr(/* AllowUndefOrPoison */ false))
            continue;

          // If the non-null callsite argument operand is an argument to 'F'
          // (the caller) and the call is guaranteed to execute, then the value
          // must be non-null throughout 'F'.
          auto *FArg = dyn_cast<Argument>(CB->getArgOperand(CSArg.getArgNo()));
          if (FArg && !FArg->hasNonNullAttr()) {
            FArg->addAttr(Attribute::NonNull);
            Changed = true;
          }
        }
      }
    }
    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
      break;
  }

  return Changed;
}

static bool addReadAttr(Argument *A, Attribute::AttrKind R) {
  assert((R == Attribute::ReadOnly || R == Attribute::ReadNone)
         && "Must be a Read attribute.");
  assert(A && "Argument must not be null.");

  // If the argument already has the attribute, nothing needs to be done.
  if (A->hasAttribute(R))
    return false;

  // Otherwise, remove potentially conflicting attributes, add the new one,
  // and update statistics.
  A->removeAttr(Attribute::WriteOnly);
  A->removeAttr(Attribute::ReadOnly);
  A->removeAttr(Attribute::ReadNone);
  A->addAttr(R);
  R == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg;
  return true;
}

/// Deduce nocapture attributes for the SCC.
static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
                             SmallSetVector<Function *, 8> &Changed) {
  ArgumentGraph AG;

  // Check each function in turn, determining which pointer arguments are not
  // captured.
  for (Function *F : SCCNodes) {
    // We can infer and propagate function attributes only when we know that
    // the definition we'll get at link time is *exactly* the definition we
    // see now. For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      continue;

    if (addArgumentAttrsFromCallsites(*F))
      Changed.insert(F);

    // Functions that are readonly (or readnone) and nounwind and don't return
    // a value can't capture arguments. Don't analyze them.
    if (F->onlyReadsMemory() && F->doesNotThrow() &&
        F->getReturnType()->isVoidTy()) {
      for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E;
           ++A) {
        if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) {
          A->addAttr(Attribute::NoCapture);
          ++NumNoCapture;
          Changed.insert(F);
        }
      }
      continue;
    }

    for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E;
         ++A) {
      if (!A->getType()->isPointerTy())
        continue;
      bool HasNonLocalUses = false;
      if (!A->hasNoCaptureAttr()) {
        ArgumentUsesTracker Tracker(SCCNodes);
        PointerMayBeCaptured(&*A, &Tracker);
        if (!Tracker.Captured) {
          if (Tracker.Uses.empty()) {
            // If it's trivially not captured, mark it nocapture now.
            A->addAttr(Attribute::NoCapture);
            ++NumNoCapture;
            Changed.insert(F);
          } else {
            // If it's not trivially captured and not trivially not captured,
            // then it must be calling into another function in our SCC. Save
            // its particulars for Argument-SCC analysis later.
            ArgumentGraphNode *Node = AG[&*A];
            for (Argument *Use : Tracker.Uses) {
              Node->Uses.push_back(AG[Use]);
              if (Use != &*A)
                HasNonLocalUses = true;
            }
          }
        }
        // Otherwise, it's captured. Don't bother doing SCC analysis on it.
      }
      if (!HasNonLocalUses && !A->onlyReadsMemory()) {
        // Can we determine that it's readonly/readnone without doing an SCC?
        // Note that we don't allow any calls at all here, or else our result
        // will be dependent on the iteration order through the functions in
        // the SCC.
        SmallPtrSet<Argument *, 8> Self;
        Self.insert(&*A);
        Attribute::AttrKind R = determinePointerReadAttrs(&*A, Self);
        if (R != Attribute::None)
          if (addReadAttr(A, R))
            Changed.insert(F);
      }
    }
  }

  // The graph we've collected is partial because we stopped scanning for
  // argument uses once we solved the argument trivially. These partial nodes
  // show up as ArgumentGraphNode objects with an empty Uses list, and for
  // these nodes the final decision about whether they capture has already been
  // made. If the definition doesn't have a 'nocapture' attribute by now, it
  // captures.

  for (scc_iterator<ArgumentGraph *> I = scc_begin(&AG); !I.isAtEnd(); ++I) {
    const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I;
    if (ArgumentSCC.size() == 1) {
      if (!ArgumentSCC[0]->Definition)
        continue; // synthetic root node

      // eg.
"void f(int* x) { if (...) f(x); }" 779 if (ArgumentSCC[0]->Uses.size() == 1 && 780 ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) { 781 Argument *A = ArgumentSCC[0]->Definition; 782 A->addAttr(Attribute::NoCapture); 783 ++NumNoCapture; 784 Changed.insert(A->getParent()); 785 } 786 continue; 787 } 788 789 bool SCCCaptured = false; 790 for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); 791 I != E && !SCCCaptured; ++I) { 792 ArgumentGraphNode *Node = *I; 793 if (Node->Uses.empty()) { 794 if (!Node->Definition->hasNoCaptureAttr()) 795 SCCCaptured = true; 796 } 797 } 798 if (SCCCaptured) 799 continue; 800 801 SmallPtrSet<Argument *, 8> ArgumentSCCNodes; 802 // Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for 803 // quickly looking up whether a given Argument is in this ArgumentSCC. 804 for (ArgumentGraphNode *I : ArgumentSCC) { 805 ArgumentSCCNodes.insert(I->Definition); 806 } 807 808 for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); 809 I != E && !SCCCaptured; ++I) { 810 ArgumentGraphNode *N = *I; 811 for (ArgumentGraphNode *Use : N->Uses) { 812 Argument *A = Use->Definition; 813 if (A->hasNoCaptureAttr() || ArgumentSCCNodes.count(A)) 814 continue; 815 SCCCaptured = true; 816 break; 817 } 818 } 819 if (SCCCaptured) 820 continue; 821 822 for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { 823 Argument *A = ArgumentSCC[i]->Definition; 824 A->addAttr(Attribute::NoCapture); 825 ++NumNoCapture; 826 Changed.insert(A->getParent()); 827 } 828 829 // We also want to compute readonly/readnone. With a small number of false 830 // negatives, we can assume that any pointer which is captured isn't going 831 // to be provably readonly or readnone, since by definition we can't 832 // analyze all uses of a captured pointer. 833 // 834 // The false negatives happen when the pointer is captured by a function 835 // that promises readonly/readnone behaviour on the pointer, then the 836 // pointer's lifetime ends before anything that writes to arbitrary memory. 837 // Also, a readonly/readnone pointer may be returned, but returning a 838 // pointer is capturing it. 839 840 Attribute::AttrKind ReadAttr = Attribute::ReadNone; 841 for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { 842 Argument *A = ArgumentSCC[i]->Definition; 843 Attribute::AttrKind K = determinePointerReadAttrs(A, ArgumentSCCNodes); 844 if (K == Attribute::ReadNone) 845 continue; 846 if (K == Attribute::ReadOnly) { 847 ReadAttr = Attribute::ReadOnly; 848 continue; 849 } 850 ReadAttr = K; 851 break; 852 } 853 854 if (ReadAttr != Attribute::None) { 855 for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { 856 Argument *A = ArgumentSCC[i]->Definition; 857 if (addReadAttr(A, ReadAttr)) 858 Changed.insert(A->getParent()); 859 } 860 } 861 } 862 } 863 864 /// Tests whether a function is "malloc-like". 865 /// 866 /// A function is "malloc-like" if it returns either null or a pointer that 867 /// doesn't alias any other pointer visible to the caller. 
static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) {
  SmallSetVector<Value *, 8> FlowsToReturn;
  for (BasicBlock &BB : *F)
    if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator()))
      FlowsToReturn.insert(Ret->getReturnValue());

  for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
    Value *RetVal = FlowsToReturn[i];

    if (Constant *C = dyn_cast<Constant>(RetVal)) {
      if (!C->isNullValue() && !isa<UndefValue>(C))
        return false;

      continue;
    }

    if (isa<Argument>(RetVal))
      return false;

    if (Instruction *RVI = dyn_cast<Instruction>(RetVal))
      switch (RVI->getOpcode()) {
      // Extend the analysis by looking upwards.
      case Instruction::BitCast:
      case Instruction::GetElementPtr:
      case Instruction::AddrSpaceCast:
        FlowsToReturn.insert(RVI->getOperand(0));
        continue;
      case Instruction::Select: {
        SelectInst *SI = cast<SelectInst>(RVI);
        FlowsToReturn.insert(SI->getTrueValue());
        FlowsToReturn.insert(SI->getFalseValue());
        continue;
      }
      case Instruction::PHI: {
        PHINode *PN = cast<PHINode>(RVI);
        for (Value *IncValue : PN->incoming_values())
          FlowsToReturn.insert(IncValue);
        continue;
      }

      // Check whether the pointer came from an allocation.
      case Instruction::Alloca:
        break;
      case Instruction::Call:
      case Instruction::Invoke: {
        CallBase &CB = cast<CallBase>(*RVI);
        if (CB.hasRetAttr(Attribute::NoAlias))
          break;
        if (CB.getCalledFunction() && SCCNodes.count(CB.getCalledFunction()))
          break;
        LLVM_FALLTHROUGH;
      }
      default:
        return false; // Did not come from an allocation.
      }

    if (PointerMayBeCaptured(RetVal, false, /*StoreCaptures=*/false))
      return false;
  }

  return true;
}

/// Deduce noalias attributes for the SCC.
static void addNoAliasAttrs(const SCCNodeSet &SCCNodes,
                            SmallSetVector<Function *, 8> &Changed) {
  // Check each function in turn, determining which functions return noalias
  // pointers.
  for (Function *F : SCCNodes) {
    // Already noalias.
    if (F->returnDoesNotAlias())
      continue;

    // We can infer and propagate function attributes only when we know that
    // the definition we'll get at link time is *exactly* the definition we
    // see now. For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      return;

    // We annotate noalias return values, which are only applicable to
    // pointer types.
    if (!F->getReturnType()->isPointerTy())
      continue;

    if (!isFunctionMallocLike(F, SCCNodes))
      return;
  }

  for (Function *F : SCCNodes) {
    if (F->returnDoesNotAlias() ||
        !F->getReturnType()->isPointerTy())
      continue;

    F->setReturnDoesNotAlias();
    ++NumNoAlias;
    Changed.insert(F);
  }
}

/// Tests whether this function is known to not return null.
///
/// Requires that the function returns a pointer.
///
/// Returns true if it believes the function will not return null, and sets
/// \p Speculative based on whether the returned conclusion is a speculative
/// conclusion due to SCC calls.
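///
/// For example (illustrative): a function whose every returned value is a
/// fresh alloca is known to return nonnull; a value returned from a call back
/// into this SCC is only speculatively nonnull, which is reported via
/// \p Speculative.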
static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes,
                            bool &Speculative) {
  assert(F->getReturnType()->isPointerTy() &&
         "nonnull only meaningful on pointer types");
  Speculative = false;

  SmallSetVector<Value *, 8> FlowsToReturn;
  for (BasicBlock &BB : *F)
    if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator()))
      FlowsToReturn.insert(Ret->getReturnValue());

  auto &DL = F->getParent()->getDataLayout();

  for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
    Value *RetVal = FlowsToReturn[i];

    // If this value is locally known to be non-null, we're good
    if (isKnownNonZero(RetVal, DL))
      continue;

    // Otherwise, we need to look upwards since we can't make any local
    // conclusions.
    Instruction *RVI = dyn_cast<Instruction>(RetVal);
    if (!RVI)
      return false;
    switch (RVI->getOpcode()) {
    // Extend the analysis by looking upwards.
    case Instruction::BitCast:
    case Instruction::GetElementPtr:
    case Instruction::AddrSpaceCast:
      FlowsToReturn.insert(RVI->getOperand(0));
      continue;
    case Instruction::Select: {
      SelectInst *SI = cast<SelectInst>(RVI);
      FlowsToReturn.insert(SI->getTrueValue());
      FlowsToReturn.insert(SI->getFalseValue());
      continue;
    }
    case Instruction::PHI: {
      PHINode *PN = cast<PHINode>(RVI);
      for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
        FlowsToReturn.insert(PN->getIncomingValue(i));
      continue;
    }
    case Instruction::Call:
    case Instruction::Invoke: {
      CallBase &CB = cast<CallBase>(*RVI);
      Function *Callee = CB.getCalledFunction();
      // A call to a node within the SCC is assumed to return null until
      // proven otherwise
      if (Callee && SCCNodes.count(Callee)) {
        Speculative = true;
        continue;
      }
      return false;
    }
    default:
      return false; // Unknown source, may be null
    };
    llvm_unreachable("should have either continued or returned");
  }

  return true;
}

/// Deduce nonnull attributes for the SCC.
static void addNonNullAttrs(const SCCNodeSet &SCCNodes,
                            SmallSetVector<Function *, 8> &Changed) {
  // Speculatively assume that all functions in the SCC return only nonnull
  // pointers. We may refute this as we analyze functions.
  bool SCCReturnsNonNull = true;

  // Check each function in turn, determining which functions return nonnull
  // pointers.
  for (Function *F : SCCNodes) {
    // Already nonnull.
    if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
                                        Attribute::NonNull))
      continue;

    // We can infer and propagate function attributes only when we know that
    // the definition we'll get at link time is *exactly* the definition we
    // see now. For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      return;

    // We annotate nonnull return values, which are only applicable to
    // pointer types.
    if (!F->getReturnType()->isPointerTy())
      continue;

    bool Speculative = false;
    if (isReturnNonNull(F, SCCNodes, Speculative)) {
      if (!Speculative) {
        // Mark the function eagerly since we may discover a function
        // which prevents us from speculating about the entire SCC
        LLVM_DEBUG(dbgs() << "Eagerly marking " << F->getName()
                          << " as nonnull\n");
        F->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
        ++NumNonNullReturn;
        Changed.insert(F);
      }
      continue;
    }
    // At least one function returns something which could be null, can't
    // speculate any more.
    SCCReturnsNonNull = false;
  }

  if (SCCReturnsNonNull) {
    for (Function *F : SCCNodes) {
      if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
                                          Attribute::NonNull) ||
          !F->getReturnType()->isPointerTy())
        continue;

      LLVM_DEBUG(dbgs() << "SCC marking " << F->getName() << " as nonnull\n");
      F->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
      ++NumNonNullReturn;
      Changed.insert(F);
    }
  }
}

namespace {

/// Collects a set of attribute inference requests and performs them all in one
/// go on a single SCC Node. Inference involves scanning function bodies
/// looking for instructions that violate attribute assumptions.
/// As soon as all the bodies are fine we are free to set the attribute.
/// Customization of inference for individual attributes is performed by
/// providing a handful of predicates for each attribute.
class AttributeInferer {
public:
  /// Describes a request for inference of a single attribute.
  struct InferenceDescriptor {

    /// Returns true if this function does not have to be handled.
    /// General intent for this predicate is to provide an optimization
    /// for functions that do not need this attribute inference at all
    /// (say, for functions that already have the attribute).
    std::function<bool(const Function &)> SkipFunction;

    /// Returns true if this instruction violates attribute assumptions.
    std::function<bool(Instruction &)> InstrBreaksAttribute;

    /// Sets the inferred attribute for this function.
    std::function<void(Function &)> SetAttribute;

    /// Attribute we derive.
    Attribute::AttrKind AKind;

    /// If true, only "exact" definitions can be used to infer this attribute.
    /// See GlobalValue::isDefinitionExact.
    bool RequiresExactDefinition;

    InferenceDescriptor(Attribute::AttrKind AK,
                        std::function<bool(const Function &)> SkipFunc,
                        std::function<bool(Instruction &)> InstrScan,
                        std::function<void(Function &)> SetAttr,
                        bool ReqExactDef)
        : SkipFunction(SkipFunc), InstrBreaksAttribute(InstrScan),
          SetAttribute(SetAttr), AKind(AK),
          RequiresExactDefinition(ReqExactDef) {}
  };

private:
  SmallVector<InferenceDescriptor, 4> InferenceDescriptors;

public:
  void registerAttrInference(InferenceDescriptor AttrInference) {
    InferenceDescriptors.push_back(AttrInference);
  }

  void run(const SCCNodeSet &SCCNodes, SmallSetVector<Function *, 8> &Changed);
};

/// Perform all the requested attribute inference actions according to the
/// attribute predicates stored before.
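///
/// The scheme: optimistically assume every registered attribute holds across
/// the SCC, drop an attribute as soon as any scanned instruction violates its
/// assumption, and finally set whatever survived on all non-skipped functions.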
void AttributeInferer::run(const SCCNodeSet &SCCNodes,
                           SmallSetVector<Function *, 8> &Changed) {
  SmallVector<InferenceDescriptor, 4> InferInSCC = InferenceDescriptors;
  // Go through all the functions in SCC and check corresponding attribute
  // assumptions for each of them. Attributes that are invalid for this SCC
  // will be removed from InferInSCC.
  for (Function *F : SCCNodes) {

    // No attributes whose assumptions are still valid - done.
    if (InferInSCC.empty())
      return;

    // Check if our attributes ever need scanning/can be scanned.
    llvm::erase_if(InferInSCC, [F](const InferenceDescriptor &ID) {
      if (ID.SkipFunction(*F))
        return false;

      // Remove from further inference (invalidate) when visiting a function
      // that has no instructions to scan/has an unsuitable definition.
      return F->isDeclaration() ||
             (ID.RequiresExactDefinition && !F->hasExactDefinition());
    });

    // For each attribute still in InferInSCC that doesn't explicitly skip F,
    // set up the F instructions scan to verify assumptions of the attribute.
    SmallVector<InferenceDescriptor, 4> InferInThisFunc;
    llvm::copy_if(
        InferInSCC, std::back_inserter(InferInThisFunc),
        [F](const InferenceDescriptor &ID) { return !ID.SkipFunction(*F); });

    if (InferInThisFunc.empty())
      continue;

    // Start instruction scan.
    for (Instruction &I : instructions(*F)) {
      llvm::erase_if(InferInThisFunc, [&](const InferenceDescriptor &ID) {
        if (!ID.InstrBreaksAttribute(I))
          return false;
        // Remove attribute from further inference on any other functions
        // because attribute assumptions have just been violated.
        llvm::erase_if(InferInSCC, [&ID](const InferenceDescriptor &D) {
          return D.AKind == ID.AKind;
        });
        // Remove attribute from the rest of current instruction scan.
        return true;
      });

      if (InferInThisFunc.empty())
        break;
    }
  }

  if (InferInSCC.empty())
    return;

  for (Function *F : SCCNodes)
    // At this point InferInSCC contains only attributes whose assumptions
    // held for the whole SCC: each function was either
    //   - explicitly skipped from scan/inference, or
    //   - verified to have no instructions that break attribute assumptions.
    // Hence we just go and force the attribute for all non-skipped functions.
    for (auto &ID : InferInSCC) {
      if (ID.SkipFunction(*F))
        continue;
      Changed.insert(F);
      ID.SetAttribute(*F);
    }
}

struct SCCNodesResult {
  SCCNodeSet SCCNodes;
  bool HasUnknownCall;
};

} // end anonymous namespace

/// Helper for non-Convergent inference predicate InstrBreaksAttribute.
static bool InstrBreaksNonConvergent(Instruction &I,
                                     const SCCNodeSet &SCCNodes) {
  const CallBase *CB = dyn_cast<CallBase>(&I);
  // Breaks non-convergent assumption if CS is a convergent call to a function
  // not in the SCC.
  return CB && CB->isConvergent() &&
         SCCNodes.count(CB->getCalledFunction()) == 0;
}

/// Helper for NoUnwind inference predicate InstrBreaksAttribute.
static bool InstrBreaksNonThrowing(Instruction &I, const SCCNodeSet &SCCNodes) {
  if (!I.mayThrow())
    return false;
  if (const auto *CI = dyn_cast<CallInst>(&I)) {
    if (Function *Callee = CI->getCalledFunction()) {
      // I is a may-throw call to a function inside our SCC.
      // This doesn't invalidate our current working assumption that the SCC
      // is no-throw; we just have to scan that other function.
      if (SCCNodes.contains(Callee))
        return false;
    }
  }
  return true;
}

/// Helper for NoFree inference predicate InstrBreaksAttribute.
static bool InstrBreaksNoFree(Instruction &I, const SCCNodeSet &SCCNodes) {
  CallBase *CB = dyn_cast<CallBase>(&I);
  if (!CB)
    return false;

  if (CB->hasFnAttr(Attribute::NoFree))
    return false;

  // Speculatively assume in SCC.
  if (Function *Callee = CB->getCalledFunction())
    if (SCCNodes.contains(Callee))
      return false;

  return true;
}

/// Attempt to remove the convergent function attribute when possible.
///
/// Functions whose attributes change are added to \p Changed.
static void inferConvergent(const SCCNodeSet &SCCNodes,
                            SmallSetVector<Function *, 8> &Changed) {
  AttributeInferer AI;

  // Request to remove the convergent attribute from all functions in the SCC
  // if every callsite within the SCC is not convergent (except for calls
  // to functions within the SCC).
  // Note: Removal of the attr from the callsites will happen in
  // InstCombineCalls separately.
  AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
      Attribute::Convergent,
      // Skip non-convergent functions.
      [](const Function &F) { return !F.isConvergent(); },
      // Instructions that break non-convergent assumption.
      [SCCNodes](Instruction &I) {
        return InstrBreaksNonConvergent(I, SCCNodes);
      },
      [](Function &F) {
        LLVM_DEBUG(dbgs() << "Removing convergent attr from fn " << F.getName()
                          << "\n");
        F.setNotConvergent();
      },
      /* RequiresExactDefinition= */ false});
  // Perform all the requested attribute inference actions.
  AI.run(SCCNodes, Changed);
}

/// Infer attributes from all functions in the SCC by scanning every
/// instruction for compliance to the attribute assumptions. Currently it
/// does:
///   - addition of NoUnwind attribute
///
/// Functions whose attributes change are added to \p Changed.
static void
inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes,
                             SmallSetVector<Function *, 8> &Changed) {
  AttributeInferer AI;

  if (!DisableNoUnwindInference)
    // Request to infer nounwind attribute for all the functions in the SCC if
    // every callsite within the SCC is not throwing (except for calls to
    // functions within the SCC). Note that nounwind attribute suffers from
    // derefinement - results may change depending on how functions are
    // optimized. Thus it can be inferred only from exact definitions.
    AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
        Attribute::NoUnwind,
        // Skip non-throwing functions.
        [](const Function &F) { return F.doesNotThrow(); },
        // Instructions that break non-throwing assumption.
        [&SCCNodes](Instruction &I) {
          return InstrBreaksNonThrowing(I, SCCNodes);
        },
        [](Function &F) {
          LLVM_DEBUG(dbgs()
                     << "Adding nounwind attr to fn " << F.getName() << "\n");
          F.setDoesNotThrow();
          ++NumNoUnwind;
        },
        /* RequiresExactDefinition= */ true});

  if (!DisableNoFreeInference)
    // Request to infer nofree attribute for all the functions in the SCC if
    // every callsite within the SCC does not directly or indirectly free
    // memory (except for calls to functions within the SCC). Note that nofree
    // attribute suffers from derefinement - results may change depending on
    // how functions are optimized. Thus it can be inferred only from exact
    // definitions.
    AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
        Attribute::NoFree,
        // Skip functions known not to free memory.
        [](const Function &F) { return F.doesNotFreeMemory(); },
        // Instructions that break non-deallocating assumption.
        [&SCCNodes](Instruction &I) {
          return InstrBreaksNoFree(I, SCCNodes);
        },
        [](Function &F) {
          LLVM_DEBUG(dbgs()
                     << "Adding nofree attr to fn " << F.getName() << "\n");
          F.setDoesNotFreeMemory();
          ++NumNoFree;
        },
        /* RequiresExactDefinition= */ true});

  // Perform all the requested attribute inference actions.
  AI.run(SCCNodes, Changed);
}

static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes,
                              SmallSetVector<Function *, 8> &Changed) {
  // Try and identify functions that do not recurse.

  // If the SCC contains multiple nodes we know for sure there is recursion.
  if (SCCNodes.size() != 1)
    return;

  Function *F = *SCCNodes.begin();
  if (!F || !F->hasExactDefinition() || F->doesNotRecurse())
    return;

  // If all of the calls in F are identifiable and are to norecurse functions,
  // F is norecurse. This check also detects self-recursion, as F is not
  // currently marked norecurse, so any call from F to F will not be to a
  // norecurse function.
  for (auto &BB : *F)
    for (auto &I : BB.instructionsWithoutDebug())
      if (auto *CB = dyn_cast<CallBase>(&I)) {
        Function *Callee = CB->getCalledFunction();
        if (!Callee || Callee == F || !Callee->doesNotRecurse())
          // Function calls a potentially recursive function.
          return;
      }

  // Every call was to a non-recursive function other than this function, and
  // we have no indirect recursion as the SCC size is one. This function cannot
  // recurse.
  F->setDoesNotRecurse();
  ++NumNoRecurse;
  Changed.insert(F);
}

static bool instructionDoesNotReturn(Instruction &I) {
  if (auto *CB = dyn_cast<CallBase>(&I))
    return CB->hasFnAttr(Attribute::NoReturn);
  return false;
}

// A basic block can only return if it terminates with a ReturnInst and does
// not contain calls to noreturn functions.
static bool basicBlockCanReturn(BasicBlock &BB) {
  if (!isa<ReturnInst>(BB.getTerminator()))
    return false;
  return none_of(BB, instructionDoesNotReturn);
}

// Set the noreturn function attribute if possible.
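// A function can be marked noreturn when none of its basic blocks can return,
// i.e. every block either lacks a returning terminator or contains a call to
// a noreturn function.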
static void addNoReturnAttrs(const SCCNodeSet &SCCNodes,
                             SmallSetVector<Function *, 8> &Changed) {
  for (Function *F : SCCNodes) {
    if (!F || !F->hasExactDefinition() || F->hasFnAttribute(Attribute::Naked) ||
        F->doesNotReturn())
      continue;

    // The function can return if any of its basic blocks can return.
    // FIXME: this doesn't handle recursion or unreachable blocks.
    if (none_of(*F, basicBlockCanReturn)) {
      F->setDoesNotReturn();
      Changed.insert(F);
    }
  }
}

static bool functionWillReturn(const Function &F) {
  // We can infer and propagate function attributes only when we know that the
  // definition we'll get at link time is *exactly* the definition we see now.
  // For more details, see GlobalValue::mayBeDerefined.
  if (!F.hasExactDefinition())
    return false;

  // Must-progress function without side-effects must return.
  if (F.mustProgress() && F.onlyReadsMemory())
    return true;

  // Can only analyze functions with a definition.
  if (F.isDeclaration())
    return false;

  // Functions with loops require more sophisticated analysis, as the loop
  // may be infinite. For now, don't try to handle them.
  SmallVector<std::pair<const BasicBlock *, const BasicBlock *>> Backedges;
  FindFunctionBackedges(F, Backedges);
  if (!Backedges.empty())
    return false;

  // If there are no loops, then the function is willreturn if all calls in
  // it are willreturn.
  return all_of(instructions(F), [](const Instruction &I) {
    return I.willReturn();
  });
}

// Set the willreturn function attribute if possible.
static void addWillReturn(const SCCNodeSet &SCCNodes,
                          SmallSetVector<Function *, 8> &Changed) {
  for (Function *F : SCCNodes) {
    if (!F || F->willReturn() || !functionWillReturn(*F))
      continue;

    F->setWillReturn();
    NumWillReturn++;
    Changed.insert(F);
  }
}

// Return true if this is an atomic which has an ordering stronger than
// unordered. Note that this is different from the predicate we use in
// Attributor. Here we chose to be conservative and consider monotonic
// operations potentially synchronizing. We generally don't do much with
// monotonic operations, so this is simply risk reduction.
static bool isOrderedAtomic(Instruction *I) {
  if (!I->isAtomic())
    return false;

  if (auto *FI = dyn_cast<FenceInst>(I))
    // All legal orderings for fence are stronger than monotonic.
    return FI->getSyncScopeID() != SyncScope::SingleThread;
  else if (isa<AtomicCmpXchgInst>(I) || isa<AtomicRMWInst>(I))
    return true;
  else if (auto *SI = dyn_cast<StoreInst>(I))
    return !SI->isUnordered();
  else if (auto *LI = dyn_cast<LoadInst>(I))
    return !LI->isUnordered();
  else {
    llvm_unreachable("unknown atomic instruction?");
  }
}

static bool InstrBreaksNoSync(Instruction &I, const SCCNodeSet &SCCNodes) {
  // Volatile may synchronize
  if (I.isVolatile())
    return true;

  // An ordered atomic may synchronize. (See the comment above on monotonic.)
  if (isOrderedAtomic(&I))
    return true;

  auto *CB = dyn_cast<CallBase>(&I);
  if (!CB)
    // Non-call-site cases are covered by the two checks above.
    return false;

  if (CB->hasFnAttr(Attribute::NoSync))
    return false;

  // Non-volatile memset/memcpy/memmoves are nosync.
  // NOTE: Only intrinsics with volatile flags should be handled here. All
  // others should be marked in Intrinsics.td.
  if (auto *MI = dyn_cast<MemIntrinsic>(&I))
    if (!MI->isVolatile())
      return false;

  // Speculatively assume in SCC.
  if (Function *Callee = CB->getCalledFunction())
    if (SCCNodes.contains(Callee))
      return false;

  return true;
}

// Infer the nosync attribute.
static void addNoSyncAttr(const SCCNodeSet &SCCNodes,
                          SmallSetVector<Function *, 8> &Changed) {
  AttributeInferer AI;
  AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
      Attribute::NoSync,
      // Skip already marked functions.
      [](const Function &F) { return F.hasNoSync(); },
      // Instructions that break nosync assumption.
      [&SCCNodes](Instruction &I) {
        return InstrBreaksNoSync(I, SCCNodes);
      },
      [](Function &F) {
        LLVM_DEBUG(dbgs()
                   << "Adding nosync attr to fn " << F.getName() << "\n");
        F.setNoSync();
        ++NumNoSync;
      },
      /* RequiresExactDefinition= */ true});
  AI.run(SCCNodes, Changed);
}

static SCCNodesResult createSCCNodeSet(ArrayRef<Function *> Functions) {
  SCCNodesResult Res;
  Res.HasUnknownCall = false;
  for (Function *F : Functions) {
    if (!F || F->hasOptNone() || F->hasFnAttribute(Attribute::Naked)) {
      // Treat any function we're trying not to optimize as if it were an
      // indirect call and omit it from the node set used below.
      Res.HasUnknownCall = true;
      continue;
    }
    // Track whether any functions in this SCC have an unknown call edge.
    // Note: if this is ever a performance hit, we can common it with
    // subsequent routines which also do scans over the instructions of the
    // function.
    if (!Res.HasUnknownCall) {
      for (Instruction &I : instructions(*F)) {
        if (auto *CB = dyn_cast<CallBase>(&I)) {
          if (!CB->getCalledFunction()) {
            Res.HasUnknownCall = true;
            break;
          }
        }
      }
    }
    Res.SCCNodes.insert(F);
  }
  return Res;
}

template <typename AARGetterT>
static SmallSetVector<Function *, 8>
deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter) {
  SCCNodesResult Nodes = createSCCNodeSet(Functions);

  // Bail if the SCC only contains optnone functions.
  if (Nodes.SCCNodes.empty())
    return {};

  SmallSetVector<Function *, 8> Changed;

  addArgumentReturnedAttrs(Nodes.SCCNodes, Changed);
  addReadAttrs(Nodes.SCCNodes, AARGetter, Changed);
  addArgumentAttrs(Nodes.SCCNodes, Changed);
  inferConvergent(Nodes.SCCNodes, Changed);
  addNoReturnAttrs(Nodes.SCCNodes, Changed);
  addWillReturn(Nodes.SCCNodes, Changed);

  // If we have no external nodes participating in the SCC, we can deduce some
  // more precise attributes as well.
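  // (An unknown call edge could re-enter the SCC in ways we cannot analyze,
  // which would make the speculative whole-SCC reasoning below unsound.)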
  if (!Nodes.HasUnknownCall) {
    addNoAliasAttrs(Nodes.SCCNodes, Changed);
    addNonNullAttrs(Nodes.SCCNodes, Changed);
    inferAttrsFromFunctionBodies(Nodes.SCCNodes, Changed);
    addNoRecurseAttrs(Nodes.SCCNodes, Changed);
  }

  addNoSyncAttr(Nodes.SCCNodes, Changed);

  // Finally, infer the maximal set of attributes from the ones we've inferred
  // above. This is handling the cases where one attribute on a signature
  // implies another, but for implementation reasons the inference rule for
  // the latter is missing (or simply less sophisticated).
  for (Function *F : Nodes.SCCNodes)
    if (F)
      if (inferAttributesFromOthers(*F))
        Changed.insert(F);

  return Changed;
}

PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
                                                  CGSCCAnalysisManager &AM,
                                                  LazyCallGraph &CG,
                                                  CGSCCUpdateResult &) {
  FunctionAnalysisManager &FAM =
      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();

  // We pass a lambda into functions to wire them up to the analysis manager
  // for getting function analyses.
  auto AARGetter = [&](Function &F) -> AAResults & {
    return FAM.getResult<AAManager>(F);
  };

  SmallVector<Function *, 8> Functions;
  for (LazyCallGraph::Node &N : C) {
    Functions.push_back(&N.getFunction());
  }

  auto ChangedFunctions = deriveAttrsInPostOrder(Functions, AARGetter);
  if (ChangedFunctions.empty()) {
    return PreservedAnalyses::all();
  }

  // Invalidate analyses for modified functions so that we don't have to
  // invalidate all analyses for all functions in this SCC.
  PreservedAnalyses FuncPA;
  // We haven't changed the CFG for modified functions.
  FuncPA.preserveSet<CFGAnalyses>();
  for (Function *Changed : ChangedFunctions)
    FAM.invalidate(*Changed, FuncPA);

  // We have not changed the call graph or removed/added functions.
namespace {

struct PostOrderFunctionAttrsLegacyPass : public CallGraphSCCPass {
  // Pass identification, replacement for typeid
  static char ID;

  PostOrderFunctionAttrsLegacyPass() : CallGraphSCCPass(ID) {
    initializePostOrderFunctionAttrsLegacyPassPass(
        *PassRegistry::getPassRegistry());
  }

  bool runOnSCC(CallGraphSCC &SCC) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<AssumptionCacheTracker>();
    getAAResultsAnalysisUsage(AU);
    CallGraphSCCPass::getAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char PostOrderFunctionAttrsLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(PostOrderFunctionAttrsLegacyPass, "function-attrs",
                      "Deduce function attributes", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_END(PostOrderFunctionAttrsLegacyPass, "function-attrs",
                    "Deduce function attributes", false, false)

Pass *llvm::createPostOrderFunctionAttrsLegacyPass() {
  return new PostOrderFunctionAttrsLegacyPass();
}

template <typename AARGetterT>
static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
  SmallVector<Function *, 8> Functions;
  for (CallGraphNode *I : SCC) {
    Functions.push_back(I->getFunction());
  }

  return !deriveAttrsInPostOrder(Functions, AARGetter).empty();
}

bool PostOrderFunctionAttrsLegacyPass::runOnSCC(CallGraphSCC &SCC) {
  if (skipSCC(SCC))
    return false;
  return runImpl(SCC, LegacyAARGetter(*this));
}

namespace {

struct ReversePostOrderFunctionAttrsLegacyPass : public ModulePass {
  // Pass identification, replacement for typeid
  static char ID;

  ReversePostOrderFunctionAttrsLegacyPass() : ModulePass(ID) {
    initializeReversePostOrderFunctionAttrsLegacyPassPass(
        *PassRegistry::getPassRegistry());
  }

  bool runOnModule(Module &M) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<CallGraphWrapperPass>();
    AU.addPreserved<CallGraphWrapperPass>();
  }
};

} // end anonymous namespace

char ReversePostOrderFunctionAttrsLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(ReversePostOrderFunctionAttrsLegacyPass,
                      "rpo-function-attrs", "Deduce function attributes in RPO",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_END(ReversePostOrderFunctionAttrsLegacyPass,
                    "rpo-function-attrs", "Deduce function attributes in RPO",
                    false, false)

Pass *llvm::createReversePostOrderFunctionAttrsPass() {
  return new ReversePostOrderFunctionAttrsLegacyPass();
}
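// For example (illustrative IR, not taken from a test): given
//
//   define internal void @callee() { ... }
//   define void @caller() norecurse {
//     call void @callee()
//     ret void
//   }
//
// if every user of @callee is a direct call from a function already marked
// norecurse, @callee cannot take part in a recursive cycle and can itself be
// marked norecurse by the top-down walk below.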
static bool addNoRecurseAttrsTopDown(Function &F) {
  // We check the preconditions for the function prior to calling this to avoid
  // the cost of building up a reversible post-order list. We assert them here
  // to make sure none of the invariants this relies on were violated.
  assert(!F.isDeclaration() && "Cannot deduce norecurse without a definition!");
  assert(!F.doesNotRecurse() &&
         "This function has already been deduced as norecurse!");
  assert(F.hasInternalLinkage() &&
         "Can only do top-down deduction for internal linkage functions!");

  // If F is internal and all of its uses are calls from non-recursive
  // functions, then none of its calls could in fact recurse without going
  // through a function marked norecurse, and so we can mark this function too
  // as norecurse. Note that the uses must actually be calls -- otherwise
  // a pointer to this function could be returned from a norecurse function but
  // this function could be recursively (indirectly) called. Note that this
  // also detects if F is directly recursive, as F is not yet marked as
  // a norecurse function.
  for (auto *U : F.users()) {
    auto *I = dyn_cast<Instruction>(U);
    if (!I)
      return false;
    CallBase *CB = dyn_cast<CallBase>(I);
    if (!CB || !CB->getParent()->getParent()->doesNotRecurse())
      return false;
  }
  F.setDoesNotRecurse();
  ++NumNoRecurse;
  return true;
}

static bool deduceFunctionAttributeInRPO(Module &M, CallGraph &CG) {
  // We only have a post-order SCC traversal (because SCCs are inherently
  // discovered in post-order), so we accumulate them in a vector and then walk
  // it in reverse. This is simpler than using the RPO iterator infrastructure
  // because we need to combine SCC detection and the PO walk of the call
  // graph. We can also cheat egregiously because we're primarily interested in
  // synthesizing norecurse, and so we only save the singleton SCCs, as SCCs
  // with multiple functions in them will clearly be recursive.
  SmallVector<Function *, 16> Worklist;
  for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) {
    if (I->size() != 1)
      continue;

    Function *F = I->front()->getFunction();
    if (F && !F->isDeclaration() && !F->doesNotRecurse() &&
        F->hasInternalLinkage())
      Worklist.push_back(F);
  }

  bool Changed = false;
  for (auto *F : llvm::reverse(Worklist))
    Changed |= addNoRecurseAttrsTopDown(*F);

  return Changed;
}

bool ReversePostOrderFunctionAttrsLegacyPass::runOnModule(Module &M) {
  if (skipModule(M))
    return false;

  auto &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();

  return deduceFunctionAttributeInRPO(M, CG);
}

PreservedAnalyses
ReversePostOrderFunctionAttrsPass::run(Module &M, ModuleAnalysisManager &AM) {
  auto &CG = AM.getResult<CallGraphAnalysis>(M);

  if (!deduceFunctionAttributeInRPO(M, CG))
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserve<CallGraphAnalysis>();
  return PA;
}
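// A quick way to exercise these passes in isolation with the new pass manager
// (a usage sketch using the pass names registered above, not a test):
//
//   opt -passes=function-attrs -S input.ll
//   opt -passes=rpo-function-attrs -S input.ll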