//===- CoroSplit.cpp - Converts a coroutine into a state machine ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This pass builds the coroutine frame and outlines resume and destroy parts
// of the coroutine into separate functions.
//
// We present a coroutine to LLVM as an ordinary function with suspension
// points marked up with intrinsics. We let the optimizer party on the coroutine
// as a single function for as long as possible. Shortly before the coroutine is
// eligible to be inlined into its callers, we split up the coroutine into parts
// corresponding to the initial, resume and destroy invocations of the
// coroutine, add them to the current SCC and restart the IPO pipeline to
// optimize the coroutine subfunctions we extracted before proceeding to the
// caller of the coroutine.
19 //===----------------------------------------------------------------------===// 20 21 #include "llvm/Transforms/Coroutines/CoroSplit.h" 22 #include "CoroInstr.h" 23 #include "CoroInternal.h" 24 #include "llvm/ADT/DenseMap.h" 25 #include "llvm/ADT/SmallPtrSet.h" 26 #include "llvm/ADT/SmallVector.h" 27 #include "llvm/ADT/StringRef.h" 28 #include "llvm/ADT/Twine.h" 29 #include "llvm/Analysis/CFG.h" 30 #include "llvm/Analysis/CallGraph.h" 31 #include "llvm/Analysis/CallGraphSCCPass.h" 32 #include "llvm/Analysis/LazyCallGraph.h" 33 #include "llvm/IR/Argument.h" 34 #include "llvm/IR/Attributes.h" 35 #include "llvm/IR/BasicBlock.h" 36 #include "llvm/IR/CFG.h" 37 #include "llvm/IR/CallingConv.h" 38 #include "llvm/IR/Constants.h" 39 #include "llvm/IR/DataLayout.h" 40 #include "llvm/IR/DerivedTypes.h" 41 #include "llvm/IR/Dominators.h" 42 #include "llvm/IR/Function.h" 43 #include "llvm/IR/GlobalValue.h" 44 #include "llvm/IR/GlobalVariable.h" 45 #include "llvm/IR/IRBuilder.h" 46 #include "llvm/IR/InstIterator.h" 47 #include "llvm/IR/InstrTypes.h" 48 #include "llvm/IR/Instruction.h" 49 #include "llvm/IR/Instructions.h" 50 #include "llvm/IR/IntrinsicInst.h" 51 #include "llvm/IR/LLVMContext.h" 52 #include "llvm/IR/LegacyPassManager.h" 53 #include "llvm/IR/Module.h" 54 #include "llvm/IR/Type.h" 55 #include "llvm/IR/Value.h" 56 #include "llvm/IR/Verifier.h" 57 #include "llvm/InitializePasses.h" 58 #include "llvm/Pass.h" 59 #include "llvm/Support/Casting.h" 60 #include "llvm/Support/Debug.h" 61 #include "llvm/Support/PrettyStackTrace.h" 62 #include "llvm/Support/raw_ostream.h" 63 #include "llvm/Transforms/Scalar.h" 64 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 65 #include "llvm/Transforms/Utils/CallGraphUpdater.h" 66 #include "llvm/Transforms/Utils/Cloning.h" 67 #include "llvm/Transforms/Utils/Local.h" 68 #include "llvm/Transforms/Utils/ValueMapper.h" 69 #include <cassert> 70 #include <cstddef> 71 #include <cstdint> 72 #include <initializer_list> 73 #include <iterator> 
74 75 using namespace llvm; 76 77 #define DEBUG_TYPE "coro-split" 78 79 namespace { 80 81 /// A little helper class for building 82 class CoroCloner { 83 public: 84 enum class Kind { 85 /// The shared resume function for a switch lowering. 86 SwitchResume, 87 88 /// The shared unwind function for a switch lowering. 89 SwitchUnwind, 90 91 /// The shared cleanup function for a switch lowering. 92 SwitchCleanup, 93 94 /// An individual continuation function. 95 Continuation, 96 97 /// An async resume function. 98 Async, 99 }; 100 101 private: 102 Function &OrigF; 103 Function *NewF; 104 const Twine &Suffix; 105 coro::Shape &Shape; 106 Kind FKind; 107 ValueToValueMapTy VMap; 108 IRBuilder<> Builder; 109 Value *NewFramePtr = nullptr; 110 111 /// The active suspend instruction; meaningful only for continuation and async 112 /// ABIs. 113 AnyCoroSuspendInst *ActiveSuspend = nullptr; 114 115 public: 116 /// Create a cloner for a switch lowering. 117 CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape, 118 Kind FKind) 119 : OrigF(OrigF), NewF(nullptr), Suffix(Suffix), Shape(Shape), 120 FKind(FKind), Builder(OrigF.getContext()) { 121 assert(Shape.ABI == coro::ABI::Switch); 122 } 123 124 /// Create a cloner for a continuation lowering. 125 CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape, 126 Function *NewF, AnyCoroSuspendInst *ActiveSuspend) 127 : OrigF(OrigF), NewF(NewF), Suffix(Suffix), Shape(Shape), 128 FKind(Shape.ABI == coro::ABI::Async ? 
Kind::Async : Kind::Continuation), 129 Builder(OrigF.getContext()), ActiveSuspend(ActiveSuspend) { 130 assert(Shape.ABI == coro::ABI::Retcon || 131 Shape.ABI == coro::ABI::RetconOnce || Shape.ABI == coro::ABI::Async); 132 assert(NewF && "need existing function for continuation"); 133 assert(ActiveSuspend && "need active suspend point for continuation"); 134 } 135 136 Function *getFunction() const { 137 assert(NewF != nullptr && "declaration not yet set"); 138 return NewF; 139 } 140 141 void create(); 142 143 private: 144 bool isSwitchDestroyFunction() { 145 switch (FKind) { 146 case Kind::Async: 147 case Kind::Continuation: 148 case Kind::SwitchResume: 149 return false; 150 case Kind::SwitchUnwind: 151 case Kind::SwitchCleanup: 152 return true; 153 } 154 llvm_unreachable("Unknown CoroCloner::Kind enum"); 155 } 156 157 void replaceEntryBlock(); 158 Value *deriveNewFramePointer(); 159 void replaceRetconOrAsyncSuspendUses(); 160 void replaceCoroSuspends(); 161 void replaceCoroEnds(); 162 void replaceSwiftErrorOps(); 163 void salvageDebugInfo(); 164 void handleFinalSuspend(); 165 }; 166 167 } // end anonymous namespace 168 169 static void maybeFreeRetconStorage(IRBuilder<> &Builder, 170 const coro::Shape &Shape, Value *FramePtr, 171 CallGraph *CG) { 172 assert(Shape.ABI == coro::ABI::Retcon || 173 Shape.ABI == coro::ABI::RetconOnce); 174 if (Shape.RetconLowering.IsFrameInlineInStorage) 175 return; 176 177 Shape.emitDealloc(Builder, FramePtr, CG); 178 } 179 180 /// Replace an llvm.coro.end.async. 181 /// Will inline the must tail call function call if there is one. 182 /// \returns true if cleanup of the coro.end block is needed, false otherwise. 
183 static bool replaceCoroEndAsync(AnyCoroEndInst *End) { 184 IRBuilder<> Builder(End); 185 186 auto *EndAsync = dyn_cast<CoroAsyncEndInst>(End); 187 if (!EndAsync) { 188 Builder.CreateRetVoid(); 189 return true /*needs cleanup of coro.end block*/; 190 } 191 192 auto *MustTailCallFunc = EndAsync->getMustTailCallFunction(); 193 if (!MustTailCallFunc) { 194 Builder.CreateRetVoid(); 195 return true /*needs cleanup of coro.end block*/; 196 } 197 198 // Move the must tail call from the predecessor block into the end block. 199 auto *CoroEndBlock = End->getParent(); 200 auto *MustTailCallFuncBlock = CoroEndBlock->getSinglePredecessor(); 201 assert(MustTailCallFuncBlock && "Must have a single predecessor block"); 202 auto It = MustTailCallFuncBlock->getTerminator()->getIterator(); 203 auto *MustTailCall = cast<CallInst>(&*std::prev(It)); 204 CoroEndBlock->getInstList().splice( 205 End->getIterator(), MustTailCallFuncBlock->getInstList(), MustTailCall); 206 207 // Insert the return instruction. 208 Builder.SetInsertPoint(End); 209 Builder.CreateRetVoid(); 210 InlineFunctionInfo FnInfo; 211 212 // Remove the rest of the block, by splitting it into an unreachable block. 213 auto *BB = End->getParent(); 214 BB->splitBasicBlock(End); 215 BB->getTerminator()->eraseFromParent(); 216 217 auto InlineRes = InlineFunction(*MustTailCall, FnInfo); 218 assert(InlineRes.isSuccess() && "Expected inlining to succeed"); 219 (void)InlineRes; 220 221 // We have cleaned up the coro.end block above. 222 return false; 223 } 224 225 /// Replace a non-unwind call to llvm.coro.end. 226 static void replaceFallthroughCoroEnd(AnyCoroEndInst *End, 227 const coro::Shape &Shape, Value *FramePtr, 228 bool InResume, CallGraph *CG) { 229 // Start inserting right before the coro.end. 230 IRBuilder<> Builder(End); 231 232 // Create the return instruction. 233 switch (Shape.ABI) { 234 // The cloned functions in switch-lowering always return void. 
235 case coro::ABI::Switch: 236 // coro.end doesn't immediately end the coroutine in the main function 237 // in this lowering, because we need to deallocate the coroutine. 238 if (!InResume) 239 return; 240 Builder.CreateRetVoid(); 241 break; 242 243 // In async lowering this returns. 244 case coro::ABI::Async: { 245 bool CoroEndBlockNeedsCleanup = replaceCoroEndAsync(End); 246 if (!CoroEndBlockNeedsCleanup) 247 return; 248 break; 249 } 250 251 // In unique continuation lowering, the continuations always return void. 252 // But we may have implicitly allocated storage. 253 case coro::ABI::RetconOnce: 254 maybeFreeRetconStorage(Builder, Shape, FramePtr, CG); 255 Builder.CreateRetVoid(); 256 break; 257 258 // In non-unique continuation lowering, we signal completion by returning 259 // a null continuation. 260 case coro::ABI::Retcon: { 261 maybeFreeRetconStorage(Builder, Shape, FramePtr, CG); 262 auto RetTy = Shape.getResumeFunctionType()->getReturnType(); 263 auto RetStructTy = dyn_cast<StructType>(RetTy); 264 PointerType *ContinuationTy = 265 cast<PointerType>(RetStructTy ? RetStructTy->getElementType(0) : RetTy); 266 267 Value *ReturnValue = ConstantPointerNull::get(ContinuationTy); 268 if (RetStructTy) { 269 ReturnValue = Builder.CreateInsertValue(UndefValue::get(RetStructTy), 270 ReturnValue, 0); 271 } 272 Builder.CreateRet(ReturnValue); 273 break; 274 } 275 } 276 277 // Remove the rest of the block, by splitting it into an unreachable block. 278 auto *BB = End->getParent(); 279 BB->splitBasicBlock(End); 280 BB->getTerminator()->eraseFromParent(); 281 } 282 283 /// Replace an unwind call to llvm.coro.end. 284 static void replaceUnwindCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape, 285 Value *FramePtr, bool InResume, 286 CallGraph *CG) { 287 IRBuilder<> Builder(End); 288 289 switch (Shape.ABI) { 290 // In switch-lowering, this does nothing in the main function. 
291 case coro::ABI::Switch: 292 if (!InResume) 293 return; 294 break; 295 // In async lowering this does nothing. 296 case coro::ABI::Async: 297 break; 298 // In continuation-lowering, this frees the continuation storage. 299 case coro::ABI::Retcon: 300 case coro::ABI::RetconOnce: 301 maybeFreeRetconStorage(Builder, Shape, FramePtr, CG); 302 break; 303 } 304 305 // If coro.end has an associated bundle, add cleanupret instruction. 306 if (auto Bundle = End->getOperandBundle(LLVMContext::OB_funclet)) { 307 auto *FromPad = cast<CleanupPadInst>(Bundle->Inputs[0]); 308 auto *CleanupRet = Builder.CreateCleanupRet(FromPad, nullptr); 309 End->getParent()->splitBasicBlock(End); 310 CleanupRet->getParent()->getTerminator()->eraseFromParent(); 311 } 312 } 313 314 static void replaceCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape, 315 Value *FramePtr, bool InResume, CallGraph *CG) { 316 if (End->isUnwind()) 317 replaceUnwindCoroEnd(End, Shape, FramePtr, InResume, CG); 318 else 319 replaceFallthroughCoroEnd(End, Shape, FramePtr, InResume, CG); 320 321 auto &Context = End->getContext(); 322 End->replaceAllUsesWith(InResume ? ConstantInt::getTrue(Context) 323 : ConstantInt::getFalse(Context)); 324 End->eraseFromParent(); 325 } 326 327 // Create an entry block for a resume function with a switch that will jump to 328 // suspend points. 329 static void createResumeEntryBlock(Function &F, coro::Shape &Shape) { 330 assert(Shape.ABI == coro::ABI::Switch); 331 LLVMContext &C = F.getContext(); 332 333 // resume.entry: 334 // %index.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, 335 // i32 2 336 // % index = load i32, i32* %index.addr 337 // switch i32 %index, label %unreachable [ 338 // i32 0, label %resume.0 339 // i32 1, label %resume.1 340 // ... 
341 // ] 342 343 auto *NewEntry = BasicBlock::Create(C, "resume.entry", &F); 344 auto *UnreachBB = BasicBlock::Create(C, "unreachable", &F); 345 346 IRBuilder<> Builder(NewEntry); 347 auto *FramePtr = Shape.FramePtr; 348 auto *FrameTy = Shape.FrameTy; 349 auto *GepIndex = Builder.CreateStructGEP( 350 FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr"); 351 auto *Index = Builder.CreateLoad(Shape.getIndexType(), GepIndex, "index"); 352 auto *Switch = 353 Builder.CreateSwitch(Index, UnreachBB, Shape.CoroSuspends.size()); 354 Shape.SwitchLowering.ResumeSwitch = Switch; 355 356 size_t SuspendIndex = 0; 357 for (auto *AnyS : Shape.CoroSuspends) { 358 auto *S = cast<CoroSuspendInst>(AnyS); 359 ConstantInt *IndexVal = Shape.getIndex(SuspendIndex); 360 361 // Replace CoroSave with a store to Index: 362 // %index.addr = getelementptr %f.frame... (index field number) 363 // store i32 0, i32* %index.addr1 364 auto *Save = S->getCoroSave(); 365 Builder.SetInsertPoint(Save); 366 if (S->isFinal()) { 367 // Final suspend point is represented by storing zero in ResumeFnAddr. 
368 auto *GepIndex = Builder.CreateStructGEP(FrameTy, FramePtr, 369 coro::Shape::SwitchFieldIndex::Resume, 370 "ResumeFn.addr"); 371 auto *NullPtr = ConstantPointerNull::get(cast<PointerType>( 372 cast<PointerType>(GepIndex->getType())->getElementType())); 373 Builder.CreateStore(NullPtr, GepIndex); 374 } else { 375 auto *GepIndex = Builder.CreateStructGEP( 376 FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr"); 377 Builder.CreateStore(IndexVal, GepIndex); 378 } 379 Save->replaceAllUsesWith(ConstantTokenNone::get(C)); 380 Save->eraseFromParent(); 381 382 // Split block before and after coro.suspend and add a jump from an entry 383 // switch: 384 // 385 // whateverBB: 386 // whatever 387 // %0 = call i8 @llvm.coro.suspend(token none, i1 false) 388 // switch i8 %0, label %suspend[i8 0, label %resume 389 // i8 1, label %cleanup] 390 // becomes: 391 // 392 // whateverBB: 393 // whatever 394 // br label %resume.0.landing 395 // 396 // resume.0: ; <--- jump from the switch in the resume.entry 397 // %0 = tail call i8 @llvm.coro.suspend(token none, i1 false) 398 // br label %resume.0.landing 399 // 400 // resume.0.landing: 401 // %1 = phi i8[-1, %whateverBB], [%0, %resume.0] 402 // switch i8 % 1, label %suspend [i8 0, label %resume 403 // i8 1, label %cleanup] 404 405 auto *SuspendBB = S->getParent(); 406 auto *ResumeBB = 407 SuspendBB->splitBasicBlock(S, "resume." 
+ Twine(SuspendIndex)); 408 auto *LandingBB = ResumeBB->splitBasicBlock( 409 S->getNextNode(), ResumeBB->getName() + Twine(".landing")); 410 Switch->addCase(IndexVal, ResumeBB); 411 412 cast<BranchInst>(SuspendBB->getTerminator())->setSuccessor(0, LandingBB); 413 auto *PN = PHINode::Create(Builder.getInt8Ty(), 2, "", &LandingBB->front()); 414 S->replaceAllUsesWith(PN); 415 PN->addIncoming(Builder.getInt8(-1), SuspendBB); 416 PN->addIncoming(S, ResumeBB); 417 418 ++SuspendIndex; 419 } 420 421 Builder.SetInsertPoint(UnreachBB); 422 Builder.CreateUnreachable(); 423 424 Shape.SwitchLowering.ResumeEntryBlock = NewEntry; 425 } 426 427 428 // Rewrite final suspend point handling. We do not use suspend index to 429 // represent the final suspend point. Instead we zero-out ResumeFnAddr in the 430 // coroutine frame, since it is undefined behavior to resume a coroutine 431 // suspended at the final suspend point. Thus, in the resume function, we can 432 // simply remove the last case (when coro::Shape is built, the final suspend 433 // point (if present) is always the last element of CoroSuspends array). 434 // In the destroy function, we add a code sequence to check if ResumeFnAddress 435 // is Null, and if so, jump to the appropriate label to handle cleanup from the 436 // final suspend point. 
void CoroCloner::handleFinalSuspend() {
  assert(Shape.ABI == coro::ABI::Switch &&
         Shape.SwitchLowering.HasFinalSuspend);
  // Work on this clone's copy of the resume switch, found through the value
  // map, and drop its last case while remembering where that case led.
  auto *Switch = cast<SwitchInst>(VMap[Shape.SwitchLowering.ResumeSwitch]);
  auto FinalCaseIt = std::prev(Switch->case_end());
  BasicBlock *ResumeBB = FinalCaseIt->getCaseSuccessor();
  Switch->removeCase(FinalCaseIt);
  if (isSwitchDestroyFunction()) {
    // In the unwind/cleanup clones, test the ResumeFn slot of the frame in
    // front of the switch: a null slot branches to the removed case's
    // successor, anything else continues into the switch.
    BasicBlock *OldSwitchBB = Switch->getParent();
    auto *NewSwitchBB = OldSwitchBB->splitBasicBlock(Switch, "Switch");
    Builder.SetInsertPoint(OldSwitchBB->getTerminator());
    auto *GepIndex = Builder.CreateStructGEP(Shape.FrameTy, NewFramePtr,
                                       coro::Shape::SwitchFieldIndex::Resume,
                                             "ResumeFn.addr");
    auto *Load = Builder.CreateLoad(Shape.getSwitchResumePointerType(),
                                    GepIndex);
    auto *Cond = Builder.CreateIsNull(Load);
    Builder.CreateCondBr(Cond, ResumeBB, NewSwitchBB);
    // The block's previous terminator is superseded by the conditional
    // branch inserted in front of it.
    OldSwitchBB->getTerminator()->eraseFromParent();
  }
}

/// Build the function type used for an async resume function: it returns
/// void, is not variadic, and takes as parameters the element types of the
/// struct type produced by the given async suspend call.
static FunctionType *
getFunctionTypeFromAsyncSuspend(AnyCoroSuspendInst *Suspend) {
  auto *AsyncSuspend = cast<CoroSuspendAsyncInst>(Suspend);
  auto *StructTy = cast<StructType>(AsyncSuspend->getType());
  auto &Context = Suspend->getParent()->getParent()->getContext();
  auto *VoidTy = Type::getVoidTy(Context);
  return FunctionType::get(VoidTy, StructTy->elements(), false);
}

/// Create an internal-linkage function named after \p OrigF with \p Suffix
/// appended, and insert it into \p OrigF's module before \p InsertBefore.
///
/// The type is the shape's resume function type, except for the async ABI
/// where it is derived from \p ActiveSuspend. Parameter 0 is marked nonnull,
/// and additionally noalias for every ABI other than async.
///
/// \returns the newly created function.
static Function *createCloneDeclaration(Function &OrigF, coro::Shape &Shape,
                                        const Twine &Suffix,
                                        Module::iterator InsertBefore,
                                        AnyCoroSuspendInst *ActiveSuspend) {
  Module *M = OrigF.getParent();
  auto *FnTy = (Shape.ABI != coro::ABI::Async)
                   ? Shape.getResumeFunctionType()
                   : getFunctionTypeFromAsyncSuspend(ActiveSuspend);

  Function *NewF =
      Function::Create(FnTy, GlobalValue::LinkageTypes::InternalLinkage,
                       OrigF.getName() + Suffix);
  NewF->addParamAttr(0, Attribute::NonNull);

  // For the async lowering ABI we can't guarantee that the context argument is
  // not accessed via a different pointer not based on the argument.
  if (Shape.ABI != coro::ABI::Async)
    NewF->addParamAttr(0, Attribute::NoAlias);

  M->getFunctionList().insert(InsertBefore, NewF);

  return NewF;
}

/// Replace uses of the active llvm.coro.suspend.retcon/async call with the
/// arguments to the continuation function.
///
/// This assumes that the builder has a meaningful insertion point.
void CoroCloner::replaceRetconOrAsyncSuspendUses() {
  assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
         Shape.ABI == coro::ABI::Async);

  // Nothing to rewrite if the cloned suspend has no uses.
  auto NewS = VMap[ActiveSuspend];
  if (NewS->use_empty()) return;

  // Copy out all the continuation arguments after the buffer pointer into
  // an easily-indexed data structure for convenience.
  SmallVector<Value*, 8> Args;
  // The async ABI includes all arguments -- including the first argument.
  bool IsAsyncABI = Shape.ABI == coro::ABI::Async;
  for (auto I = IsAsyncABI ? NewF->arg_begin() : std::next(NewF->arg_begin()),
            E = NewF->arg_end();
       I != E; ++I)
    Args.push_back(&*I);

  // If the suspend returns a single scalar value, we can just do a simple
  // replacement.
  if (!isa<StructType>(NewS->getType())) {
    assert(Args.size() == 1);
    NewS->replaceAllUsesWith(Args.front());
    return;
  }

  // Try to peephole extracts of an aggregate return. The use iterator is
  // advanced before the user is touched because the user may be erased below.
  for (auto UI = NewS->use_begin(), UE = NewS->use_end(); UI != UE; ) {
    auto EVI = dyn_cast<ExtractValueInst>((UI++)->getUser());
    if (!EVI || EVI->getNumIndices() != 1)
      continue;

    EVI->replaceAllUsesWith(Args[EVI->getIndices().front()]);
    EVI->eraseFromParent();
  }

  // If we have no remaining uses, we're done.
  if (NewS->use_empty()) return;

  // Otherwise, we need to create an aggregate.
  Value *Agg = UndefValue::get(NewS->getType());
  for (size_t I = 0, E = Args.size(); I != E; ++I)
    Agg = Builder.CreateInsertValue(Agg, Args[I], I);

  NewS->replaceAllUsesWith(Agg);
}

/// Replace the cloned coro.suspend calls with a constant result and erase
/// them. Only switch lowering does any work here; the other ABIs return
/// immediately.
void CoroCloner::replaceCoroSuspends() {
  // Only assigned on the switch-lowering path; every other path returns
  // before it is read.
  Value *SuspendResult;

  switch (Shape.ABI) {
  // In switch lowering, replace coro.suspend with the appropriate value
  // for the type of function we're extracting.
  // Replacing coro.suspend with (0) will result in control flow proceeding to
  // a resume label associated with a suspend point, replacing it with (1) will
  // result in control flow proceeding to a cleanup label associated with this
  // suspend point.
  case coro::ABI::Switch:
    SuspendResult = Builder.getInt8(isSwitchDestroyFunction() ? 1 : 0);
    break;

  // In async lowering there are no uses of the result.
  case coro::ABI::Async:
    return;

  // In returned-continuation lowering, the arguments from earlier
  // continuations are theoretically arbitrary, and they should have been
  // spilled.
  case coro::ABI::RetconOnce:
  case coro::ABI::Retcon:
    return;
  }

  for (AnyCoroSuspendInst *CS : Shape.CoroSuspends) {
    // The active suspend was handled earlier.
    if (CS == ActiveSuspend) continue;

    auto *MappedCS = cast<AnyCoroSuspendInst>(VMap[CS]);
    MappedCS->replaceAllUsesWith(SuspendResult);
    MappedCS->eraseFromParent();
  }
}

/// Lower every cloned coro.end in the new function, treating each one as
/// being inside a resume function.
void CoroCloner::replaceCoroEnds() {
  for (AnyCoroEndInst *CE : Shape.CoroEnds) {
    // We use a null call graph because there's no call graph node for
    // the cloned function yet.  We'll just be rebuilding that later.
    auto *NewCE = cast<AnyCoroEndInst>(VMap[CE]);
    replaceCoroEnd(NewCE, Shape, NewFramePtr, /*in resume*/ true, nullptr);
  }
}

/// Rewrite the calls listed in Shape.SwiftErrorOps inside \p F into loads
/// from and stores to a swifterror slot.
///
/// A call with no arguments is replaced by a load from the slot; a call with
/// one argument stores that argument to the slot and is replaced by the slot
/// itself. If \p VMap is non-null, the mapped copy of each call is rewritten;
/// if it is null, the calls themselves are rewritten and Shape.SwiftErrorOps
/// is cleared afterwards. Nothing is done for the async ABI when the shape
/// has no suspend points.
static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape,
                                 ValueToValueMapTy *VMap) {
  if (Shape.ABI == coro::ABI::Async && Shape.CoroSuspends.empty())
    return;
  // The slot is located (or created) on first request and reused afterwards.
  Value *CachedSlot = nullptr;
  auto getSwiftErrorSlot = [&](Type *ValueTy) -> Value * {
    if (CachedSlot) {
      assert(CachedSlot->getType()->getPointerElementType() == ValueTy &&
             "multiple swifterror slots in function with different types");
      return CachedSlot;
    }

    // Check if the function has a swifterror argument.
    for (auto &Arg : F.args()) {
      if (Arg.isSwiftError()) {
        CachedSlot = &Arg;
        assert(Arg.getType()->getPointerElementType() == ValueTy &&
               "swifterror argument does not have expected type");
        return &Arg;
      }
    }

    // Create a swifterror alloca.
    IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());
    auto Alloca = Builder.CreateAlloca(ValueTy);
    Alloca->setSwiftError(true);

    CachedSlot = Alloca;
    return Alloca;
  };

  for (CallInst *Op : Shape.SwiftErrorOps) {
    // Rewrite the clone's copy of the op when a value map was supplied.
    auto MappedOp = VMap ? cast<CallInst>((*VMap)[Op]) : Op;
    IRBuilder<> Builder(MappedOp);

    // If there are no arguments, this is a 'get' operation.
    Value *MappedResult;
    if (Op->getNumArgOperands() == 0) {
      auto ValueTy = Op->getType();
      auto Slot = getSwiftErrorSlot(ValueTy);
      MappedResult = Builder.CreateLoad(ValueTy, Slot);
    } else {
      // Otherwise the single argument is stored to the slot, and the slot
      // itself stands in for the call's result.
      assert(Op->getNumArgOperands() == 1);
      auto Value = MappedOp->getArgOperand(0);
      auto ValueTy = Value->getType();
      auto Slot = getSwiftErrorSlot(ValueTy);
      Builder.CreateStore(Value, Slot);
      MappedResult = Slot;
    }

    MappedOp->replaceAllUsesWith(MappedResult);
    MappedOp->eraseFromParent();
  }

  // If we're updating the original function, we've invalidated SwiftErrorOps.
  if (VMap == nullptr) {
    Shape.SwiftErrorOps.clear();
  }
}

/// Rewrite the swifterror operations in the cloned function, going through
/// this cloner's value map.
void CoroCloner::replaceSwiftErrorOps() {
  ::replaceSwiftErrorOps(*NewF, Shape, &VMap);
}

/// Salvage the debug variable intrinsics of the cloned function, then erase
/// those that ended up in unreachable blocks or that describe an alloca with
/// no remaining non-alloca instruction user in a reachable block.
void CoroCloner::salvageDebugInfo() {
  // Collect first: the intrinsics are salvaged and possibly erased below,
  // which must not happen while iterating over the function body.
  SmallVector<DbgVariableIntrinsic *, 8> Worklist;
  SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> DbgPtrAllocaCache;
  for (auto &BB : *NewF)
    for (auto &I : BB)
      if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I))
        Worklist.push_back(DVI);
  for (DbgVariableIntrinsic *DVI : Worklist)
    coro::salvageDebugInfo(DbgPtrAllocaCache, DVI, Shape.ReuseFrameSlot);

  // Remove all salvaged dbg.declare intrinsics that became
  // either unreachable or stale due to the CoroSplit transformation.
  DominatorTree DomTree(*NewF);
  auto IsUnreachableBlock = [&](BasicBlock *BB) {
    return !isPotentiallyReachable(&NewF->getEntryBlock(), BB, nullptr,
                                   &DomTree);
  };
  for (DbgVariableIntrinsic *DVI : Worklist) {
    if (IsUnreachableBlock(DVI->getParent()))
      DVI->eraseFromParent();
    else if (dyn_cast_or_null<AllocaInst>(DVI->getVariableLocationOp(0))) {
      // Count all non-debuginfo uses in reachable blocks.
      unsigned Uses = 0;
      for (auto *User : DVI->getVariableLocationOp(0)->users())
        if (auto *I = dyn_cast<Instruction>(User))
          if (!isa<AllocaInst>(I) && !IsUnreachableBlock(I->getParent()))
            ++Uses;
      if (!Uses)
        DVI->eraseFromParent();
    }
  }
}

/// Make the clone of AllocaSpillBlock the entry block of the new function,
/// branch from it to the ABI-appropriate resume location, and move static
/// allocas that are still used but no longer reachable into the new entry.
void CoroCloner::replaceEntryBlock() {
  // In the original function, the AllocaSpillBlock is a block immediately
  // following the allocation of the frame object which defines GEPs for
  // all the allocas that have been moved into the frame, and it ends by
  // branching to the original beginning of the coroutine.  Make this
  // the entry block of the cloned function.
  auto *Entry = cast<BasicBlock>(VMap[Shape.AllocaSpillBlock]);
  auto *OldEntry = &NewF->getEntryBlock();
  Entry->setName("entry" + Suffix);
  Entry->moveBefore(OldEntry);
  Entry->getTerminator()->eraseFromParent();

  // Clear all predecessors of the new entry block.  There should be
  // exactly one predecessor, which we created when splitting out
  // AllocaSpillBlock to begin with.
  assert(Entry->hasOneUse());
  auto BranchToEntry = cast<BranchInst>(Entry->user_back());
  assert(BranchToEntry->isUnconditional());
  Builder.SetInsertPoint(BranchToEntry);
  Builder.CreateUnreachable();
  BranchToEntry->eraseFromParent();

  // Branch from the entry to the appropriate place.
  Builder.SetInsertPoint(Entry);
  switch (Shape.ABI) {
  case coro::ABI::Switch: {
    // In switch-lowering, we built a resume-entry block in the original
    // function.  Make the entry block branch to this.
    auto *SwitchBB =
        cast<BasicBlock>(VMap[Shape.SwitchLowering.ResumeEntryBlock]);
    Builder.CreateBr(SwitchBB);
    break;
  }
  case coro::ABI::Async:
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce: {
    // In continuation ABIs, we want to branch to immediately after the
    // active suspend point.  Earlier phases will have put the suspend in its
    // own basic block, so just thread our jump directly to its successor.
    assert((Shape.ABI == coro::ABI::Async &&
            isa<CoroSuspendAsyncInst>(ActiveSuspend)) ||
           ((Shape.ABI == coro::ABI::Retcon ||
             Shape.ABI == coro::ABI::RetconOnce) &&
            isa<CoroSuspendRetconInst>(ActiveSuspend)));
    auto *MappedCS = cast<AnyCoroSuspendInst>(VMap[ActiveSuspend]);
    auto Branch = cast<BranchInst>(MappedCS->getNextNode());
    assert(Branch->isUnconditional());
    Builder.CreateBr(Branch->getSuccessor(0));
    break;
  }
  }

  // Any static alloca that's still being used but not reachable from the new
  // entry needs to be moved to the new entry.
  Function *F = OldEntry->getParent();
  DominatorTree DT{*F};
  // The iterator is advanced before the instruction is possibly moved.
  for (auto IT = inst_begin(F), End = inst_end(F); IT != End;) {
    Instruction &I = *IT++;
    auto *Alloca = dyn_cast<AllocaInst>(&I);
    if (!Alloca || I.use_empty())
      continue;
    // Skip allocas that are still reachable, and allocas whose array size is
    // not a constant.
    if (DT.isReachableFromEntry(I.getParent()) ||
        !isa<ConstantInt>(Alloca->getArraySize()))
      continue;
    I.moveBefore(*Entry, Entry->getFirstInsertionPt());
  }
}

/// Derive the value of the new frame pointer.
Value *CoroCloner::deriveNewFramePointer() {
  // Builder should be inserting to the front of the new entry block.

  switch (Shape.ABI) {
  // In switch-lowering, the argument is the frame pointer.
  case coro::ABI::Switch:
    return &*NewF->arg_begin();
  // In async-lowering, one of the arguments is an async context as determined
  // by the `llvm.coro.id.async` intrinsic. We can retrieve the async context of
  // the resume function from the async context projection function associated
  // with the active suspend. The frame is located as a tail to the async
  // context header.
  case coro::ABI::Async: {
    auto *ActiveAsyncSuspend = cast<CoroSuspendAsyncInst>(ActiveSuspend);
    auto *CalleeContext =
        NewF->getArg(ActiveAsyncSuspend->getStorageArgumentIndex());
    auto *FramePtrTy = Shape.FrameTy->getPointerTo();
    auto *ProjectionFunc =
        ActiveAsyncSuspend->getAsyncContextProjectionFunction();
    // The debug location is taken from the clone's copy of the suspend.
    auto DbgLoc =
        cast<CoroSuspendAsyncInst>(VMap[ActiveSuspend])->getDebugLoc();
    // Calling i8* (i8*)
    auto *CallerContext = Builder.CreateCall(
        cast<FunctionType>(ProjectionFunc->getType()->getPointerElementType()),
        ProjectionFunc, CalleeContext);
    CallerContext->setCallingConv(ProjectionFunc->getCallingConv());
    CallerContext->setDebugLoc(DbgLoc);
    // The frame is located after the async_context header.
    auto &Context = Builder.getContext();
    auto *FramePtrAddr = Builder.CreateConstInBoundsGEP1_32(
        Type::getInt8Ty(Context), CallerContext,
        Shape.AsyncLowering.FrameOffset, "async.ctx.frameptr");
    // Inline the projection function.
    InlineFunctionInfo InlineInfo;
    auto InlineRes = InlineFunction(*CallerContext, InlineInfo);
    assert(InlineRes.isSuccess());
    (void)InlineRes;
    return Builder.CreateBitCast(FramePtrAddr, FramePtrTy);
  }
  // In continuation-lowering, the argument is the opaque storage.
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce: {
    Argument *NewStorage = &*NewF->arg_begin();
    auto FramePtrTy = Shape.FrameTy->getPointerTo();

    // If the storage is inline, just bitcast the storage to the frame type.
    if (Shape.RetconLowering.IsFrameInlineInStorage)
      return Builder.CreateBitCast(NewStorage, FramePtrTy);

    // Otherwise, load the real frame from the opaque storage.
    auto FramePtrPtr =
      Builder.CreateBitCast(NewStorage, FramePtrTy->getPointerTo());
    return Builder.CreateLoad(FramePtrTy, FramePtrPtr);
  }
  }
  llvm_unreachable("bad ABI");
}

/// Add nonnull, noalias, alignment (\p Alignment) and dereferenceable
/// (\p Size) attributes to parameter \p ParamIndex of \p Attrs. \p Attrs is
/// updated in place.
static void addFramePointerAttrs(AttributeList &Attrs, LLVMContext &Context,
                                 unsigned ParamIndex,
                                 uint64_t Size, Align Alignment) {
  AttrBuilder ParamAttrs;
  ParamAttrs.addAttribute(Attribute::NonNull);
  ParamAttrs.addAttribute(Attribute::NoAlias);
  ParamAttrs.addAlignmentAttr(Alignment);
  ParamAttrs.addDereferenceableAttr(Size);
  Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs);
}

/// Clone the body of the original function into a resume function of
/// some sort.
void CoroCloner::create() {
  // Create the new function if we don't already have one.
  if (!NewF) {
    NewF = createCloneDeclaration(OrigF, Shape, Suffix,
                                  OrigF.getParent()->end(), ActiveSuspend);
  }

  // Replace all args with undefs. The buildCoroutineFrame algorithm has
  // already rewritten accesses to the args that occur after suspend points
  // with loads and stores to/from the coroutine frame.
  for (Argument &A : OrigF.args())
    VMap[&A] = UndefValue::get(A.getType());

  SmallVector<ReturnInst *, 4> Returns;

  // Ignore attempts to change certain attributes of the function.
  // TODO: maybe there should be a way to suppress this during cloning?
  auto savedVisibility = NewF->getVisibility();
  auto savedUnnamedAddr = NewF->getUnnamedAddr();
  auto savedDLLStorageClass = NewF->getDLLStorageClass();

  // NewF's linkage (which CloneFunctionInto does *not* change) might not
  // be compatible with the visibility of OrigF (which it *does* change),
  // so protect against that.
848 auto savedLinkage = NewF->getLinkage(); 849 NewF->setLinkage(llvm::GlobalValue::ExternalLinkage); 850 851 CloneFunctionInto(NewF, &OrigF, VMap, 852 CloneFunctionChangeType::LocalChangesOnly, Returns); 853 854 auto &Context = NewF->getContext(); 855 856 // For async functions / continuations, adjust the scope line of the 857 // clone to the line number of the suspend point. The scope line is 858 // associated with all pre-prologue instructions. This avoids a jump 859 // in the linetable from the function declaration to the suspend point. 860 if (DISubprogram *SP = NewF->getSubprogram()) { 861 assert(SP != OrigF.getSubprogram() && SP->isDistinct()); 862 if (ActiveSuspend) 863 if (auto DL = ActiveSuspend->getDebugLoc()) 864 SP->setScopeLine(DL->getLine()); 865 // Update the linkage name to reflect the modified symbol name. It 866 // is necessary to update the linkage name in Swift, since the 867 // mangling changes for resume functions. It might also be the 868 // right thing to do in C++, but due to a limitation in LLVM's 869 // AsmPrinter we can only do this if the function doesn't have an 870 // abstract specification, since the DWARF backend expects the 871 // abstract specification to contain the linkage name and asserts 872 // that they are identical. 873 if (!SP->getDeclaration() && SP->getUnit() && 874 SP->getUnit()->getSourceLanguage() == dwarf::DW_LANG_Swift) 875 SP->replaceLinkageName(MDString::get(Context, NewF->getName())); 876 } 877 878 NewF->setLinkage(savedLinkage); 879 NewF->setVisibility(savedVisibility); 880 NewF->setUnnamedAddr(savedUnnamedAddr); 881 NewF->setDLLStorageClass(savedDLLStorageClass); 882 883 // Replace the attributes of the new function: 884 auto OrigAttrs = NewF->getAttributes(); 885 auto NewAttrs = AttributeList(); 886 887 switch (Shape.ABI) { 888 case coro::ABI::Switch: 889 // Bootstrap attributes by copying function attributes from the 890 // original function. This should include optimization settings and so on. 
891 NewAttrs = NewAttrs.addAttributes(Context, AttributeList::FunctionIndex, 892 OrigAttrs.getFnAttributes()); 893 894 addFramePointerAttrs(NewAttrs, Context, 0, 895 Shape.FrameSize, Shape.FrameAlign); 896 break; 897 case coro::ABI::Async: { 898 // Transfer the original function's attributes. 899 auto FnAttrs = OrigF.getAttributes().getFnAttributes(); 900 NewAttrs = 901 NewAttrs.addAttributes(Context, AttributeList::FunctionIndex, FnAttrs); 902 break; 903 } 904 case coro::ABI::Retcon: 905 case coro::ABI::RetconOnce: 906 // If we have a continuation prototype, just use its attributes, 907 // full-stop. 908 NewAttrs = Shape.RetconLowering.ResumePrototype->getAttributes(); 909 910 addFramePointerAttrs(NewAttrs, Context, 0, 911 Shape.getRetconCoroId()->getStorageSize(), 912 Shape.getRetconCoroId()->getStorageAlignment()); 913 break; 914 } 915 916 switch (Shape.ABI) { 917 // In these ABIs, the cloned functions always return 'void', and the 918 // existing return sites are meaningless. Note that for unique 919 // continuations, this includes the returns associated with suspends; 920 // this is fine because we can't suspend twice. 921 case coro::ABI::Switch: 922 case coro::ABI::RetconOnce: 923 // Remove old returns. 924 for (ReturnInst *Return : Returns) 925 changeToUnreachable(Return, /*UseLLVMTrap=*/false); 926 break; 927 928 // With multi-suspend continuations, we'll already have eliminated the 929 // original returns and inserted returns before all the suspend points, 930 // so we want to leave any returns in place. 931 case coro::ABI::Retcon: 932 break; 933 // Async lowering will insert musttail call functions at all suspend points 934 // followed by a return. 935 // Don't change returns to unreachable because that will trip up the verifier. 936 // These returns should be unreachable from the clone. 
937 case coro::ABI::Async: 938 break; 939 } 940 941 NewF->setAttributes(NewAttrs); 942 NewF->setCallingConv(Shape.getResumeFunctionCC()); 943 944 // Set up the new entry block. 945 replaceEntryBlock(); 946 947 Builder.SetInsertPoint(&NewF->getEntryBlock().front()); 948 NewFramePtr = deriveNewFramePointer(); 949 950 // Remap frame pointer. 951 Value *OldFramePtr = VMap[Shape.FramePtr]; 952 NewFramePtr->takeName(OldFramePtr); 953 OldFramePtr->replaceAllUsesWith(NewFramePtr); 954 955 // Remap vFrame pointer. 956 auto *NewVFrame = Builder.CreateBitCast( 957 NewFramePtr, Type::getInt8PtrTy(Builder.getContext()), "vFrame"); 958 Value *OldVFrame = cast<Value>(VMap[Shape.CoroBegin]); 959 OldVFrame->replaceAllUsesWith(NewVFrame); 960 961 switch (Shape.ABI) { 962 case coro::ABI::Switch: 963 // Rewrite final suspend handling as it is not done via switch (allows to 964 // remove final case from the switch, since it is undefined behavior to 965 // resume the coroutine suspended at the final suspend point. 966 if (Shape.SwitchLowering.HasFinalSuspend) 967 handleFinalSuspend(); 968 break; 969 case coro::ABI::Async: 970 case coro::ABI::Retcon: 971 case coro::ABI::RetconOnce: 972 // Replace uses of the active suspend with the corresponding 973 // continuation-function arguments. 974 assert(ActiveSuspend != nullptr && 975 "no active suspend when lowering a continuation-style coroutine"); 976 replaceRetconOrAsyncSuspendUses(); 977 break; 978 } 979 980 // Handle suspends. 981 replaceCoroSuspends(); 982 983 // Handle swifterror. 984 replaceSwiftErrorOps(); 985 986 // Remove coro.end intrinsics. 987 replaceCoroEnds(); 988 989 // Salvage debug info that points into the coroutine frame. 990 salvageDebugInfo(); 991 992 // Eliminate coro.free from the clones, replacing it with 'null' in cleanup, 993 // to suppress deallocation code. 
994 if (Shape.ABI == coro::ABI::Switch) 995 coro::replaceCoroFree(cast<CoroIdInst>(VMap[Shape.CoroBegin->getId()]), 996 /*Elide=*/ FKind == CoroCloner::Kind::SwitchCleanup); 997 } 998 999 // Create a resume clone by cloning the body of the original function, setting 1000 // new entry block and replacing coro.suspend an appropriate value to force 1001 // resume or cleanup pass for every suspend point. 1002 static Function *createClone(Function &F, const Twine &Suffix, 1003 coro::Shape &Shape, CoroCloner::Kind FKind) { 1004 CoroCloner Cloner(F, Suffix, Shape, FKind); 1005 Cloner.create(); 1006 return Cloner.getFunction(); 1007 } 1008 1009 /// Remove calls to llvm.coro.end in the original function. 1010 static void removeCoroEnds(const coro::Shape &Shape, CallGraph *CG) { 1011 for (auto End : Shape.CoroEnds) { 1012 replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, CG); 1013 } 1014 } 1015 1016 static void updateAsyncFuncPointerContextSize(coro::Shape &Shape) { 1017 assert(Shape.ABI == coro::ABI::Async); 1018 1019 auto *FuncPtrStruct = cast<ConstantStruct>( 1020 Shape.AsyncLowering.AsyncFuncPointer->getInitializer()); 1021 auto *OrigRelativeFunOffset = FuncPtrStruct->getOperand(0); 1022 auto *OrigContextSize = FuncPtrStruct->getOperand(1); 1023 auto *NewContextSize = ConstantInt::get(OrigContextSize->getType(), 1024 Shape.AsyncLowering.ContextSize); 1025 auto *NewFuncPtrStruct = ConstantStruct::get( 1026 FuncPtrStruct->getType(), OrigRelativeFunOffset, NewContextSize); 1027 1028 Shape.AsyncLowering.AsyncFuncPointer->setInitializer(NewFuncPtrStruct); 1029 } 1030 1031 static void replaceFrameSize(coro::Shape &Shape) { 1032 if (Shape.ABI == coro::ABI::Async) 1033 updateAsyncFuncPointerContextSize(Shape); 1034 1035 if (Shape.CoroSizes.empty()) 1036 return; 1037 1038 // In the same function all coro.sizes should have the same result type. 
1039 auto *SizeIntrin = Shape.CoroSizes.back(); 1040 Module *M = SizeIntrin->getModule(); 1041 const DataLayout &DL = M->getDataLayout(); 1042 auto Size = DL.getTypeAllocSize(Shape.FrameTy); 1043 auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size); 1044 1045 for (CoroSizeInst *CS : Shape.CoroSizes) { 1046 CS->replaceAllUsesWith(SizeConstant); 1047 CS->eraseFromParent(); 1048 } 1049 } 1050 1051 // Create a global constant array containing pointers to functions provided and 1052 // set Info parameter of CoroBegin to point at this constant. Example: 1053 // 1054 // @f.resumers = internal constant [2 x void(%f.frame*)*] 1055 // [void(%f.frame*)* @f.resume, void(%f.frame*)* @f.destroy] 1056 // define void @f() { 1057 // ... 1058 // call i8* @llvm.coro.begin(i8* null, i32 0, i8* null, 1059 // i8* bitcast([2 x void(%f.frame*)*] * @f.resumers to i8*)) 1060 // 1061 // Assumes that all the functions have the same signature. 1062 static void setCoroInfo(Function &F, coro::Shape &Shape, 1063 ArrayRef<Function *> Fns) { 1064 // This only works under the switch-lowering ABI because coro elision 1065 // only works on the switch-lowering ABI. 1066 assert(Shape.ABI == coro::ABI::Switch); 1067 1068 SmallVector<Constant *, 4> Args(Fns.begin(), Fns.end()); 1069 assert(!Args.empty()); 1070 Function *Part = *Fns.begin(); 1071 Module *M = Part->getParent(); 1072 auto *ArrTy = ArrayType::get(Part->getType(), Args.size()); 1073 1074 auto *ConstVal = ConstantArray::get(ArrTy, Args); 1075 auto *GV = new GlobalVariable(*M, ConstVal->getType(), /*isConstant=*/true, 1076 GlobalVariable::PrivateLinkage, ConstVal, 1077 F.getName() + Twine(".resumers")); 1078 1079 // Update coro.begin instruction to refer to this constant. 1080 LLVMContext &C = F.getContext(); 1081 auto *BC = ConstantExpr::getPointerCast(GV, Type::getInt8PtrTy(C)); 1082 Shape.getSwitchCoroId()->setInfo(BC); 1083 } 1084 1085 // Store addresses of Resume/Destroy/Cleanup functions in the coroutine frame. 
1086 static void updateCoroFrame(coro::Shape &Shape, Function *ResumeFn, 1087 Function *DestroyFn, Function *CleanupFn) { 1088 assert(Shape.ABI == coro::ABI::Switch); 1089 1090 IRBuilder<> Builder(Shape.FramePtr->getNextNode()); 1091 auto *ResumeAddr = Builder.CreateStructGEP( 1092 Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Resume, 1093 "resume.addr"); 1094 Builder.CreateStore(ResumeFn, ResumeAddr); 1095 1096 Value *DestroyOrCleanupFn = DestroyFn; 1097 1098 CoroIdInst *CoroId = Shape.getSwitchCoroId(); 1099 if (CoroAllocInst *CA = CoroId->getCoroAlloc()) { 1100 // If there is a CoroAlloc and it returns false (meaning we elide the 1101 // allocation, use CleanupFn instead of DestroyFn). 1102 DestroyOrCleanupFn = Builder.CreateSelect(CA, DestroyFn, CleanupFn); 1103 } 1104 1105 auto *DestroyAddr = Builder.CreateStructGEP( 1106 Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Destroy, 1107 "destroy.addr"); 1108 Builder.CreateStore(DestroyOrCleanupFn, DestroyAddr); 1109 } 1110 1111 static void postSplitCleanup(Function &F) { 1112 removeUnreachableBlocks(F); 1113 1114 // For now, we do a mandatory verification step because we don't 1115 // entirely trust this pass. Note that we don't want to add a verifier 1116 // pass to FPM below because it will also verify all the global data. 1117 if (verifyFunction(F, &errs())) 1118 report_fatal_error("Broken function"); 1119 1120 legacy::FunctionPassManager FPM(F.getParent()); 1121 1122 FPM.add(createSCCPPass()); 1123 FPM.add(createCFGSimplificationPass()); 1124 FPM.add(createEarlyCSEPass()); 1125 FPM.add(createCFGSimplificationPass()); 1126 1127 FPM.doInitialization(); 1128 FPM.run(F); 1129 FPM.doFinalization(); 1130 } 1131 1132 // Assuming we arrived at the block NewBlock from Prev instruction, store 1133 // PHI's incoming values in the ResolvedValues map. 
1134 static void 1135 scanPHIsAndUpdateValueMap(Instruction *Prev, BasicBlock *NewBlock, 1136 DenseMap<Value *, Value *> &ResolvedValues) { 1137 auto *PrevBB = Prev->getParent(); 1138 for (PHINode &PN : NewBlock->phis()) { 1139 auto V = PN.getIncomingValueForBlock(PrevBB); 1140 // See if we already resolved it. 1141 auto VI = ResolvedValues.find(V); 1142 if (VI != ResolvedValues.end()) 1143 V = VI->second; 1144 // Remember the value. 1145 ResolvedValues[&PN] = V; 1146 } 1147 } 1148 1149 // Replace a sequence of branches leading to a ret, with a clone of a ret 1150 // instruction. Suspend instruction represented by a switch, track the PHI 1151 // values and select the correct case successor when possible. 1152 static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) { 1153 DenseMap<Value *, Value *> ResolvedValues; 1154 BasicBlock *UnconditionalSucc = nullptr; 1155 1156 Instruction *I = InitialInst; 1157 while (I->isTerminator() || 1158 (isa<CmpInst>(I) && I->getNextNode()->isTerminator())) { 1159 if (isa<ReturnInst>(I)) { 1160 if (I != InitialInst) { 1161 // If InitialInst is an unconditional branch, 1162 // remove PHI values that come from basic block of InitialInst 1163 if (UnconditionalSucc) 1164 UnconditionalSucc->removePredecessor(InitialInst->getParent(), true); 1165 ReplaceInstWithInst(InitialInst, I->clone()); 1166 } 1167 return true; 1168 } 1169 if (auto *BR = dyn_cast<BranchInst>(I)) { 1170 if (BR->isUnconditional()) { 1171 BasicBlock *BB = BR->getSuccessor(0); 1172 if (I == InitialInst) 1173 UnconditionalSucc = BB; 1174 scanPHIsAndUpdateValueMap(I, BB, ResolvedValues); 1175 I = BB->getFirstNonPHIOrDbgOrLifetime(); 1176 continue; 1177 } 1178 } else if (auto *CondCmp = dyn_cast<CmpInst>(I)) { 1179 auto *BR = dyn_cast<BranchInst>(I->getNextNode()); 1180 if (BR && BR->isConditional() && CondCmp == BR->getCondition()) { 1181 // If the case number of suspended switch instruction is reduced to 1182 // 1, then it is simplified to CmpInst in 
llvm::ConstantFoldTerminator. 1183 // And the comparsion looks like : %cond = icmp eq i8 %V, constant. 1184 ConstantInt *CondConst = dyn_cast<ConstantInt>(CondCmp->getOperand(1)); 1185 if (CondConst && CondCmp->getPredicate() == CmpInst::ICMP_EQ) { 1186 Value *V = CondCmp->getOperand(0); 1187 auto it = ResolvedValues.find(V); 1188 if (it != ResolvedValues.end()) 1189 V = it->second; 1190 1191 if (ConstantInt *Cond0 = dyn_cast<ConstantInt>(V)) { 1192 BasicBlock *BB = Cond0->equalsInt(CondConst->getZExtValue()) 1193 ? BR->getSuccessor(0) 1194 : BR->getSuccessor(1); 1195 scanPHIsAndUpdateValueMap(I, BB, ResolvedValues); 1196 I = BB->getFirstNonPHIOrDbgOrLifetime(); 1197 continue; 1198 } 1199 } 1200 } 1201 } else if (auto *SI = dyn_cast<SwitchInst>(I)) { 1202 Value *V = SI->getCondition(); 1203 auto it = ResolvedValues.find(V); 1204 if (it != ResolvedValues.end()) 1205 V = it->second; 1206 if (ConstantInt *Cond = dyn_cast<ConstantInt>(V)) { 1207 BasicBlock *BB = SI->findCaseValue(Cond)->getCaseSuccessor(); 1208 scanPHIsAndUpdateValueMap(I, BB, ResolvedValues); 1209 I = BB->getFirstNonPHIOrDbgOrLifetime(); 1210 continue; 1211 } 1212 } 1213 return false; 1214 } 1215 return false; 1216 } 1217 1218 // Check whether CI obeys the rules of musttail attribute. 1219 static bool shouldBeMustTail(const CallInst &CI, const Function &F) { 1220 if (CI.isInlineAsm()) 1221 return false; 1222 1223 // Match prototypes and calling conventions of resume function. 1224 FunctionType *CalleeTy = CI.getFunctionType(); 1225 if (!CalleeTy->getReturnType()->isVoidTy() || (CalleeTy->getNumParams() != 1)) 1226 return false; 1227 1228 Type *CalleeParmTy = CalleeTy->getParamType(0); 1229 if (!CalleeParmTy->isPointerTy() || 1230 (CalleeParmTy->getPointerAddressSpace() != 0)) 1231 return false; 1232 1233 if (CI.getCallingConv() != F.getCallingConv()) 1234 return false; 1235 1236 // CI should not has any ABI-impacting function attributes. 
1237 static const Attribute::AttrKind ABIAttrs[] = { 1238 Attribute::StructRet, Attribute::ByVal, Attribute::InAlloca, 1239 Attribute::Preallocated, Attribute::InReg, Attribute::Returned, 1240 Attribute::SwiftSelf, Attribute::SwiftError}; 1241 AttributeList Attrs = CI.getAttributes(); 1242 for (auto AK : ABIAttrs) 1243 if (Attrs.hasParamAttribute(0, AK)) 1244 return false; 1245 1246 return true; 1247 } 1248 1249 // Add musttail to any resume instructions that is immediately followed by a 1250 // suspend (i.e. ret). We do this even in -O0 to support guaranteed tail call 1251 // for symmetrical coroutine control transfer (C++ Coroutines TS extension). 1252 // This transformation is done only in the resume part of the coroutine that has 1253 // identical signature and calling convention as the coro.resume call. 1254 static void addMustTailToCoroResumes(Function &F) { 1255 bool changed = false; 1256 1257 // Collect potential resume instructions. 1258 SmallVector<CallInst *, 4> Resumes; 1259 for (auto &I : instructions(F)) 1260 if (auto *Call = dyn_cast<CallInst>(&I)) 1261 if (shouldBeMustTail(*Call, F)) 1262 Resumes.push_back(Call); 1263 1264 // Set musttail on those that are followed by a ret instruction. 1265 for (CallInst *Call : Resumes) 1266 if (simplifyTerminatorLeadingToRet(Call->getNextNode())) { 1267 Call->setTailCallKind(CallInst::TCK_MustTail); 1268 changed = true; 1269 } 1270 1271 if (changed) 1272 removeUnreachableBlocks(F); 1273 } 1274 1275 // Coroutine has no suspend points. Remove heap allocation for the coroutine 1276 // frame if possible. 
1277 static void handleNoSuspendCoroutine(coro::Shape &Shape) { 1278 auto *CoroBegin = Shape.CoroBegin; 1279 auto *CoroId = CoroBegin->getId(); 1280 auto *AllocInst = CoroId->getCoroAlloc(); 1281 switch (Shape.ABI) { 1282 case coro::ABI::Switch: { 1283 auto SwitchId = cast<CoroIdInst>(CoroId); 1284 coro::replaceCoroFree(SwitchId, /*Elide=*/AllocInst != nullptr); 1285 if (AllocInst) { 1286 IRBuilder<> Builder(AllocInst); 1287 auto *Frame = Builder.CreateAlloca(Shape.FrameTy); 1288 Frame->setAlignment(Shape.FrameAlign); 1289 auto *VFrame = Builder.CreateBitCast(Frame, Builder.getInt8PtrTy()); 1290 AllocInst->replaceAllUsesWith(Builder.getFalse()); 1291 AllocInst->eraseFromParent(); 1292 CoroBegin->replaceAllUsesWith(VFrame); 1293 } else { 1294 CoroBegin->replaceAllUsesWith(CoroBegin->getMem()); 1295 } 1296 1297 break; 1298 } 1299 case coro::ABI::Async: 1300 case coro::ABI::Retcon: 1301 case coro::ABI::RetconOnce: 1302 CoroBegin->replaceAllUsesWith(UndefValue::get(CoroBegin->getType())); 1303 break; 1304 } 1305 1306 CoroBegin->eraseFromParent(); 1307 } 1308 1309 // SimplifySuspendPoint needs to check that there is no calls between 1310 // coro_save and coro_suspend, since any of the calls may potentially resume 1311 // the coroutine and if that is the case we cannot eliminate the suspend point. 1312 static bool hasCallsInBlockBetween(Instruction *From, Instruction *To) { 1313 for (Instruction *I = From; I != To; I = I->getNextNode()) { 1314 // Assume that no intrinsic can resume the coroutine. 1315 if (isa<IntrinsicInst>(I)) 1316 continue; 1317 1318 if (isa<CallBase>(I)) 1319 return true; 1320 } 1321 return false; 1322 } 1323 1324 static bool hasCallsInBlocksBetween(BasicBlock *SaveBB, BasicBlock *ResDesBB) { 1325 SmallPtrSet<BasicBlock *, 8> Set; 1326 SmallVector<BasicBlock *, 8> Worklist; 1327 1328 Set.insert(SaveBB); 1329 Worklist.push_back(ResDesBB); 1330 1331 // Accumulate all blocks between SaveBB and ResDesBB. 
Because CoroSaveIntr 1332 // returns a token consumed by suspend instruction, all blocks in between 1333 // will have to eventually hit SaveBB when going backwards from ResDesBB. 1334 while (!Worklist.empty()) { 1335 auto *BB = Worklist.pop_back_val(); 1336 Set.insert(BB); 1337 for (auto *Pred : predecessors(BB)) 1338 if (Set.count(Pred) == 0) 1339 Worklist.push_back(Pred); 1340 } 1341 1342 // SaveBB and ResDesBB are checked separately in hasCallsBetween. 1343 Set.erase(SaveBB); 1344 Set.erase(ResDesBB); 1345 1346 for (auto *BB : Set) 1347 if (hasCallsInBlockBetween(BB->getFirstNonPHI(), nullptr)) 1348 return true; 1349 1350 return false; 1351 } 1352 1353 static bool hasCallsBetween(Instruction *Save, Instruction *ResumeOrDestroy) { 1354 auto *SaveBB = Save->getParent(); 1355 auto *ResumeOrDestroyBB = ResumeOrDestroy->getParent(); 1356 1357 if (SaveBB == ResumeOrDestroyBB) 1358 return hasCallsInBlockBetween(Save->getNextNode(), ResumeOrDestroy); 1359 1360 // Any calls from Save to the end of the block? 1361 if (hasCallsInBlockBetween(Save->getNextNode(), nullptr)) 1362 return true; 1363 1364 // Any calls from begging of the block up to ResumeOrDestroy? 1365 if (hasCallsInBlockBetween(ResumeOrDestroyBB->getFirstNonPHI(), 1366 ResumeOrDestroy)) 1367 return true; 1368 1369 // Any calls in all of the blocks between SaveBB and ResumeOrDestroyBB? 1370 if (hasCallsInBlocksBetween(SaveBB, ResumeOrDestroyBB)) 1371 return true; 1372 1373 return false; 1374 } 1375 1376 // If a SuspendIntrin is preceded by Resume or Destroy, we can eliminate the 1377 // suspend point and replace it with nornal control flow. 
1378 static bool simplifySuspendPoint(CoroSuspendInst *Suspend, 1379 CoroBeginInst *CoroBegin) { 1380 Instruction *Prev = Suspend->getPrevNode(); 1381 if (!Prev) { 1382 auto *Pred = Suspend->getParent()->getSinglePredecessor(); 1383 if (!Pred) 1384 return false; 1385 Prev = Pred->getTerminator(); 1386 } 1387 1388 CallBase *CB = dyn_cast<CallBase>(Prev); 1389 if (!CB) 1390 return false; 1391 1392 auto *Callee = CB->getCalledOperand()->stripPointerCasts(); 1393 1394 // See if the callsite is for resumption or destruction of the coroutine. 1395 auto *SubFn = dyn_cast<CoroSubFnInst>(Callee); 1396 if (!SubFn) 1397 return false; 1398 1399 // Does not refer to the current coroutine, we cannot do anything with it. 1400 if (SubFn->getFrame() != CoroBegin) 1401 return false; 1402 1403 // See if the transformation is safe. Specifically, see if there are any 1404 // calls in between Save and CallInstr. They can potenitally resume the 1405 // coroutine rendering this optimization unsafe. 1406 auto *Save = Suspend->getCoroSave(); 1407 if (hasCallsBetween(Save, CB)) 1408 return false; 1409 1410 // Replace llvm.coro.suspend with the value that results in resumption over 1411 // the resume or cleanup path. 1412 Suspend->replaceAllUsesWith(SubFn->getRawIndex()); 1413 Suspend->eraseFromParent(); 1414 Save->eraseFromParent(); 1415 1416 // No longer need a call to coro.resume or coro.destroy. 1417 if (auto *Invoke = dyn_cast<InvokeInst>(CB)) { 1418 BranchInst::Create(Invoke->getNormalDest(), Invoke); 1419 } 1420 1421 // Grab the CalledValue from CB before erasing the CallInstr. 1422 auto *CalledValue = CB->getCalledOperand(); 1423 CB->eraseFromParent(); 1424 1425 // If no more users remove it. Usually it is a bitcast of SubFn. 1426 if (CalledValue != SubFn && CalledValue->user_empty()) 1427 if (auto *I = dyn_cast<Instruction>(CalledValue)) 1428 I->eraseFromParent(); 1429 1430 // Now we are good to remove SubFn. 
1431 if (SubFn->user_empty()) 1432 SubFn->eraseFromParent(); 1433 1434 return true; 1435 } 1436 1437 // Remove suspend points that are simplified. 1438 static void simplifySuspendPoints(coro::Shape &Shape) { 1439 // Currently, the only simplification we do is switch-lowering-specific. 1440 if (Shape.ABI != coro::ABI::Switch) 1441 return; 1442 1443 auto &S = Shape.CoroSuspends; 1444 size_t I = 0, N = S.size(); 1445 if (N == 0) 1446 return; 1447 while (true) { 1448 auto SI = cast<CoroSuspendInst>(S[I]); 1449 // Leave final.suspend to handleFinalSuspend since it is undefined behavior 1450 // to resume a coroutine suspended at the final suspend point. 1451 if (!SI->isFinal() && simplifySuspendPoint(SI, Shape.CoroBegin)) { 1452 if (--N == I) 1453 break; 1454 std::swap(S[I], S[N]); 1455 continue; 1456 } 1457 if (++I == N) 1458 break; 1459 } 1460 S.resize(N); 1461 } 1462 1463 static void splitSwitchCoroutine(Function &F, coro::Shape &Shape, 1464 SmallVectorImpl<Function *> &Clones) { 1465 assert(Shape.ABI == coro::ABI::Switch); 1466 1467 createResumeEntryBlock(F, Shape); 1468 auto ResumeClone = createClone(F, ".resume", Shape, 1469 CoroCloner::Kind::SwitchResume); 1470 auto DestroyClone = createClone(F, ".destroy", Shape, 1471 CoroCloner::Kind::SwitchUnwind); 1472 auto CleanupClone = createClone(F, ".cleanup", Shape, 1473 CoroCloner::Kind::SwitchCleanup); 1474 1475 postSplitCleanup(*ResumeClone); 1476 postSplitCleanup(*DestroyClone); 1477 postSplitCleanup(*CleanupClone); 1478 1479 addMustTailToCoroResumes(*ResumeClone); 1480 1481 // Store addresses resume/destroy/cleanup functions in the coroutine frame. 
1482 updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone); 1483 1484 assert(Clones.empty()); 1485 Clones.push_back(ResumeClone); 1486 Clones.push_back(DestroyClone); 1487 Clones.push_back(CleanupClone); 1488 1489 // Create a constant array referring to resume/destroy/clone functions pointed 1490 // by the last argument of @llvm.coro.info, so that CoroElide pass can 1491 // determined correct function to call. 1492 setCoroInfo(F, Shape, Clones); 1493 } 1494 1495 static void replaceAsyncResumeFunction(CoroSuspendAsyncInst *Suspend, 1496 Value *Continuation) { 1497 auto *ResumeIntrinsic = Suspend->getResumeFunction(); 1498 auto &Context = Suspend->getParent()->getParent()->getContext(); 1499 auto *Int8PtrTy = Type::getInt8PtrTy(Context); 1500 1501 IRBuilder<> Builder(ResumeIntrinsic); 1502 auto *Val = Builder.CreateBitOrPointerCast(Continuation, Int8PtrTy); 1503 ResumeIntrinsic->replaceAllUsesWith(Val); 1504 ResumeIntrinsic->eraseFromParent(); 1505 Suspend->setOperand(CoroSuspendAsyncInst::ResumeFunctionArg, 1506 UndefValue::get(Int8PtrTy)); 1507 } 1508 1509 /// Coerce the arguments in \p FnArgs according to \p FnTy in \p CallArgs. 
1510 static void coerceArguments(IRBuilder<> &Builder, FunctionType *FnTy, 1511 ArrayRef<Value *> FnArgs, 1512 SmallVectorImpl<Value *> &CallArgs) { 1513 size_t ArgIdx = 0; 1514 for (auto paramTy : FnTy->params()) { 1515 assert(ArgIdx < FnArgs.size()); 1516 if (paramTy != FnArgs[ArgIdx]->getType()) 1517 CallArgs.push_back( 1518 Builder.CreateBitOrPointerCast(FnArgs[ArgIdx], paramTy)); 1519 else 1520 CallArgs.push_back(FnArgs[ArgIdx]); 1521 ++ArgIdx; 1522 } 1523 } 1524 1525 CallInst *coro::createMustTailCall(DebugLoc Loc, Function *MustTailCallFn, 1526 ArrayRef<Value *> Arguments, 1527 IRBuilder<> &Builder) { 1528 auto *FnTy = 1529 cast<FunctionType>(MustTailCallFn->getType()->getPointerElementType()); 1530 // Coerce the arguments, llvm optimizations seem to ignore the types in 1531 // vaarg functions and throws away casts in optimized mode. 1532 SmallVector<Value *, 8> CallArgs; 1533 coerceArguments(Builder, FnTy, Arguments, CallArgs); 1534 1535 auto *TailCall = Builder.CreateCall(FnTy, MustTailCallFn, CallArgs); 1536 TailCall->setTailCallKind(CallInst::TCK_MustTail); 1537 TailCall->setDebugLoc(Loc); 1538 TailCall->setCallingConv(MustTailCallFn->getCallingConv()); 1539 return TailCall; 1540 } 1541 1542 static void splitAsyncCoroutine(Function &F, coro::Shape &Shape, 1543 SmallVectorImpl<Function *> &Clones) { 1544 assert(Shape.ABI == coro::ABI::Async); 1545 assert(Clones.empty()); 1546 // Reset various things that the optimizer might have decided it 1547 // "knows" about the coroutine function due to not seeing a return. 
1548 F.removeFnAttr(Attribute::NoReturn); 1549 F.removeAttribute(AttributeList::ReturnIndex, Attribute::NoAlias); 1550 F.removeAttribute(AttributeList::ReturnIndex, Attribute::NonNull); 1551 1552 auto &Context = F.getContext(); 1553 auto *Int8PtrTy = Type::getInt8PtrTy(Context); 1554 1555 auto *Id = cast<CoroIdAsyncInst>(Shape.CoroBegin->getId()); 1556 IRBuilder<> Builder(Id); 1557 1558 auto *FramePtr = Id->getStorage(); 1559 FramePtr = Builder.CreateBitOrPointerCast(FramePtr, Int8PtrTy); 1560 FramePtr = Builder.CreateConstInBoundsGEP1_32( 1561 Type::getInt8Ty(Context), FramePtr, Shape.AsyncLowering.FrameOffset, 1562 "async.ctx.frameptr"); 1563 1564 // Map all uses of llvm.coro.begin to the allocated frame pointer. 1565 { 1566 // Make sure we don't invalidate Shape.FramePtr. 1567 TrackingVH<Instruction> Handle(Shape.FramePtr); 1568 Shape.CoroBegin->replaceAllUsesWith(FramePtr); 1569 Shape.FramePtr = Handle.getValPtr(); 1570 } 1571 1572 // Create all the functions in order after the main function. 1573 auto NextF = std::next(F.getIterator()); 1574 1575 // Create a continuation function for each of the suspend points. 1576 Clones.reserve(Shape.CoroSuspends.size()); 1577 for (size_t Idx = 0, End = Shape.CoroSuspends.size(); Idx != End; ++Idx) { 1578 auto *Suspend = cast<CoroSuspendAsyncInst>(Shape.CoroSuspends[Idx]); 1579 1580 // Create the clone declaration. 1581 auto *Continuation = createCloneDeclaration( 1582 F, Shape, ".resume." + Twine(Idx), NextF, Suspend); 1583 Clones.push_back(Continuation); 1584 1585 // Insert a branch to a new return block immediately before the suspend 1586 // point. 1587 auto *SuspendBB = Suspend->getParent(); 1588 auto *NewSuspendBB = SuspendBB->splitBasicBlock(Suspend); 1589 auto *Branch = cast<BranchInst>(SuspendBB->getTerminator()); 1590 1591 // Place it before the first suspend. 
1592 auto *ReturnBB = 1593 BasicBlock::Create(F.getContext(), "coro.return", &F, NewSuspendBB); 1594 Branch->setSuccessor(0, ReturnBB); 1595 1596 IRBuilder<> Builder(ReturnBB); 1597 1598 // Insert the call to the tail call function and inline it. 1599 auto *Fn = Suspend->getMustTailCallFunction(); 1600 SmallVector<Value *, 8> Args(Suspend->args()); 1601 auto FnArgs = ArrayRef<Value *>(Args).drop_front( 1602 CoroSuspendAsyncInst::MustTailCallFuncArg + 1); 1603 auto *TailCall = 1604 coro::createMustTailCall(Suspend->getDebugLoc(), Fn, FnArgs, Builder); 1605 Builder.CreateRetVoid(); 1606 InlineFunctionInfo FnInfo; 1607 auto InlineRes = InlineFunction(*TailCall, FnInfo); 1608 assert(InlineRes.isSuccess() && "Expected inlining to succeed"); 1609 (void)InlineRes; 1610 1611 // Replace the lvm.coro.async.resume intrisic call. 1612 replaceAsyncResumeFunction(Suspend, Continuation); 1613 } 1614 1615 assert(Clones.size() == Shape.CoroSuspends.size()); 1616 for (size_t Idx = 0, End = Shape.CoroSuspends.size(); Idx != End; ++Idx) { 1617 auto *Suspend = Shape.CoroSuspends[Idx]; 1618 auto *Clone = Clones[Idx]; 1619 1620 CoroCloner(F, "resume." + Twine(Idx), Shape, Clone, Suspend).create(); 1621 } 1622 } 1623 1624 static void splitRetconCoroutine(Function &F, coro::Shape &Shape, 1625 SmallVectorImpl<Function *> &Clones) { 1626 assert(Shape.ABI == coro::ABI::Retcon || 1627 Shape.ABI == coro::ABI::RetconOnce); 1628 assert(Clones.empty()); 1629 1630 // Reset various things that the optimizer might have decided it 1631 // "knows" about the coroutine function due to not seeing a return. 1632 F.removeFnAttr(Attribute::NoReturn); 1633 F.removeAttribute(AttributeList::ReturnIndex, Attribute::NoAlias); 1634 F.removeAttribute(AttributeList::ReturnIndex, Attribute::NonNull); 1635 1636 // Allocate the frame. 
auto *Id = cast<AnyCoroIdRetconInst>(Shape.CoroBegin->getId());
  Value *RawFramePtr;
  if (Shape.RetconLowering.IsFrameInlineInStorage) {
    // The frame lives directly in the caller-provided storage.
    RawFramePtr = Id->getStorage();
  } else {
    IRBuilder<> Builder(Id);

    // Determine the size of the frame.
    const DataLayout &DL = F.getParent()->getDataLayout();
    auto Size = DL.getTypeAllocSize(Shape.FrameTy);

    // Allocate. We don't need to update the call graph node because we're
    // going to recompute it from scratch after splitting.
    // FIXME: pass the required alignment
    RawFramePtr = Shape.emitAlloc(Builder, Builder.getInt64(Size), nullptr);
    RawFramePtr =
        Builder.CreateBitCast(RawFramePtr, Shape.CoroBegin->getType());

    // Stash the allocated frame pointer in the continuation storage.
    auto Dest = Builder.CreateBitCast(Id->getStorage(),
                                      RawFramePtr->getType()->getPointerTo());
    Builder.CreateStore(RawFramePtr, Dest);
  }

  // Map all uses of llvm.coro.begin to the allocated frame pointer.
  {
    // Make sure we don't invalidate Shape.FramePtr.
    TrackingVH<Instruction> Handle(Shape.FramePtr);
    Shape.CoroBegin->replaceAllUsesWith(RawFramePtr);
    Shape.FramePtr = Handle.getValPtr();
  }

  // Create a unique return block.
  BasicBlock *ReturnBB = nullptr;
  SmallVector<PHINode *, 4> ReturnPHIs;

  // Create all the functions in order after the main function.
  auto NextF = std::next(F.getIterator());

  // Create a continuation function for each of the suspend points.
  Clones.reserve(Shape.CoroSuspends.size());
  for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) {
    auto Suspend = cast<CoroSuspendRetconInst>(Shape.CoroSuspends[i]);

    // Create the clone declaration.
    auto Continuation =
        createCloneDeclaration(F, Shape, ".resume." + Twine(i), NextF, nullptr);
    Clones.push_back(Continuation);

    // Insert a branch to the unified return block immediately before
    // the suspend point.
    auto SuspendBB = Suspend->getParent();
    auto NewSuspendBB = SuspendBB->splitBasicBlock(Suspend);
    auto Branch = cast<BranchInst>(SuspendBB->getTerminator());

    // Create the unified return block. This happens only on the first
    // iteration; later suspends just add incoming values to its PHIs.
    if (!ReturnBB) {
      // Place it before the first suspend.
      ReturnBB = BasicBlock::Create(F.getContext(), "coro.return", &F,
                                    NewSuspendBB);
      Shape.RetconLowering.ReturnBlock = ReturnBB;

      IRBuilder<> Builder(ReturnBB);

      // Create PHIs for all the return values.
      assert(ReturnPHIs.empty());

      // First, the continuation.
      ReturnPHIs.push_back(Builder.CreatePHI(Continuation->getType(),
                                             Shape.CoroSuspends.size()));

      // Next, all the directly-yielded values.
      for (auto ResultTy : Shape.getRetconResultTypes())
        ReturnPHIs.push_back(Builder.CreatePHI(ResultTy,
                                               Shape.CoroSuspends.size()));

      // Build the return value.
      auto RetTy = F.getReturnType();

      // Cast the continuation value if necessary.
      // We can't rely on the types matching up because that type would
      // have to be infinite.
      auto CastedContinuationTy =
          (ReturnPHIs.size() == 1 ? RetTy : RetTy->getStructElementType(0));
      auto *CastedContinuation =
          Builder.CreateBitCast(ReturnPHIs[0], CastedContinuationTy);

      // With a single PHI the continuation is returned directly; otherwise
      // the continuation and the yielded values are packed into an aggregate
      // of the function's return type.
      Value *RetV;
      if (ReturnPHIs.size() == 1) {
        RetV = CastedContinuation;
      } else {
        RetV = UndefValue::get(RetTy);
        RetV = Builder.CreateInsertValue(RetV, CastedContinuation, 0);
        for (size_t I = 1, E = ReturnPHIs.size(); I != E; ++I)
          RetV = Builder.CreateInsertValue(RetV, ReturnPHIs[I], I);
      }

      Builder.CreateRet(RetV);
    }

    // Branch to the return block.
    Branch->setSuccessor(0, ReturnBB);
    ReturnPHIs[0]->addIncoming(Continuation, SuspendBB);
    size_t NextPHIIndex = 1;
    for (auto &VUse : Suspend->value_operands())
      ReturnPHIs[NextPHIIndex++]->addIncoming(&*VUse, SuspendBB);
    // Every PHI must have received exactly one incoming value for this
    // suspend point.
    assert(NextPHIIndex == ReturnPHIs.size());
  }

  // Now that every declaration exists, fill in the body of each continuation.
  assert(Clones.size() == Shape.CoroSuspends.size());
  for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) {
    auto Suspend = Shape.CoroSuspends[i];
    auto Clone = Clones[i];

    CoroCloner(F, "resume." + Twine(i), Shape, Clone, Suspend).create();
  }
}

namespace {
  /// Pretty-stack-trace entry that names the coroutine currently being split.
  class PrettyStackTraceFunction : public PrettyStackTraceEntry {
    Function &F;
  public:
    PrettyStackTraceFunction(Function &F) : F(F) {}
    /// Writes "While splitting coroutine <function>" followed by a newline.
    void print(raw_ostream &OS) const override {
      OS << "While splitting coroutine ";
      F.printAsOperand(OS, /*print type*/ false, F.getParent());
      OS << "\n";
    }
  };
}

/// Split the coroutine \p F according to its ABI.
///
/// \param F              the coroutine to split.
/// \param Clones         handed to the ABI-specific splitter, which records
///                       the functions it creates; left untouched when \p F
///                       has no coro.begin or has no suspend points.
/// \param ReuseFrameSlot forwarded to the coro::Shape constructor.
/// \returns the coro::Shape computed for \p F. When Shape.CoroBegin is null
///          the shape is returned right after construction and nothing else
///          is done.
static coro::Shape splitCoroutine(Function &F,
                                  SmallVectorImpl<Function *> &Clones,
                                  bool ReuseFrameSlot) {
  PrettyStackTraceFunction prettyStackTrace(F);

  // The suspend-crossing algorithm in buildCoroutineFrame gets tripped
  // up by uses in unreachable blocks, so remove them as a first pass.
  removeUnreachableBlocks(F);

  coro::Shape Shape(F, ReuseFrameSlot);
  if (!Shape.CoroBegin)
    return Shape;

  simplifySuspendPoints(Shape);
  buildCoroutineFrame(F, Shape);
  replaceFrameSize(Shape);

  // If there are no suspend points, no split is required; just remove
  // the allocation and deallocation blocks, they are not needed.
  if (Shape.CoroSuspends.empty()) {
    handleNoSuspendCoroutine(Shape);
  } else {
    switch (Shape.ABI) {
    case coro::ABI::Switch:
      splitSwitchCoroutine(F, Shape, Clones);
      break;
    case coro::ABI::Async:
      splitAsyncCoroutine(F, Shape, Clones);
      break;
    case coro::ABI::Retcon:
    case coro::ABI::RetconOnce:
      splitRetconCoroutine(F, Shape, Clones);
      break;
    }
  }

  // Replace all the swifterror operations in the original function.
  // This invalidates SwiftErrorOps in the Shape.
  replaceSwiftErrorOps(F, Shape, nullptr);

  return Shape;
}

/// Legacy call graph variant: finish up \p F after splitting and register the
/// new functions in \p Clones with \p CG and \p SCC.
///
/// Does nothing when \p Shape has no coro.begin.
static void
updateCallGraphAfterCoroutineSplit(Function &F, const coro::Shape &Shape,
                                   const SmallVectorImpl<Function *> &Clones,
                                   CallGraph &CG, CallGraphSCC &SCC) {
  if (!Shape.CoroBegin)
    return;

  removeCoroEnds(Shape, &CG);
  postSplitCleanup(F);

  // Update call graph and add the functions we created to the SCC.
  coro::updateCallGraph(F, Clones, CG, SCC);
}

/// LazyCallGraph (new pass manager) variant: erase the coro.end calls of the
/// original function, tell the call graph about the functions in \p Clones,
/// and let the CGSCC infrastructure update its view of the function in \p N.
///
/// Does nothing when \p Shape has no coro.begin.
static void updateCallGraphAfterCoroutineSplit(
    LazyCallGraph::Node &N, const coro::Shape &Shape,
    const SmallVectorImpl<Function *> &Clones, LazyCallGraph::SCC &C,
    LazyCallGraph &CG, CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR,
    FunctionAnalysisManager &FAM) {
  if (!Shape.CoroBegin)
    return;

  // Every remaining coro.end is replaced by the constant `false` and erased.
  for (llvm::AnyCoroEndInst *End : Shape.CoroEnds) {
    auto &Context = End->getContext();
    End->replaceAllUsesWith(ConstantInt::getFalse(Context));
    End->eraseFromParent();
  }

  if (!Clones.empty()) {
    switch (Shape.ABI) {
    case coro::ABI::Switch:
      // Each clone in the Switch lowering is independent of the other clones.
      // Let the LazyCallGraph know about each one separately.
      for (Function *Clone : Clones)
        CG.addSplitFunction(N.getFunction(), *Clone);
      break;
    case coro::ABI::Async:
    case coro::ABI::Retcon:
    case coro::ABI::RetconOnce:
      // Each clone in the Async/Retcon lowering references the other clones.
      // Let the LazyCallGraph know about all of them at once.
      // NOTE(review): this check repeats the enclosing `if` and is always
      // true here.
      if (!Clones.empty())
        CG.addSplitRefRecursiveFunctions(N.getFunction(), Clones);
      break;
    }

    // Let the CGSCC infra handle the changes to the original function.
    updateCGAndAnalysisManagerForCGSCCPass(CG, C, N, AM, UR, FAM);
  }

  // Do some cleanup and let the CGSCC infra see if we've cleaned up any edges
  // to the split functions.
  postSplitCleanup(N.getFunction());
  updateCGAndAnalysisManagerForFunctionPass(CG, C, N, AM, UR, FAM);
}

// When we see the coroutine the first time, we insert an indirect call to a
// devirt trigger function and mark the coroutine that it is now ready for
// split.
// Async lowering uses this after it has split the function to restart the
// pipeline.
//
// F is tagged with CORO_PRESPLIT_ATTR: ASYNC_RESTART_AFTER_SPLIT when
// MarkForAsyncRestart is set, PREPARED_FOR_SPLIT otherwise. The flag also
// selects where the indirect call is inserted in the entry block.
static void prepareForSplit(Function &F, CallGraph &CG,
                            bool MarkForAsyncRestart = false) {
  Module &M = *F.getParent();
  LLVMContext &Context = F.getContext();
#ifndef NDEBUG
  // The trigger function must already exist in the module.
  Function *DevirtFn = M.getFunction(CORO_DEVIRT_TRIGGER_FN);
  assert(DevirtFn && "coro.devirt.trigger function not found");
#endif

  F.addFnAttr(CORO_PRESPLIT_ATTR, MarkForAsyncRestart
                                      ? ASYNC_RESTART_AFTER_SPLIT
                                      : PREPARED_FOR_SPLIT);

  // Insert an indirect call sequence that will be devirtualized by CoroElide
  // pass:
  //    %0 = call i8* @llvm.coro.subfn.addr(i8* null, i8 -1)
  //    %1 = bitcast i8* %0 to void(i8*)*
  //    call void %1(i8* null)
  coro::LowererBase Lowerer(M);
  Instruction *InsertPt =
      MarkForAsyncRestart ? F.getEntryBlock().getFirstNonPHIOrDbgOrLifetime()
                          : F.getEntryBlock().getTerminator();
  auto *Null = ConstantPointerNull::get(Type::getInt8PtrTy(Context));
  auto *DevirtFnAddr =
      Lowerer.makeSubFnCall(Null, CoroSubFnInst::RestartTrigger, InsertPt);
  FunctionType *FnTy = FunctionType::get(Type::getVoidTy(Context),
                                         {Type::getInt8PtrTy(Context)}, false);
  auto *IndirectCall = CallInst::Create(FnTy, DevirtFnAddr, Null, "", InsertPt);

  // Update CG graph with an indirect call we just added.
  CG[&F]->addCalledFunction(IndirectCall, CG.getCallsExternalNode());
}

// Make sure that there is a devirtualization trigger function that the
// coro-split pass uses to force a restart of the CGSCC pipeline. If the devirt
// trigger function is not found, we will create one and add it to the current
// SCC.
static void createDevirtTriggerFunc(CallGraph &CG, CallGraphSCC &SCC) {
  Module &M = CG.getModule();
  if (M.getFunction(CORO_DEVIRT_TRIGGER_FN))
    return;

  // Build a private, always-inline `void(i8*)` function whose body is a
  // single return.
  LLVMContext &C = M.getContext();
  auto *FnTy = FunctionType::get(Type::getVoidTy(C), Type::getInt8PtrTy(C),
                                 /*isVarArg=*/false);
  Function *DevirtFn =
      Function::Create(FnTy, GlobalValue::LinkageTypes::PrivateLinkage,
                       CORO_DEVIRT_TRIGGER_FN, &M);
  DevirtFn->addFnAttr(Attribute::AlwaysInline);
  auto *Entry = BasicBlock::Create(C, "entry", DevirtFn);
  ReturnInst::Create(C, Entry);

  auto *Node = CG.getOrInsertFunction(DevirtFn);

  // Re-initialize the SCC with its current nodes plus the new function's node.
  SmallVector<CallGraphNode *, 8> Nodes(SCC.begin(), SCC.end());
  Nodes.push_back(Node);
  SCC.initialize(Nodes);
}

/// Replace a call to llvm.coro.prepare.retcon.
1932 static void replacePrepare(CallInst *Prepare, LazyCallGraph &CG, 1933 LazyCallGraph::SCC &C) { 1934 auto CastFn = Prepare->getArgOperand(0); // as an i8* 1935 auto Fn = CastFn->stripPointerCasts(); // as its original type 1936 1937 // Attempt to peephole this pattern: 1938 // %0 = bitcast [[TYPE]] @some_function to i8* 1939 // %1 = call @llvm.coro.prepare.retcon(i8* %0) 1940 // %2 = bitcast %1 to [[TYPE]] 1941 // ==> 1942 // %2 = @some_function 1943 for (auto UI = Prepare->use_begin(), UE = Prepare->use_end(); UI != UE;) { 1944 // Look for bitcasts back to the original function type. 1945 auto *Cast = dyn_cast<BitCastInst>((UI++)->getUser()); 1946 if (!Cast || Cast->getType() != Fn->getType()) 1947 continue; 1948 1949 // Replace and remove the cast. 1950 Cast->replaceAllUsesWith(Fn); 1951 Cast->eraseFromParent(); 1952 } 1953 1954 // Replace any remaining uses with the function as an i8*. 1955 // This can never directly be a callee, so we don't need to update CG. 1956 Prepare->replaceAllUsesWith(CastFn); 1957 Prepare->eraseFromParent(); 1958 1959 // Kill dead bitcasts. 1960 while (auto *Cast = dyn_cast<BitCastInst>(CastFn)) { 1961 if (!Cast->use_empty()) 1962 break; 1963 CastFn = Cast->getOperand(0); 1964 Cast->eraseFromParent(); 1965 } 1966 } 1967 /// Replace a call to llvm.coro.prepare.retcon. 1968 static void replacePrepare(CallInst *Prepare, CallGraph &CG) { 1969 auto CastFn = Prepare->getArgOperand(0); // as an i8* 1970 auto Fn = CastFn->stripPointerCasts(); // as its original type 1971 1972 // Find call graph nodes for the preparation. 
1973 CallGraphNode *PrepareUserNode = nullptr, *FnNode = nullptr; 1974 if (auto ConcreteFn = dyn_cast<Function>(Fn)) { 1975 PrepareUserNode = CG[Prepare->getFunction()]; 1976 FnNode = CG[ConcreteFn]; 1977 } 1978 1979 // Attempt to peephole this pattern: 1980 // %0 = bitcast [[TYPE]] @some_function to i8* 1981 // %1 = call @llvm.coro.prepare.retcon(i8* %0) 1982 // %2 = bitcast %1 to [[TYPE]] 1983 // ==> 1984 // %2 = @some_function 1985 for (auto UI = Prepare->use_begin(), UE = Prepare->use_end(); 1986 UI != UE; ) { 1987 // Look for bitcasts back to the original function type. 1988 auto *Cast = dyn_cast<BitCastInst>((UI++)->getUser()); 1989 if (!Cast || Cast->getType() != Fn->getType()) continue; 1990 1991 // Check whether the replacement will introduce new direct calls. 1992 // If so, we'll need to update the call graph. 1993 if (PrepareUserNode) { 1994 for (auto &Use : Cast->uses()) { 1995 if (auto *CB = dyn_cast<CallBase>(Use.getUser())) { 1996 if (!CB->isCallee(&Use)) 1997 continue; 1998 PrepareUserNode->removeCallEdgeFor(*CB); 1999 PrepareUserNode->addCalledFunction(CB, FnNode); 2000 } 2001 } 2002 } 2003 2004 // Replace and remove the cast. 2005 Cast->replaceAllUsesWith(Fn); 2006 Cast->eraseFromParent(); 2007 } 2008 2009 // Replace any remaining uses with the function as an i8*. 2010 // This can never directly be a callee, so we don't need to update CG. 2011 Prepare->replaceAllUsesWith(CastFn); 2012 Prepare->eraseFromParent(); 2013 2014 // Kill dead bitcasts. 2015 while (auto *Cast = dyn_cast<BitCastInst>(CastFn)) { 2016 if (!Cast->use_empty()) break; 2017 CastFn = Cast->getOperand(0); 2018 Cast->eraseFromParent(); 2019 } 2020 } 2021 2022 static bool replaceAllPrepares(Function *PrepareFn, LazyCallGraph &CG, 2023 LazyCallGraph::SCC &C) { 2024 bool Changed = false; 2025 for (auto PI = PrepareFn->use_begin(), PE = PrepareFn->use_end(); PI != PE;) { 2026 // Intrinsics can only be used in calls. 
2027 auto *Prepare = cast<CallInst>((PI++)->getUser()); 2028 replacePrepare(Prepare, CG, C); 2029 Changed = true; 2030 } 2031 2032 return Changed; 2033 } 2034 2035 /// Remove calls to llvm.coro.prepare.retcon, a barrier meant to prevent 2036 /// IPO from operating on calls to a retcon coroutine before it's been 2037 /// split. This is only safe to do after we've split all retcon 2038 /// coroutines in the module. We can do that this in this pass because 2039 /// this pass does promise to split all retcon coroutines (as opposed to 2040 /// switch coroutines, which are lowered in multiple stages). 2041 static bool replaceAllPrepares(Function *PrepareFn, CallGraph &CG) { 2042 bool Changed = false; 2043 for (auto PI = PrepareFn->use_begin(), PE = PrepareFn->use_end(); 2044 PI != PE; ) { 2045 // Intrinsics can only be used in calls. 2046 auto *Prepare = cast<CallInst>((PI++)->getUser()); 2047 replacePrepare(Prepare, CG); 2048 Changed = true; 2049 } 2050 2051 return Changed; 2052 } 2053 2054 static bool declaresCoroSplitIntrinsics(const Module &M) { 2055 return coro::declaresIntrinsics(M, {"llvm.coro.begin", 2056 "llvm.coro.prepare.retcon", 2057 "llvm.coro.prepare.async"}); 2058 } 2059 2060 static void addPrepareFunction(const Module &M, 2061 SmallVectorImpl<Function *> &Fns, 2062 StringRef Name) { 2063 auto *PrepareFn = M.getFunction(Name); 2064 if (PrepareFn && !PrepareFn->use_empty()) 2065 Fns.push_back(PrepareFn); 2066 } 2067 2068 PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C, 2069 CGSCCAnalysisManager &AM, 2070 LazyCallGraph &CG, CGSCCUpdateResult &UR) { 2071 // NB: One invariant of a valid LazyCallGraph::SCC is that it must contain a 2072 // non-zero number of nodes, so we assume that here and grab the first 2073 // node's function's module. 
2074 Module &M = *C.begin()->getFunction().getParent(); 2075 auto &FAM = 2076 AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager(); 2077 2078 if (!declaresCoroSplitIntrinsics(M)) 2079 return PreservedAnalyses::all(); 2080 2081 // Check for uses of llvm.coro.prepare.retcon/async. 2082 SmallVector<Function *, 2> PrepareFns; 2083 addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.retcon"); 2084 addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.async"); 2085 2086 // Find coroutines for processing. 2087 SmallVector<LazyCallGraph::Node *, 4> Coroutines; 2088 for (LazyCallGraph::Node &N : C) 2089 if (N.getFunction().hasFnAttribute(CORO_PRESPLIT_ATTR)) 2090 Coroutines.push_back(&N); 2091 2092 if (Coroutines.empty() && PrepareFns.empty()) 2093 return PreservedAnalyses::all(); 2094 2095 if (Coroutines.empty()) { 2096 for (auto *PrepareFn : PrepareFns) { 2097 replaceAllPrepares(PrepareFn, CG, C); 2098 } 2099 } 2100 2101 // Split all the coroutines. 2102 for (LazyCallGraph::Node *N : Coroutines) { 2103 Function &F = N->getFunction(); 2104 Attribute Attr = F.getFnAttribute(CORO_PRESPLIT_ATTR); 2105 StringRef Value = Attr.getValueAsString(); 2106 LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F.getName() 2107 << "' state: " << Value << "\n"); 2108 if (Value == UNPREPARED_FOR_SPLIT) { 2109 // Enqueue a second iteration of the CGSCC pipeline on this SCC. 2110 UR.CWorklist.insert(&C); 2111 F.addFnAttr(CORO_PRESPLIT_ATTR, PREPARED_FOR_SPLIT); 2112 continue; 2113 } 2114 F.removeFnAttr(CORO_PRESPLIT_ATTR); 2115 2116 SmallVector<Function *, 4> Clones; 2117 const coro::Shape Shape = splitCoroutine(F, Clones, ReuseFrameSlot); 2118 updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM); 2119 2120 if ((Shape.ABI == coro::ABI::Async || Shape.ABI == coro::ABI::Retcon || 2121 Shape.ABI == coro::ABI::RetconOnce) && 2122 !Shape.CoroSuspends.empty()) { 2123 // Run the CGSCC pipeline on the newly split functions. 
2124 // All clones will be in the same RefSCC, so choose a random clone. 2125 UR.RCWorklist.insert(CG.lookupRefSCC(CG.get(*Clones[0]))); 2126 } 2127 } 2128 2129 if (!PrepareFns.empty()) { 2130 for (auto *PrepareFn : PrepareFns) { 2131 replaceAllPrepares(PrepareFn, CG, C); 2132 } 2133 } 2134 2135 return PreservedAnalyses::none(); 2136 } 2137 2138 namespace { 2139 2140 // We present a coroutine to LLVM as an ordinary function with suspension 2141 // points marked up with intrinsics. We let the optimizer party on the coroutine 2142 // as a single function for as long as possible. Shortly before the coroutine is 2143 // eligible to be inlined into its callers, we split up the coroutine into parts 2144 // corresponding to initial, resume and destroy invocations of the coroutine, 2145 // add them to the current SCC and restart the IPO pipeline to optimize the 2146 // coroutine subfunctions we extracted before proceeding to the caller of the 2147 // coroutine. 2148 struct CoroSplitLegacy : public CallGraphSCCPass { 2149 static char ID; // Pass identification, replacement for typeid 2150 2151 CoroSplitLegacy(bool ReuseFrameSlot = false) 2152 : CallGraphSCCPass(ID), ReuseFrameSlot(ReuseFrameSlot) { 2153 initializeCoroSplitLegacyPass(*PassRegistry::getPassRegistry()); 2154 } 2155 2156 bool Run = false; 2157 bool ReuseFrameSlot; 2158 2159 // A coroutine is identified by the presence of coro.begin intrinsic, if 2160 // we don't have any, this pass has nothing to do. 2161 bool doInitialization(CallGraph &CG) override { 2162 Run = declaresCoroSplitIntrinsics(CG.getModule()); 2163 return CallGraphSCCPass::doInitialization(CG); 2164 } 2165 2166 bool runOnSCC(CallGraphSCC &SCC) override { 2167 if (!Run) 2168 return false; 2169 2170 // Check for uses of llvm.coro.prepare.retcon. 
2171 SmallVector<Function *, 2> PrepareFns; 2172 auto &M = SCC.getCallGraph().getModule(); 2173 addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.retcon"); 2174 addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.async"); 2175 2176 // Find coroutines for processing. 2177 SmallVector<Function *, 4> Coroutines; 2178 for (CallGraphNode *CGN : SCC) 2179 if (auto *F = CGN->getFunction()) 2180 if (F->hasFnAttribute(CORO_PRESPLIT_ATTR)) 2181 Coroutines.push_back(F); 2182 2183 if (Coroutines.empty() && PrepareFns.empty()) 2184 return false; 2185 2186 CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); 2187 2188 if (Coroutines.empty()) { 2189 bool Changed = false; 2190 for (auto *PrepareFn : PrepareFns) 2191 Changed |= replaceAllPrepares(PrepareFn, CG); 2192 return Changed; 2193 } 2194 2195 createDevirtTriggerFunc(CG, SCC); 2196 2197 // Split all the coroutines. 2198 for (Function *F : Coroutines) { 2199 Attribute Attr = F->getFnAttribute(CORO_PRESPLIT_ATTR); 2200 StringRef Value = Attr.getValueAsString(); 2201 LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F->getName() 2202 << "' state: " << Value << "\n"); 2203 // Async lowering marks coroutines to trigger a restart of the pipeline 2204 // after it has split them. 2205 if (Value == ASYNC_RESTART_AFTER_SPLIT) { 2206 F->removeFnAttr(CORO_PRESPLIT_ATTR); 2207 continue; 2208 } 2209 if (Value == UNPREPARED_FOR_SPLIT) { 2210 prepareForSplit(*F, CG); 2211 continue; 2212 } 2213 F->removeFnAttr(CORO_PRESPLIT_ATTR); 2214 2215 SmallVector<Function *, 4> Clones; 2216 const coro::Shape Shape = splitCoroutine(*F, Clones, ReuseFrameSlot); 2217 updateCallGraphAfterCoroutineSplit(*F, Shape, Clones, CG, SCC); 2218 if (Shape.ABI == coro::ABI::Async) { 2219 // Restart SCC passes. 2220 // Mark function for CoroElide pass. It will devirtualize causing a 2221 // restart of the SCC pipeline. 
2222 prepareForSplit(*F, CG, true /*MarkForAsyncRestart*/); 2223 } 2224 } 2225 2226 for (auto *PrepareFn : PrepareFns) 2227 replaceAllPrepares(PrepareFn, CG); 2228 2229 return true; 2230 } 2231 2232 void getAnalysisUsage(AnalysisUsage &AU) const override { 2233 CallGraphSCCPass::getAnalysisUsage(AU); 2234 } 2235 2236 StringRef getPassName() const override { return "Coroutine Splitting"; } 2237 }; 2238 2239 } // end anonymous namespace 2240 2241 char CoroSplitLegacy::ID = 0; 2242 2243 INITIALIZE_PASS_BEGIN( 2244 CoroSplitLegacy, "coro-split", 2245 "Split coroutine into a set of functions driving its state machine", false, 2246 false) 2247 INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) 2248 INITIALIZE_PASS_END( 2249 CoroSplitLegacy, "coro-split", 2250 "Split coroutine into a set of functions driving its state machine", false, 2251 false) 2252 2253 Pass *llvm::createCoroSplitLegacyPass(bool ReuseFrameSlot) { 2254 return new CoroSplitLegacy(ReuseFrameSlot); 2255 } 2256