1 //===- CoroSplit.cpp - Converts a coroutine into a state machine ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // This pass builds the coroutine frame and outlines resume and destroy parts
9 // of the coroutine into separate functions.
10 //
11 // We present a coroutine to an LLVM as an ordinary function with suspension
12 // points marked up with intrinsics. We let the optimizer party on the coroutine
13 // as a single function for as long as possible. Shortly before the coroutine is
14 // eligible to be inlined into its callers, we split up the coroutine into parts
15 // corresponding to an initial, resume and destroy invocations of the coroutine,
16 // add them to the current SCC and restart the IPO pipeline to optimize the
17 // coroutine subfunctions we extracted before proceeding to the caller of the
18 // coroutine.
19 //===----------------------------------------------------------------------===//
20 
21 #include "llvm/Transforms/Coroutines/CoroSplit.h"
22 #include "CoroInstr.h"
23 #include "CoroInternal.h"
24 #include "llvm/ADT/DenseMap.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/ADT/StringRef.h"
28 #include "llvm/ADT/Twine.h"
29 #include "llvm/Analysis/CFG.h"
30 #include "llvm/Analysis/CallGraph.h"
31 #include "llvm/Analysis/CallGraphSCCPass.h"
32 #include "llvm/Analysis/ConstantFolding.h"
33 #include "llvm/Analysis/LazyCallGraph.h"
34 #include "llvm/BinaryFormat/Dwarf.h"
35 #include "llvm/IR/Argument.h"
36 #include "llvm/IR/Attributes.h"
37 #include "llvm/IR/BasicBlock.h"
38 #include "llvm/IR/CFG.h"
39 #include "llvm/IR/CallingConv.h"
40 #include "llvm/IR/Constants.h"
41 #include "llvm/IR/DataLayout.h"
42 #include "llvm/IR/DerivedTypes.h"
43 #include "llvm/IR/Dominators.h"
44 #include "llvm/IR/Function.h"
45 #include "llvm/IR/GlobalValue.h"
46 #include "llvm/IR/GlobalVariable.h"
47 #include "llvm/IR/IRBuilder.h"
48 #include "llvm/IR/InstIterator.h"
49 #include "llvm/IR/InstrTypes.h"
50 #include "llvm/IR/Instruction.h"
51 #include "llvm/IR/Instructions.h"
52 #include "llvm/IR/IntrinsicInst.h"
53 #include "llvm/IR/LLVMContext.h"
54 #include "llvm/IR/LegacyPassManager.h"
55 #include "llvm/IR/Module.h"
56 #include "llvm/IR/Type.h"
57 #include "llvm/IR/Value.h"
58 #include "llvm/IR/Verifier.h"
59 #include "llvm/InitializePasses.h"
60 #include "llvm/Pass.h"
61 #include "llvm/Support/Casting.h"
62 #include "llvm/Support/Debug.h"
63 #include "llvm/Support/PrettyStackTrace.h"
64 #include "llvm/Support/raw_ostream.h"
65 #include "llvm/Transforms/Scalar.h"
66 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
67 #include "llvm/Transforms/Utils/CallGraphUpdater.h"
68 #include "llvm/Transforms/Utils/Cloning.h"
69 #include "llvm/Transforms/Utils/Local.h"
70 #include "llvm/Transforms/Utils/ValueMapper.h"
71 #include <cassert>
72 #include <cstddef>
73 #include <cstdint>
74 #include <initializer_list>
75 #include <iterator>
76 
77 using namespace llvm;
78 
79 #define DEBUG_TYPE "coro-split"
80 
81 namespace {
82 
83 /// A little helper class for building
84 class CoroCloner {
85 public:
86   enum class Kind {
87     /// The shared resume function for a switch lowering.
88     SwitchResume,
89 
90     /// The shared unwind function for a switch lowering.
91     SwitchUnwind,
92 
93     /// The shared cleanup function for a switch lowering.
94     SwitchCleanup,
95 
96     /// An individual continuation function.
97     Continuation,
98 
99     /// An async resume function.
100     Async,
101   };
102 
103 private:
104   Function &OrigF;
105   Function *NewF;
106   const Twine &Suffix;
107   coro::Shape &Shape;
108   Kind FKind;
109   ValueToValueMapTy VMap;
110   IRBuilder<> Builder;
111   Value *NewFramePtr = nullptr;
112 
113   /// The active suspend instruction; meaningful only for continuation and async
114   /// ABIs.
115   AnyCoroSuspendInst *ActiveSuspend = nullptr;
116 
117 public:
118   /// Create a cloner for a switch lowering.
119   CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
120              Kind FKind)
121     : OrigF(OrigF), NewF(nullptr), Suffix(Suffix), Shape(Shape),
122       FKind(FKind), Builder(OrigF.getContext()) {
123     assert(Shape.ABI == coro::ABI::Switch);
124   }
125 
126   /// Create a cloner for a continuation lowering.
127   CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
128              Function *NewF, AnyCoroSuspendInst *ActiveSuspend)
129       : OrigF(OrigF), NewF(NewF), Suffix(Suffix), Shape(Shape),
130         FKind(Shape.ABI == coro::ABI::Async ? Kind::Async : Kind::Continuation),
131         Builder(OrigF.getContext()), ActiveSuspend(ActiveSuspend) {
132     assert(Shape.ABI == coro::ABI::Retcon ||
133            Shape.ABI == coro::ABI::RetconOnce || Shape.ABI == coro::ABI::Async);
134     assert(NewF && "need existing function for continuation");
135     assert(ActiveSuspend && "need active suspend point for continuation");
136   }
137 
138   Function *getFunction() const {
139     assert(NewF != nullptr && "declaration not yet set");
140     return NewF;
141   }
142 
143   void create();
144 
145 private:
146   bool isSwitchDestroyFunction() {
147     switch (FKind) {
148     case Kind::Async:
149     case Kind::Continuation:
150     case Kind::SwitchResume:
151       return false;
152     case Kind::SwitchUnwind:
153     case Kind::SwitchCleanup:
154       return true;
155     }
156     llvm_unreachable("Unknown CoroCloner::Kind enum");
157   }
158 
159   void replaceEntryBlock();
160   Value *deriveNewFramePointer();
161   void replaceRetconOrAsyncSuspendUses();
162   void replaceCoroSuspends();
163   void replaceCoroEnds();
164   void replaceSwiftErrorOps();
165   void salvageDebugInfo();
166   void handleFinalSuspend();
167 };
168 
169 } // end anonymous namespace
170 
171 static void maybeFreeRetconStorage(IRBuilder<> &Builder,
172                                    const coro::Shape &Shape, Value *FramePtr,
173                                    CallGraph *CG) {
174   assert(Shape.ABI == coro::ABI::Retcon ||
175          Shape.ABI == coro::ABI::RetconOnce);
176   if (Shape.RetconLowering.IsFrameInlineInStorage)
177     return;
178 
179   Shape.emitDealloc(Builder, FramePtr, CG);
180 }
181 
182 /// Replace an llvm.coro.end.async.
183 /// Will inline the must tail call function call if there is one.
184 /// \returns true if cleanup of the coro.end block is needed, false otherwise.
185 static bool replaceCoroEndAsync(AnyCoroEndInst *End) {
186   IRBuilder<> Builder(End);
187 
188   auto *EndAsync = dyn_cast<CoroAsyncEndInst>(End);
189   if (!EndAsync) {
190     Builder.CreateRetVoid();
191     return true /*needs cleanup of coro.end block*/;
192   }
193 
194   auto *MustTailCallFunc = EndAsync->getMustTailCallFunction();
195   if (!MustTailCallFunc) {
196     Builder.CreateRetVoid();
197     return true /*needs cleanup of coro.end block*/;
198   }
199 
200   // Move the must tail call from the predecessor block into the end block.
201   auto *CoroEndBlock = End->getParent();
202   auto *MustTailCallFuncBlock = CoroEndBlock->getSinglePredecessor();
203   assert(MustTailCallFuncBlock && "Must have a single predecessor block");
204   auto It = MustTailCallFuncBlock->getTerminator()->getIterator();
205   auto *MustTailCall = cast<CallInst>(&*std::prev(It));
206   CoroEndBlock->getInstList().splice(
207       End->getIterator(), MustTailCallFuncBlock->getInstList(), MustTailCall);
208 
209   // Insert the return instruction.
210   Builder.SetInsertPoint(End);
211   Builder.CreateRetVoid();
212   InlineFunctionInfo FnInfo;
213 
214   // Remove the rest of the block, by splitting it into an unreachable block.
215   auto *BB = End->getParent();
216   BB->splitBasicBlock(End);
217   BB->getTerminator()->eraseFromParent();
218 
219   auto InlineRes = InlineFunction(*MustTailCall, FnInfo);
220   assert(InlineRes.isSuccess() && "Expected inlining to succeed");
221   (void)InlineRes;
222 
223   // We have cleaned up the coro.end block above.
224   return false;
225 }
226 
227 /// Replace a non-unwind call to llvm.coro.end.
228 static void replaceFallthroughCoroEnd(AnyCoroEndInst *End,
229                                       const coro::Shape &Shape, Value *FramePtr,
230                                       bool InResume, CallGraph *CG) {
231   // Start inserting right before the coro.end.
232   IRBuilder<> Builder(End);
233 
234   // Create the return instruction.
235   switch (Shape.ABI) {
236   // The cloned functions in switch-lowering always return void.
237   case coro::ABI::Switch:
238     // coro.end doesn't immediately end the coroutine in the main function
239     // in this lowering, because we need to deallocate the coroutine.
240     if (!InResume)
241       return;
242     Builder.CreateRetVoid();
243     break;
244 
245   // In async lowering this returns.
246   case coro::ABI::Async: {
247     bool CoroEndBlockNeedsCleanup = replaceCoroEndAsync(End);
248     if (!CoroEndBlockNeedsCleanup)
249       return;
250     break;
251   }
252 
253   // In unique continuation lowering, the continuations always return void.
254   // But we may have implicitly allocated storage.
255   case coro::ABI::RetconOnce:
256     maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
257     Builder.CreateRetVoid();
258     break;
259 
260   // In non-unique continuation lowering, we signal completion by returning
261   // a null continuation.
262   case coro::ABI::Retcon: {
263     maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
264     auto RetTy = Shape.getResumeFunctionType()->getReturnType();
265     auto RetStructTy = dyn_cast<StructType>(RetTy);
266     PointerType *ContinuationTy =
267       cast<PointerType>(RetStructTy ? RetStructTy->getElementType(0) : RetTy);
268 
269     Value *ReturnValue = ConstantPointerNull::get(ContinuationTy);
270     if (RetStructTy) {
271       ReturnValue = Builder.CreateInsertValue(UndefValue::get(RetStructTy),
272                                               ReturnValue, 0);
273     }
274     Builder.CreateRet(ReturnValue);
275     break;
276   }
277   }
278 
279   // Remove the rest of the block, by splitting it into an unreachable block.
280   auto *BB = End->getParent();
281   BB->splitBasicBlock(End);
282   BB->getTerminator()->eraseFromParent();
283 }
284 
285 // Mark a coroutine as done, which implies that the coroutine is finished and
286 // never get resumed.
287 //
288 // In resume-switched ABI, the done state is represented by storing zero in
289 // ResumeFnAddr.
290 //
291 // NOTE: We couldn't omit the argument `FramePtr`. It is necessary because the
292 // pointer to the frame in splitted function is not stored in `Shape`.
293 static void markCoroutineAsDone(IRBuilder<> &Builder, const coro::Shape &Shape,
294                                 Value *FramePtr) {
295   assert(
296       Shape.ABI == coro::ABI::Switch &&
297       "markCoroutineAsDone is only supported for Switch-Resumed ABI for now.");
298   auto *GepIndex = Builder.CreateStructGEP(
299       Shape.FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Resume,
300       "ResumeFn.addr");
301   auto *NullPtr = ConstantPointerNull::get(cast<PointerType>(
302       Shape.FrameTy->getTypeAtIndex(coro::Shape::SwitchFieldIndex::Resume)));
303   Builder.CreateStore(NullPtr, GepIndex);
304 }
305 
306 /// Replace an unwind call to llvm.coro.end.
307 static void replaceUnwindCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape,
308                                  Value *FramePtr, bool InResume,
309                                  CallGraph *CG) {
310   IRBuilder<> Builder(End);
311 
312   switch (Shape.ABI) {
313   // In switch-lowering, this does nothing in the main function.
314   case coro::ABI::Switch: {
315     // In C++'s specification, the coroutine should be marked as done
316     // if promise.unhandled_exception() throws.  The frontend will
317     // call coro.end(true) along this path.
318     //
319     // FIXME: We should refactor this once there is other language
320     // which uses Switch-Resumed style other than C++.
321     markCoroutineAsDone(Builder, Shape, FramePtr);
322     if (!InResume)
323       return;
324     break;
325   }
326   // In async lowering this does nothing.
327   case coro::ABI::Async:
328     break;
329   // In continuation-lowering, this frees the continuation storage.
330   case coro::ABI::Retcon:
331   case coro::ABI::RetconOnce:
332     maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
333     break;
334   }
335 
336   // If coro.end has an associated bundle, add cleanupret instruction.
337   if (auto Bundle = End->getOperandBundle(LLVMContext::OB_funclet)) {
338     auto *FromPad = cast<CleanupPadInst>(Bundle->Inputs[0]);
339     auto *CleanupRet = Builder.CreateCleanupRet(FromPad, nullptr);
340     End->getParent()->splitBasicBlock(End);
341     CleanupRet->getParent()->getTerminator()->eraseFromParent();
342   }
343 }
344 
345 static void replaceCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape,
346                            Value *FramePtr, bool InResume, CallGraph *CG) {
347   if (End->isUnwind())
348     replaceUnwindCoroEnd(End, Shape, FramePtr, InResume, CG);
349   else
350     replaceFallthroughCoroEnd(End, Shape, FramePtr, InResume, CG);
351 
352   auto &Context = End->getContext();
353   End->replaceAllUsesWith(InResume ? ConstantInt::getTrue(Context)
354                                    : ConstantInt::getFalse(Context));
355   End->eraseFromParent();
356 }
357 
358 // Create an entry block for a resume function with a switch that will jump to
359 // suspend points.
360 static void createResumeEntryBlock(Function &F, coro::Shape &Shape) {
361   assert(Shape.ABI == coro::ABI::Switch);
362   LLVMContext &C = F.getContext();
363 
364   // resume.entry:
365   //  %index.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0,
366   //  i32 2
367   //  % index = load i32, i32* %index.addr
368   //  switch i32 %index, label %unreachable [
369   //    i32 0, label %resume.0
370   //    i32 1, label %resume.1
371   //    ...
372   //  ]
373 
374   auto *NewEntry = BasicBlock::Create(C, "resume.entry", &F);
375   auto *UnreachBB = BasicBlock::Create(C, "unreachable", &F);
376 
377   IRBuilder<> Builder(NewEntry);
378   auto *FramePtr = Shape.FramePtr;
379   auto *FrameTy = Shape.FrameTy;
380   auto *GepIndex = Builder.CreateStructGEP(
381       FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr");
382   auto *Index = Builder.CreateLoad(Shape.getIndexType(), GepIndex, "index");
383   auto *Switch =
384       Builder.CreateSwitch(Index, UnreachBB, Shape.CoroSuspends.size());
385   Shape.SwitchLowering.ResumeSwitch = Switch;
386 
387   size_t SuspendIndex = 0;
388   for (auto *AnyS : Shape.CoroSuspends) {
389     auto *S = cast<CoroSuspendInst>(AnyS);
390     ConstantInt *IndexVal = Shape.getIndex(SuspendIndex);
391 
392     // Replace CoroSave with a store to Index:
393     //    %index.addr = getelementptr %f.frame... (index field number)
394     //    store i32 0, i32* %index.addr1
395     auto *Save = S->getCoroSave();
396     Builder.SetInsertPoint(Save);
397     if (S->isFinal()) {
398       // The coroutine should be marked done if it reaches the final suspend
399       // point.
400       markCoroutineAsDone(Builder, Shape, FramePtr);
401     } else {
402       auto *GepIndex = Builder.CreateStructGEP(
403           FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr");
404       Builder.CreateStore(IndexVal, GepIndex);
405     }
406     Save->replaceAllUsesWith(ConstantTokenNone::get(C));
407     Save->eraseFromParent();
408 
409     // Split block before and after coro.suspend and add a jump from an entry
410     // switch:
411     //
412     //  whateverBB:
413     //    whatever
414     //    %0 = call i8 @llvm.coro.suspend(token none, i1 false)
415     //    switch i8 %0, label %suspend[i8 0, label %resume
416     //                                 i8 1, label %cleanup]
417     // becomes:
418     //
419     //  whateverBB:
420     //     whatever
421     //     br label %resume.0.landing
422     //
423     //  resume.0: ; <--- jump from the switch in the resume.entry
424     //     %0 = tail call i8 @llvm.coro.suspend(token none, i1 false)
425     //     br label %resume.0.landing
426     //
427     //  resume.0.landing:
428     //     %1 = phi i8[-1, %whateverBB], [%0, %resume.0]
429     //     switch i8 % 1, label %suspend [i8 0, label %resume
430     //                                    i8 1, label %cleanup]
431 
432     auto *SuspendBB = S->getParent();
433     auto *ResumeBB =
434         SuspendBB->splitBasicBlock(S, "resume." + Twine(SuspendIndex));
435     auto *LandingBB = ResumeBB->splitBasicBlock(
436         S->getNextNode(), ResumeBB->getName() + Twine(".landing"));
437     Switch->addCase(IndexVal, ResumeBB);
438 
439     cast<BranchInst>(SuspendBB->getTerminator())->setSuccessor(0, LandingBB);
440     auto *PN = PHINode::Create(Builder.getInt8Ty(), 2, "", &LandingBB->front());
441     S->replaceAllUsesWith(PN);
442     PN->addIncoming(Builder.getInt8(-1), SuspendBB);
443     PN->addIncoming(S, ResumeBB);
444 
445     ++SuspendIndex;
446   }
447 
448   Builder.SetInsertPoint(UnreachBB);
449   Builder.CreateUnreachable();
450 
451   Shape.SwitchLowering.ResumeEntryBlock = NewEntry;
452 }
453 
454 
455 // Rewrite final suspend point handling. We do not use suspend index to
456 // represent the final suspend point. Instead we zero-out ResumeFnAddr in the
457 // coroutine frame, since it is undefined behavior to resume a coroutine
458 // suspended at the final suspend point. Thus, in the resume function, we can
459 // simply remove the last case (when coro::Shape is built, the final suspend
460 // point (if present) is always the last element of CoroSuspends array).
461 // In the destroy function, we add a code sequence to check if ResumeFnAddress
462 // is Null, and if so, jump to the appropriate label to handle cleanup from the
463 // final suspend point.
464 void CoroCloner::handleFinalSuspend() {
465   assert(Shape.ABI == coro::ABI::Switch &&
466          Shape.SwitchLowering.HasFinalSuspend);
467   auto *Switch = cast<SwitchInst>(VMap[Shape.SwitchLowering.ResumeSwitch]);
468   auto FinalCaseIt = std::prev(Switch->case_end());
469   BasicBlock *ResumeBB = FinalCaseIt->getCaseSuccessor();
470   Switch->removeCase(FinalCaseIt);
471   if (isSwitchDestroyFunction()) {
472     BasicBlock *OldSwitchBB = Switch->getParent();
473     auto *NewSwitchBB = OldSwitchBB->splitBasicBlock(Switch, "Switch");
474     Builder.SetInsertPoint(OldSwitchBB->getTerminator());
475     auto *GepIndex = Builder.CreateStructGEP(Shape.FrameTy, NewFramePtr,
476                                        coro::Shape::SwitchFieldIndex::Resume,
477                                              "ResumeFn.addr");
478     auto *Load = Builder.CreateLoad(Shape.getSwitchResumePointerType(),
479                                     GepIndex);
480     auto *Cond = Builder.CreateIsNull(Load);
481     Builder.CreateCondBr(Cond, ResumeBB, NewSwitchBB);
482     OldSwitchBB->getTerminator()->eraseFromParent();
483   }
484 }
485 
486 static FunctionType *
487 getFunctionTypeFromAsyncSuspend(AnyCoroSuspendInst *Suspend) {
488   auto *AsyncSuspend = cast<CoroSuspendAsyncInst>(Suspend);
489   auto *StructTy = cast<StructType>(AsyncSuspend->getType());
490   auto &Context = Suspend->getParent()->getParent()->getContext();
491   auto *VoidTy = Type::getVoidTy(Context);
492   return FunctionType::get(VoidTy, StructTy->elements(), false);
493 }
494 
495 static Function *createCloneDeclaration(Function &OrigF, coro::Shape &Shape,
496                                         const Twine &Suffix,
497                                         Module::iterator InsertBefore,
498                                         AnyCoroSuspendInst *ActiveSuspend) {
499   Module *M = OrigF.getParent();
500   auto *FnTy = (Shape.ABI != coro::ABI::Async)
501                    ? Shape.getResumeFunctionType()
502                    : getFunctionTypeFromAsyncSuspend(ActiveSuspend);
503 
504   Function *NewF =
505       Function::Create(FnTy, GlobalValue::LinkageTypes::InternalLinkage,
506                        OrigF.getName() + Suffix);
507   if (Shape.ABI != coro::ABI::Async)
508     NewF->addParamAttr(0, Attribute::NonNull);
509 
510   // For the async lowering ABI we can't guarantee that the context argument is
511   // not access via a different pointer not based on the argument.
512   if (Shape.ABI != coro::ABI::Async)
513     NewF->addParamAttr(0, Attribute::NoAlias);
514 
515   M->getFunctionList().insert(InsertBefore, NewF);
516 
517   return NewF;
518 }
519 
520 /// Replace uses of the active llvm.coro.suspend.retcon/async call with the
521 /// arguments to the continuation function.
522 ///
523 /// This assumes that the builder has a meaningful insertion point.
524 void CoroCloner::replaceRetconOrAsyncSuspendUses() {
525   assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
526          Shape.ABI == coro::ABI::Async);
527 
528   auto NewS = VMap[ActiveSuspend];
529   if (NewS->use_empty()) return;
530 
531   // Copy out all the continuation arguments after the buffer pointer into
532   // an easily-indexed data structure for convenience.
533   SmallVector<Value*, 8> Args;
534   // The async ABI includes all arguments -- including the first argument.
535   bool IsAsyncABI = Shape.ABI == coro::ABI::Async;
536   for (auto I = IsAsyncABI ? NewF->arg_begin() : std::next(NewF->arg_begin()),
537             E = NewF->arg_end();
538        I != E; ++I)
539     Args.push_back(&*I);
540 
541   // If the suspend returns a single scalar value, we can just do a simple
542   // replacement.
543   if (!isa<StructType>(NewS->getType())) {
544     assert(Args.size() == 1);
545     NewS->replaceAllUsesWith(Args.front());
546     return;
547   }
548 
549   // Try to peephole extracts of an aggregate return.
550   for (Use &U : llvm::make_early_inc_range(NewS->uses())) {
551     auto *EVI = dyn_cast<ExtractValueInst>(U.getUser());
552     if (!EVI || EVI->getNumIndices() != 1)
553       continue;
554 
555     EVI->replaceAllUsesWith(Args[EVI->getIndices().front()]);
556     EVI->eraseFromParent();
557   }
558 
559   // If we have no remaining uses, we're done.
560   if (NewS->use_empty()) return;
561 
562   // Otherwise, we need to create an aggregate.
563   Value *Agg = UndefValue::get(NewS->getType());
564   for (size_t I = 0, E = Args.size(); I != E; ++I)
565     Agg = Builder.CreateInsertValue(Agg, Args[I], I);
566 
567   NewS->replaceAllUsesWith(Agg);
568 }
569 
570 void CoroCloner::replaceCoroSuspends() {
571   Value *SuspendResult;
572 
573   switch (Shape.ABI) {
574   // In switch lowering, replace coro.suspend with the appropriate value
575   // for the type of function we're extracting.
576   // Replacing coro.suspend with (0) will result in control flow proceeding to
577   // a resume label associated with a suspend point, replacing it with (1) will
578   // result in control flow proceeding to a cleanup label associated with this
579   // suspend point.
580   case coro::ABI::Switch:
581     SuspendResult = Builder.getInt8(isSwitchDestroyFunction() ? 1 : 0);
582     break;
583 
584   // In async lowering there are no uses of the result.
585   case coro::ABI::Async:
586     return;
587 
588   // In returned-continuation lowering, the arguments from earlier
589   // continuations are theoretically arbitrary, and they should have been
590   // spilled.
591   case coro::ABI::RetconOnce:
592   case coro::ABI::Retcon:
593     return;
594   }
595 
596   for (AnyCoroSuspendInst *CS : Shape.CoroSuspends) {
597     // The active suspend was handled earlier.
598     if (CS == ActiveSuspend) continue;
599 
600     auto *MappedCS = cast<AnyCoroSuspendInst>(VMap[CS]);
601     MappedCS->replaceAllUsesWith(SuspendResult);
602     MappedCS->eraseFromParent();
603   }
604 }
605 
606 void CoroCloner::replaceCoroEnds() {
607   for (AnyCoroEndInst *CE : Shape.CoroEnds) {
608     // We use a null call graph because there's no call graph node for
609     // the cloned function yet.  We'll just be rebuilding that later.
610     auto *NewCE = cast<AnyCoroEndInst>(VMap[CE]);
611     replaceCoroEnd(NewCE, Shape, NewFramePtr, /*in resume*/ true, nullptr);
612   }
613 }
614 
615 static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape,
616                                  ValueToValueMapTy *VMap) {
617   if (Shape.ABI == coro::ABI::Async && Shape.CoroSuspends.empty())
618     return;
619   Value *CachedSlot = nullptr;
620   auto getSwiftErrorSlot = [&](Type *ValueTy) -> Value * {
621     if (CachedSlot) {
622       assert(cast<PointerType>(CachedSlot->getType())
623                  ->isOpaqueOrPointeeTypeMatches(ValueTy) &&
624              "multiple swifterror slots in function with different types");
625       return CachedSlot;
626     }
627 
628     // Check if the function has a swifterror argument.
629     for (auto &Arg : F.args()) {
630       if (Arg.isSwiftError()) {
631         CachedSlot = &Arg;
632         assert(cast<PointerType>(Arg.getType())
633                    ->isOpaqueOrPointeeTypeMatches(ValueTy) &&
634                "swifterror argument does not have expected type");
635         return &Arg;
636       }
637     }
638 
639     // Create a swifterror alloca.
640     IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());
641     auto Alloca = Builder.CreateAlloca(ValueTy);
642     Alloca->setSwiftError(true);
643 
644     CachedSlot = Alloca;
645     return Alloca;
646   };
647 
648   for (CallInst *Op : Shape.SwiftErrorOps) {
649     auto MappedOp = VMap ? cast<CallInst>((*VMap)[Op]) : Op;
650     IRBuilder<> Builder(MappedOp);
651 
652     // If there are no arguments, this is a 'get' operation.
653     Value *MappedResult;
654     if (Op->arg_empty()) {
655       auto ValueTy = Op->getType();
656       auto Slot = getSwiftErrorSlot(ValueTy);
657       MappedResult = Builder.CreateLoad(ValueTy, Slot);
658     } else {
659       assert(Op->arg_size() == 1);
660       auto Value = MappedOp->getArgOperand(0);
661       auto ValueTy = Value->getType();
662       auto Slot = getSwiftErrorSlot(ValueTy);
663       Builder.CreateStore(Value, Slot);
664       MappedResult = Slot;
665     }
666 
667     MappedOp->replaceAllUsesWith(MappedResult);
668     MappedOp->eraseFromParent();
669   }
670 
671   // If we're updating the original function, we've invalidated SwiftErrorOps.
672   if (VMap == nullptr) {
673     Shape.SwiftErrorOps.clear();
674   }
675 }
676 
677 void CoroCloner::replaceSwiftErrorOps() {
678   ::replaceSwiftErrorOps(*NewF, Shape, &VMap);
679 }
680 
681 void CoroCloner::salvageDebugInfo() {
682   SmallVector<DbgVariableIntrinsic *, 8> Worklist;
683   SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> DbgPtrAllocaCache;
684   for (auto &BB : *NewF)
685     for (auto &I : BB)
686       if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I))
687         Worklist.push_back(DVI);
688   for (DbgVariableIntrinsic *DVI : Worklist)
689     coro::salvageDebugInfo(DbgPtrAllocaCache, DVI, Shape.OptimizeFrame);
690 
691   // Remove all salvaged dbg.declare intrinsics that became
692   // either unreachable or stale due to the CoroSplit transformation.
693   DominatorTree DomTree(*NewF);
694   auto IsUnreachableBlock = [&](BasicBlock *BB) {
695     return !isPotentiallyReachable(&NewF->getEntryBlock(), BB, nullptr,
696                                    &DomTree);
697   };
698   for (DbgVariableIntrinsic *DVI : Worklist) {
699     if (IsUnreachableBlock(DVI->getParent()))
700       DVI->eraseFromParent();
701     else if (isa_and_nonnull<AllocaInst>(DVI->getVariableLocationOp(0))) {
702       // Count all non-debuginfo uses in reachable blocks.
703       unsigned Uses = 0;
704       for (auto *User : DVI->getVariableLocationOp(0)->users())
705         if (auto *I = dyn_cast<Instruction>(User))
706           if (!isa<AllocaInst>(I) && !IsUnreachableBlock(I->getParent()))
707             ++Uses;
708       if (!Uses)
709         DVI->eraseFromParent();
710     }
711   }
712 }
713 
714 void CoroCloner::replaceEntryBlock() {
715   // In the original function, the AllocaSpillBlock is a block immediately
716   // following the allocation of the frame object which defines GEPs for
717   // all the allocas that have been moved into the frame, and it ends by
718   // branching to the original beginning of the coroutine.  Make this
719   // the entry block of the cloned function.
720   auto *Entry = cast<BasicBlock>(VMap[Shape.AllocaSpillBlock]);
721   auto *OldEntry = &NewF->getEntryBlock();
722   Entry->setName("entry" + Suffix);
723   Entry->moveBefore(OldEntry);
724   Entry->getTerminator()->eraseFromParent();
725 
726   // Clear all predecessors of the new entry block.  There should be
727   // exactly one predecessor, which we created when splitting out
728   // AllocaSpillBlock to begin with.
729   assert(Entry->hasOneUse());
730   auto BranchToEntry = cast<BranchInst>(Entry->user_back());
731   assert(BranchToEntry->isUnconditional());
732   Builder.SetInsertPoint(BranchToEntry);
733   Builder.CreateUnreachable();
734   BranchToEntry->eraseFromParent();
735 
736   // Branch from the entry to the appropriate place.
737   Builder.SetInsertPoint(Entry);
738   switch (Shape.ABI) {
739   case coro::ABI::Switch: {
740     // In switch-lowering, we built a resume-entry block in the original
741     // function.  Make the entry block branch to this.
742     auto *SwitchBB =
743       cast<BasicBlock>(VMap[Shape.SwitchLowering.ResumeEntryBlock]);
744     Builder.CreateBr(SwitchBB);
745     break;
746   }
747   case coro::ABI::Async:
748   case coro::ABI::Retcon:
749   case coro::ABI::RetconOnce: {
750     // In continuation ABIs, we want to branch to immediately after the
751     // active suspend point.  Earlier phases will have put the suspend in its
752     // own basic block, so just thread our jump directly to its successor.
753     assert((Shape.ABI == coro::ABI::Async &&
754             isa<CoroSuspendAsyncInst>(ActiveSuspend)) ||
755            ((Shape.ABI == coro::ABI::Retcon ||
756              Shape.ABI == coro::ABI::RetconOnce) &&
757             isa<CoroSuspendRetconInst>(ActiveSuspend)));
758     auto *MappedCS = cast<AnyCoroSuspendInst>(VMap[ActiveSuspend]);
759     auto Branch = cast<BranchInst>(MappedCS->getNextNode());
760     assert(Branch->isUnconditional());
761     Builder.CreateBr(Branch->getSuccessor(0));
762     break;
763   }
764   }
765 
766   // Any static alloca that's still being used but not reachable from the new
767   // entry needs to be moved to the new entry.
768   Function *F = OldEntry->getParent();
769   DominatorTree DT{*F};
770   for (Instruction &I : llvm::make_early_inc_range(instructions(F))) {
771     auto *Alloca = dyn_cast<AllocaInst>(&I);
772     if (!Alloca || I.use_empty())
773       continue;
774     if (DT.isReachableFromEntry(I.getParent()) ||
775         !isa<ConstantInt>(Alloca->getArraySize()))
776       continue;
777     I.moveBefore(*Entry, Entry->getFirstInsertionPt());
778   }
779 }
780 
781 /// Derive the value of the new frame pointer.
782 Value *CoroCloner::deriveNewFramePointer() {
783   // Builder should be inserting to the front of the new entry block.
784 
785   switch (Shape.ABI) {
786   // In switch-lowering, the argument is the frame pointer.
787   case coro::ABI::Switch:
788     return &*NewF->arg_begin();
789   // In async-lowering, one of the arguments is an async context as determined
790   // by the `llvm.coro.id.async` intrinsic. We can retrieve the async context of
791   // the resume function from the async context projection function associated
792   // with the active suspend. The frame is located as a tail to the async
793   // context header.
794   case coro::ABI::Async: {
795     auto *ActiveAsyncSuspend = cast<CoroSuspendAsyncInst>(ActiveSuspend);
796     auto ContextIdx = ActiveAsyncSuspend->getStorageArgumentIndex() & 0xff;
797     auto *CalleeContext = NewF->getArg(ContextIdx);
798     auto *FramePtrTy = Shape.FrameTy->getPointerTo();
799     auto *ProjectionFunc =
800         ActiveAsyncSuspend->getAsyncContextProjectionFunction();
801     auto DbgLoc =
802         cast<CoroSuspendAsyncInst>(VMap[ActiveSuspend])->getDebugLoc();
803     // Calling i8* (i8*)
804     auto *CallerContext = Builder.CreateCall(ProjectionFunc->getFunctionType(),
805                                              ProjectionFunc, CalleeContext);
806     CallerContext->setCallingConv(ProjectionFunc->getCallingConv());
807     CallerContext->setDebugLoc(DbgLoc);
808     // The frame is located after the async_context header.
809     auto &Context = Builder.getContext();
810     auto *FramePtrAddr = Builder.CreateConstInBoundsGEP1_32(
811         Type::getInt8Ty(Context), CallerContext,
812         Shape.AsyncLowering.FrameOffset, "async.ctx.frameptr");
813     // Inline the projection function.
814     InlineFunctionInfo InlineInfo;
815     auto InlineRes = InlineFunction(*CallerContext, InlineInfo);
816     assert(InlineRes.isSuccess());
817     (void)InlineRes;
818     return Builder.CreateBitCast(FramePtrAddr, FramePtrTy);
819   }
820   // In continuation-lowering, the argument is the opaque storage.
821   case coro::ABI::Retcon:
822   case coro::ABI::RetconOnce: {
823     Argument *NewStorage = &*NewF->arg_begin();
824     auto FramePtrTy = Shape.FrameTy->getPointerTo();
825 
826     // If the storage is inline, just bitcast to the storage to the frame type.
827     if (Shape.RetconLowering.IsFrameInlineInStorage)
828       return Builder.CreateBitCast(NewStorage, FramePtrTy);
829 
830     // Otherwise, load the real frame from the opaque storage.
831     auto FramePtrPtr =
832       Builder.CreateBitCast(NewStorage, FramePtrTy->getPointerTo());
833     return Builder.CreateLoad(FramePtrTy, FramePtrPtr);
834   }
835   }
836   llvm_unreachable("bad ABI");
837 }
838 
839 static void addFramePointerAttrs(AttributeList &Attrs, LLVMContext &Context,
840                                  unsigned ParamIndex,
841                                  uint64_t Size, Align Alignment) {
842   AttrBuilder ParamAttrs(Context);
843   ParamAttrs.addAttribute(Attribute::NonNull);
844   ParamAttrs.addAttribute(Attribute::NoAlias);
845   ParamAttrs.addAlignmentAttr(Alignment);
846   ParamAttrs.addDereferenceableAttr(Size);
847   Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs);
848 }
849 
850 static void addAsyncContextAttrs(AttributeList &Attrs, LLVMContext &Context,
851                                  unsigned ParamIndex) {
852   AttrBuilder ParamAttrs(Context);
853   ParamAttrs.addAttribute(Attribute::SwiftAsync);
854   Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs);
855 }
856 
857 static void addSwiftSelfAttrs(AttributeList &Attrs, LLVMContext &Context,
858                               unsigned ParamIndex) {
859   AttrBuilder ParamAttrs(Context);
860   ParamAttrs.addAttribute(Attribute::SwiftSelf);
861   Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs);
862 }
863 
864 /// Clone the body of the original function into a resume function of
865 /// some sort.
866 void CoroCloner::create() {
867   // Create the new function if we don't already have one.
868   if (!NewF) {
869     NewF = createCloneDeclaration(OrigF, Shape, Suffix,
870                                   OrigF.getParent()->end(), ActiveSuspend);
871   }
872 
873   // Replace all args with undefs. The buildCoroutineFrame algorithm already
874   // rewritten access to the args that occurs after suspend points with loads
875   // and stores to/from the coroutine frame.
876   for (Argument &A : OrigF.args())
877     VMap[&A] = UndefValue::get(A.getType());
878 
879   SmallVector<ReturnInst *, 4> Returns;
880 
881   // Ignore attempts to change certain attributes of the function.
882   // TODO: maybe there should be a way to suppress this during cloning?
883   auto savedVisibility = NewF->getVisibility();
884   auto savedUnnamedAddr = NewF->getUnnamedAddr();
885   auto savedDLLStorageClass = NewF->getDLLStorageClass();
886 
887   // NewF's linkage (which CloneFunctionInto does *not* change) might not
888   // be compatible with the visibility of OrigF (which it *does* change),
889   // so protect against that.
890   auto savedLinkage = NewF->getLinkage();
891   NewF->setLinkage(llvm::GlobalValue::ExternalLinkage);
892 
893   CloneFunctionInto(NewF, &OrigF, VMap,
894                     CloneFunctionChangeType::LocalChangesOnly, Returns);
895 
896   auto &Context = NewF->getContext();
897 
898   // For async functions / continuations, adjust the scope line of the
899   // clone to the line number of the suspend point. However, only
900   // adjust the scope line when the files are the same. This ensures
901   // line number and file name belong together. The scope line is
902   // associated with all pre-prologue instructions. This avoids a jump
903   // in the linetable from the function declaration to the suspend point.
904   if (DISubprogram *SP = NewF->getSubprogram()) {
905     assert(SP != OrigF.getSubprogram() && SP->isDistinct());
906     if (ActiveSuspend)
907       if (auto DL = ActiveSuspend->getDebugLoc())
908         if (SP->getFile() == DL->getFile())
909           SP->setScopeLine(DL->getLine());
910     // Update the linkage name to reflect the modified symbol name. It
911     // is necessary to update the linkage name in Swift, since the
912     // mangling changes for resume functions. It might also be the
913     // right thing to do in C++, but due to a limitation in LLVM's
914     // AsmPrinter we can only do this if the function doesn't have an
915     // abstract specification, since the DWARF backend expects the
916     // abstract specification to contain the linkage name and asserts
917     // that they are identical.
918     if (!SP->getDeclaration() && SP->getUnit() &&
919         SP->getUnit()->getSourceLanguage() == dwarf::DW_LANG_Swift)
920       SP->replaceLinkageName(MDString::get(Context, NewF->getName()));
921   }
922 
923   NewF->setLinkage(savedLinkage);
924   NewF->setVisibility(savedVisibility);
925   NewF->setUnnamedAddr(savedUnnamedAddr);
926   NewF->setDLLStorageClass(savedDLLStorageClass);
927 
928   // Replace the attributes of the new function:
929   auto OrigAttrs = NewF->getAttributes();
930   auto NewAttrs = AttributeList();
931 
932   switch (Shape.ABI) {
933   case coro::ABI::Switch:
934     // Bootstrap attributes by copying function attributes from the
935     // original function.  This should include optimization settings and so on.
936     NewAttrs = NewAttrs.addFnAttributes(Context, AttrBuilder(Context, OrigAttrs.getFnAttrs()));
937 
938     addFramePointerAttrs(NewAttrs, Context, 0,
939                          Shape.FrameSize, Shape.FrameAlign);
940     break;
941   case coro::ABI::Async: {
942     auto *ActiveAsyncSuspend = cast<CoroSuspendAsyncInst>(ActiveSuspend);
943     if (OrigF.hasParamAttribute(Shape.AsyncLowering.ContextArgNo,
944                                 Attribute::SwiftAsync)) {
945       uint32_t ArgAttributeIndices =
946           ActiveAsyncSuspend->getStorageArgumentIndex();
947       auto ContextArgIndex = ArgAttributeIndices & 0xff;
948       addAsyncContextAttrs(NewAttrs, Context, ContextArgIndex);
949 
950       // `swiftasync` must preceed `swiftself` so 0 is not a valid index for
951       // `swiftself`.
952       auto SwiftSelfIndex = ArgAttributeIndices >> 8;
953       if (SwiftSelfIndex)
954         addSwiftSelfAttrs(NewAttrs, Context, SwiftSelfIndex);
955     }
956 
957     // Transfer the original function's attributes.
958     auto FnAttrs = OrigF.getAttributes().getFnAttrs();
959     NewAttrs = NewAttrs.addFnAttributes(Context, AttrBuilder(Context, FnAttrs));
960     break;
961   }
962   case coro::ABI::Retcon:
963   case coro::ABI::RetconOnce:
964     // If we have a continuation prototype, just use its attributes,
965     // full-stop.
966     NewAttrs = Shape.RetconLowering.ResumePrototype->getAttributes();
967 
968     addFramePointerAttrs(NewAttrs, Context, 0,
969                          Shape.getRetconCoroId()->getStorageSize(),
970                          Shape.getRetconCoroId()->getStorageAlignment());
971     break;
972   }
973 
974   switch (Shape.ABI) {
975   // In these ABIs, the cloned functions always return 'void', and the
976   // existing return sites are meaningless.  Note that for unique
977   // continuations, this includes the returns associated with suspends;
978   // this is fine because we can't suspend twice.
979   case coro::ABI::Switch:
980   case coro::ABI::RetconOnce:
981     // Remove old returns.
982     for (ReturnInst *Return : Returns)
983       changeToUnreachable(Return);
984     break;
985 
986   // With multi-suspend continuations, we'll already have eliminated the
987   // original returns and inserted returns before all the suspend points,
988   // so we want to leave any returns in place.
989   case coro::ABI::Retcon:
990     break;
991   // Async lowering will insert musttail call functions at all suspend points
992   // followed by a return.
993   // Don't change returns to unreachable because that will trip up the verifier.
994   // These returns should be unreachable from the clone.
995   case coro::ABI::Async:
996     break;
997   }
998 
999   NewF->setAttributes(NewAttrs);
1000   NewF->setCallingConv(Shape.getResumeFunctionCC());
1001 
1002   // Set up the new entry block.
1003   replaceEntryBlock();
1004 
1005   Builder.SetInsertPoint(&NewF->getEntryBlock().front());
1006   NewFramePtr = deriveNewFramePointer();
1007 
1008   // Remap frame pointer.
1009   Value *OldFramePtr = VMap[Shape.FramePtr];
1010   NewFramePtr->takeName(OldFramePtr);
1011   OldFramePtr->replaceAllUsesWith(NewFramePtr);
1012 
1013   // Remap vFrame pointer.
1014   auto *NewVFrame = Builder.CreateBitCast(
1015       NewFramePtr, Type::getInt8PtrTy(Builder.getContext()), "vFrame");
1016   Value *OldVFrame = cast<Value>(VMap[Shape.CoroBegin]);
1017   OldVFrame->replaceAllUsesWith(NewVFrame);
1018 
1019   switch (Shape.ABI) {
1020   case coro::ABI::Switch:
1021     // Rewrite final suspend handling as it is not done via switch (allows to
1022     // remove final case from the switch, since it is undefined behavior to
1023     // resume the coroutine suspended at the final suspend point.
1024     if (Shape.SwitchLowering.HasFinalSuspend)
1025       handleFinalSuspend();
1026     break;
1027   case coro::ABI::Async:
1028   case coro::ABI::Retcon:
1029   case coro::ABI::RetconOnce:
1030     // Replace uses of the active suspend with the corresponding
1031     // continuation-function arguments.
1032     assert(ActiveSuspend != nullptr &&
1033            "no active suspend when lowering a continuation-style coroutine");
1034     replaceRetconOrAsyncSuspendUses();
1035     break;
1036   }
1037 
1038   // Handle suspends.
1039   replaceCoroSuspends();
1040 
1041   // Handle swifterror.
1042   replaceSwiftErrorOps();
1043 
1044   // Remove coro.end intrinsics.
1045   replaceCoroEnds();
1046 
1047   // Salvage debug info that points into the coroutine frame.
1048   salvageDebugInfo();
1049 
1050   // Eliminate coro.free from the clones, replacing it with 'null' in cleanup,
1051   // to suppress deallocation code.
1052   if (Shape.ABI == coro::ABI::Switch)
1053     coro::replaceCoroFree(cast<CoroIdInst>(VMap[Shape.CoroBegin->getId()]),
1054                           /*Elide=*/ FKind == CoroCloner::Kind::SwitchCleanup);
1055 }
1056 
1057 // Create a resume clone by cloning the body of the original function, setting
1058 // new entry block and replacing coro.suspend an appropriate value to force
1059 // resume or cleanup pass for every suspend point.
1060 static Function *createClone(Function &F, const Twine &Suffix,
1061                              coro::Shape &Shape, CoroCloner::Kind FKind) {
1062   CoroCloner Cloner(F, Suffix, Shape, FKind);
1063   Cloner.create();
1064   return Cloner.getFunction();
1065 }
1066 
1067 /// Remove calls to llvm.coro.end in the original function.
1068 static void removeCoroEnds(const coro::Shape &Shape, CallGraph *CG) {
1069   for (auto End : Shape.CoroEnds) {
1070     replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, CG);
1071   }
1072 }
1073 
1074 static void updateAsyncFuncPointerContextSize(coro::Shape &Shape) {
1075   assert(Shape.ABI == coro::ABI::Async);
1076 
1077   auto *FuncPtrStruct = cast<ConstantStruct>(
1078       Shape.AsyncLowering.AsyncFuncPointer->getInitializer());
1079   auto *OrigRelativeFunOffset = FuncPtrStruct->getOperand(0);
1080   auto *OrigContextSize = FuncPtrStruct->getOperand(1);
1081   auto *NewContextSize = ConstantInt::get(OrigContextSize->getType(),
1082                                           Shape.AsyncLowering.ContextSize);
1083   auto *NewFuncPtrStruct = ConstantStruct::get(
1084       FuncPtrStruct->getType(), OrigRelativeFunOffset, NewContextSize);
1085 
1086   Shape.AsyncLowering.AsyncFuncPointer->setInitializer(NewFuncPtrStruct);
1087 }
1088 
1089 static void replaceFrameSizeAndAlignment(coro::Shape &Shape) {
1090   if (Shape.ABI == coro::ABI::Async)
1091     updateAsyncFuncPointerContextSize(Shape);
1092 
1093   for (CoroAlignInst *CA : Shape.CoroAligns) {
1094     CA->replaceAllUsesWith(
1095         ConstantInt::get(CA->getType(), Shape.FrameAlign.value()));
1096     CA->eraseFromParent();
1097   }
1098 
1099   if (Shape.CoroSizes.empty())
1100     return;
1101 
1102   // In the same function all coro.sizes should have the same result type.
1103   auto *SizeIntrin = Shape.CoroSizes.back();
1104   Module *M = SizeIntrin->getModule();
1105   const DataLayout &DL = M->getDataLayout();
1106   auto Size = DL.getTypeAllocSize(Shape.FrameTy);
1107   auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size);
1108 
1109   for (CoroSizeInst *CS : Shape.CoroSizes) {
1110     CS->replaceAllUsesWith(SizeConstant);
1111     CS->eraseFromParent();
1112   }
1113 }
1114 
1115 // Create a global constant array containing pointers to functions provided and
1116 // set Info parameter of CoroBegin to point at this constant. Example:
1117 //
1118 //   @f.resumers = internal constant [2 x void(%f.frame*)*]
1119 //                    [void(%f.frame*)* @f.resume, void(%f.frame*)* @f.destroy]
1120 //   define void @f() {
1121 //     ...
1122 //     call i8* @llvm.coro.begin(i8* null, i32 0, i8* null,
1123 //                    i8* bitcast([2 x void(%f.frame*)*] * @f.resumers to i8*))
1124 //
1125 // Assumes that all the functions have the same signature.
1126 static void setCoroInfo(Function &F, coro::Shape &Shape,
1127                         ArrayRef<Function *> Fns) {
1128   // This only works under the switch-lowering ABI because coro elision
1129   // only works on the switch-lowering ABI.
1130   assert(Shape.ABI == coro::ABI::Switch);
1131 
1132   SmallVector<Constant *, 4> Args(Fns.begin(), Fns.end());
1133   assert(!Args.empty());
1134   Function *Part = *Fns.begin();
1135   Module *M = Part->getParent();
1136   auto *ArrTy = ArrayType::get(Part->getType(), Args.size());
1137 
1138   auto *ConstVal = ConstantArray::get(ArrTy, Args);
1139   auto *GV = new GlobalVariable(*M, ConstVal->getType(), /*isConstant=*/true,
1140                                 GlobalVariable::PrivateLinkage, ConstVal,
1141                                 F.getName() + Twine(".resumers"));
1142 
1143   // Update coro.begin instruction to refer to this constant.
1144   LLVMContext &C = F.getContext();
1145   auto *BC = ConstantExpr::getPointerCast(GV, Type::getInt8PtrTy(C));
1146   Shape.getSwitchCoroId()->setInfo(BC);
1147 }
1148 
1149 // Store addresses of Resume/Destroy/Cleanup functions in the coroutine frame.
1150 static void updateCoroFrame(coro::Shape &Shape, Function *ResumeFn,
1151                             Function *DestroyFn, Function *CleanupFn) {
1152   assert(Shape.ABI == coro::ABI::Switch);
1153 
1154   IRBuilder<> Builder(Shape.FramePtr->getNextNode());
1155   auto *ResumeAddr = Builder.CreateStructGEP(
1156       Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Resume,
1157       "resume.addr");
1158   Builder.CreateStore(ResumeFn, ResumeAddr);
1159 
1160   Value *DestroyOrCleanupFn = DestroyFn;
1161 
1162   CoroIdInst *CoroId = Shape.getSwitchCoroId();
1163   if (CoroAllocInst *CA = CoroId->getCoroAlloc()) {
1164     // If there is a CoroAlloc and it returns false (meaning we elide the
1165     // allocation, use CleanupFn instead of DestroyFn).
1166     DestroyOrCleanupFn = Builder.CreateSelect(CA, DestroyFn, CleanupFn);
1167   }
1168 
1169   auto *DestroyAddr = Builder.CreateStructGEP(
1170       Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Destroy,
1171       "destroy.addr");
1172   Builder.CreateStore(DestroyOrCleanupFn, DestroyAddr);
1173 }
1174 
1175 static void postSplitCleanup(Function &F) {
1176   removeUnreachableBlocks(F);
1177 
1178 #ifndef NDEBUG
1179   // For now, we do a mandatory verification step because we don't
1180   // entirely trust this pass.  Note that we don't want to add a verifier
1181   // pass to FPM below because it will also verify all the global data.
1182   if (verifyFunction(F, &errs()))
1183     report_fatal_error("Broken function");
1184 #endif
1185 }
1186 
1187 // Assuming we arrived at the block NewBlock from Prev instruction, store
1188 // PHI's incoming values in the ResolvedValues map.
1189 static void
1190 scanPHIsAndUpdateValueMap(Instruction *Prev, BasicBlock *NewBlock,
1191                           DenseMap<Value *, Value *> &ResolvedValues) {
1192   auto *PrevBB = Prev->getParent();
1193   for (PHINode &PN : NewBlock->phis()) {
1194     auto V = PN.getIncomingValueForBlock(PrevBB);
1195     // See if we already resolved it.
1196     auto VI = ResolvedValues.find(V);
1197     if (VI != ResolvedValues.end())
1198       V = VI->second;
1199     // Remember the value.
1200     ResolvedValues[&PN] = V;
1201   }
1202 }
1203 
1204 // Replace a sequence of branches leading to a ret, with a clone of a ret
1205 // instruction. Suspend instruction represented by a switch, track the PHI
1206 // values and select the correct case successor when possible.
1207 static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
1208   DenseMap<Value *, Value *> ResolvedValues;
1209   BasicBlock *UnconditionalSucc = nullptr;
1210   assert(InitialInst->getModule());
1211   const DataLayout &DL = InitialInst->getModule()->getDataLayout();
1212 
1213   auto GetFirstValidInstruction = [](Instruction *I) {
1214     while (I) {
1215       // BitCastInst wouldn't generate actual code so that we could skip it.
1216       if (isa<BitCastInst>(I) || I->isDebugOrPseudoInst() ||
1217           I->isLifetimeStartOrEnd())
1218         I = I->getNextNode();
1219       else if (isInstructionTriviallyDead(I))
1220         // Duing we are in the middle of the transformation, we need to erase
1221         // the dead instruction manually.
1222         I = &*I->eraseFromParent();
1223       else
1224         break;
1225     }
1226     return I;
1227   };
1228 
1229   auto TryResolveConstant = [&ResolvedValues](Value *V) {
1230     auto It = ResolvedValues.find(V);
1231     if (It != ResolvedValues.end())
1232       V = It->second;
1233     return dyn_cast<ConstantInt>(V);
1234   };
1235 
1236   Instruction *I = InitialInst;
1237   while (I->isTerminator() || isa<CmpInst>(I)) {
1238     if (isa<ReturnInst>(I)) {
1239       if (I != InitialInst) {
1240         // If InitialInst is an unconditional branch,
1241         // remove PHI values that come from basic block of InitialInst
1242         if (UnconditionalSucc)
1243           UnconditionalSucc->removePredecessor(InitialInst->getParent(), true);
1244         ReplaceInstWithInst(InitialInst, I->clone());
1245       }
1246       return true;
1247     }
1248     if (auto *BR = dyn_cast<BranchInst>(I)) {
1249       if (BR->isUnconditional()) {
1250         BasicBlock *Succ = BR->getSuccessor(0);
1251         if (I == InitialInst)
1252           UnconditionalSucc = Succ;
1253         scanPHIsAndUpdateValueMap(I, Succ, ResolvedValues);
1254         I = GetFirstValidInstruction(Succ->getFirstNonPHIOrDbgOrLifetime());
1255         continue;
1256       }
1257 
1258       BasicBlock *BB = BR->getParent();
1259       // Handle the case the condition of the conditional branch is constant.
1260       // e.g.,
1261       //
1262       //     br i1 false, label %cleanup, label %CoroEnd
1263       //
1264       // It is possible during the transformation. We could continue the
1265       // simplifying in this case.
1266       if (ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true)) {
1267         // Handle this branch in next iteration.
1268         I = BB->getTerminator();
1269         continue;
1270       }
1271     } else if (auto *CondCmp = dyn_cast<CmpInst>(I)) {
1272       // If the case number of suspended switch instruction is reduced to
1273       // 1, then it is simplified to CmpInst in llvm::ConstantFoldTerminator.
1274       auto *BR = dyn_cast<BranchInst>(
1275           GetFirstValidInstruction(CondCmp->getNextNode()));
1276       if (!BR || !BR->isConditional() || CondCmp != BR->getCondition())
1277         return false;
1278 
1279       // And the comparsion looks like : %cond = icmp eq i8 %V, constant.
1280       // So we try to resolve constant for the first operand only since the
1281       // second operand should be literal constant by design.
1282       ConstantInt *Cond0 = TryResolveConstant(CondCmp->getOperand(0));
1283       auto *Cond1 = dyn_cast<ConstantInt>(CondCmp->getOperand(1));
1284       if (!Cond0 || !Cond1)
1285         return false;
1286 
1287       // Both operands of the CmpInst are Constant. So that we could evaluate
1288       // it immediately to get the destination.
1289       auto *ConstResult =
1290           dyn_cast_or_null<ConstantInt>(ConstantFoldCompareInstOperands(
1291               CondCmp->getPredicate(), Cond0, Cond1, DL));
1292       if (!ConstResult)
1293         return false;
1294 
1295       CondCmp->replaceAllUsesWith(ConstResult);
1296       CondCmp->eraseFromParent();
1297 
1298       // Handle this branch in next iteration.
1299       I = BR;
1300       continue;
1301     } else if (auto *SI = dyn_cast<SwitchInst>(I)) {
1302       ConstantInt *Cond = TryResolveConstant(SI->getCondition());
1303       if (!Cond)
1304         return false;
1305 
1306       BasicBlock *BB = SI->findCaseValue(Cond)->getCaseSuccessor();
1307       scanPHIsAndUpdateValueMap(I, BB, ResolvedValues);
1308       I = GetFirstValidInstruction(BB->getFirstNonPHIOrDbgOrLifetime());
1309       continue;
1310     }
1311 
1312     return false;
1313   }
1314   return false;
1315 }
1316 
1317 // Check whether CI obeys the rules of musttail attribute.
1318 static bool shouldBeMustTail(const CallInst &CI, const Function &F) {
1319   if (CI.isInlineAsm())
1320     return false;
1321 
1322   // Match prototypes and calling conventions of resume function.
1323   FunctionType *CalleeTy = CI.getFunctionType();
1324   if (!CalleeTy->getReturnType()->isVoidTy() || (CalleeTy->getNumParams() != 1))
1325     return false;
1326 
1327   Type *CalleeParmTy = CalleeTy->getParamType(0);
1328   if (!CalleeParmTy->isPointerTy() ||
1329       (CalleeParmTy->getPointerAddressSpace() != 0))
1330     return false;
1331 
1332   if (CI.getCallingConv() != F.getCallingConv())
1333     return false;
1334 
1335   // CI should not has any ABI-impacting function attributes.
1336   static const Attribute::AttrKind ABIAttrs[] = {
1337       Attribute::StructRet,    Attribute::ByVal,     Attribute::InAlloca,
1338       Attribute::Preallocated, Attribute::InReg,     Attribute::Returned,
1339       Attribute::SwiftSelf,    Attribute::SwiftError};
1340   AttributeList Attrs = CI.getAttributes();
1341   for (auto AK : ABIAttrs)
1342     if (Attrs.hasParamAttr(0, AK))
1343       return false;
1344 
1345   return true;
1346 }
1347 
1348 // Add musttail to any resume instructions that is immediately followed by a
1349 // suspend (i.e. ret). We do this even in -O0 to support guaranteed tail call
1350 // for symmetrical coroutine control transfer (C++ Coroutines TS extension).
1351 // This transformation is done only in the resume part of the coroutine that has
1352 // identical signature and calling convention as the coro.resume call.
1353 static void addMustTailToCoroResumes(Function &F) {
1354   bool changed = false;
1355 
1356   // Collect potential resume instructions.
1357   SmallVector<CallInst *, 4> Resumes;
1358   for (auto &I : instructions(F))
1359     if (auto *Call = dyn_cast<CallInst>(&I))
1360       if (shouldBeMustTail(*Call, F))
1361         Resumes.push_back(Call);
1362 
1363   // Set musttail on those that are followed by a ret instruction.
1364   for (CallInst *Call : Resumes)
1365     if (simplifyTerminatorLeadingToRet(Call->getNextNode())) {
1366       Call->setTailCallKind(CallInst::TCK_MustTail);
1367       changed = true;
1368     }
1369 
1370   if (changed)
1371     removeUnreachableBlocks(F);
1372 }
1373 
1374 // The coroutine has no suspend points. Remove the heap allocation for the
1375 // coroutine frame if possible.
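//
// For the switch ABI this amounts to (illustrative IR; %f.Frame is a
// hypothetical frame type name):
//
//   %alloc = call i1 @llvm.coro.alloc(token %id)
//   ...
//   %hdl = call i8* @llvm.coro.begin(token %id, i8* %mem)
// ==>
//   %frame = alloca %f.Frame
//   %vframe = bitcast %f.Frame* %frame to i8*
//
// with %alloc replaced by 'false' and all uses of %hdl replaced by %vframe.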
1376 static void handleNoSuspendCoroutine(coro::Shape &Shape) {
1377   auto *CoroBegin = Shape.CoroBegin;
1378   auto *CoroId = CoroBegin->getId();
1379   auto *AllocInst = CoroId->getCoroAlloc();
1380   switch (Shape.ABI) {
1381   case coro::ABI::Switch: {
1382     auto SwitchId = cast<CoroIdInst>(CoroId);
1383     coro::replaceCoroFree(SwitchId, /*Elide=*/AllocInst != nullptr);
1384     if (AllocInst) {
1385       IRBuilder<> Builder(AllocInst);
1386       auto *Frame = Builder.CreateAlloca(Shape.FrameTy);
1387       Frame->setAlignment(Shape.FrameAlign);
1388       auto *VFrame = Builder.CreateBitCast(Frame, Builder.getInt8PtrTy());
1389       AllocInst->replaceAllUsesWith(Builder.getFalse());
1390       AllocInst->eraseFromParent();
1391       CoroBegin->replaceAllUsesWith(VFrame);
1392     } else {
1393       CoroBegin->replaceAllUsesWith(CoroBegin->getMem());
1394     }
1395 
1396     break;
1397   }
1398   case coro::ABI::Async:
1399   case coro::ABI::Retcon:
1400   case coro::ABI::RetconOnce:
1401     CoroBegin->replaceAllUsesWith(UndefValue::get(CoroBegin->getType()));
1402     break;
1403   }
1404 
1405   CoroBegin->eraseFromParent();
1406 }
1407 
1408 // simplifySuspendPoint needs to check that there are no calls between
1409 // coro.save and coro.suspend, since any of those calls may potentially resume
1410 // the coroutine, in which case we cannot eliminate the suspend point.
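//
// An example of the unsafe pattern that must be rejected (illustrative IR;
// @unknown is a hypothetical external function):
//
//   %save = call token @llvm.coro.save(i8* %hdl)
//   call void @unknown()   ; may transitively resume the coroutine
//   %susp = call i8 @llvm.coro.suspend(token %save, i1 false)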
1411 static bool hasCallsInBlockBetween(Instruction *From, Instruction *To) {
1412   for (Instruction *I = From; I != To; I = I->getNextNode()) {
1413     // Assume that no intrinsic can resume the coroutine.
1414     if (isa<IntrinsicInst>(I))
1415       continue;
1416 
1417     if (isa<CallBase>(I))
1418       return true;
1419   }
1420   return false;
1421 }
1422 
1423 static bool hasCallsInBlocksBetween(BasicBlock *SaveBB, BasicBlock *ResDesBB) {
1424   SmallPtrSet<BasicBlock *, 8> Set;
1425   SmallVector<BasicBlock *, 8> Worklist;
1426 
1427   Set.insert(SaveBB);
1428   Worklist.push_back(ResDesBB);
1429 
1430   // Accumulate all blocks between SaveBB and ResDesBB. Because coro.save
1431   // returns a token consumed by the suspend instruction, all blocks in
1432   // between must eventually reach SaveBB when walking backwards from ResDesBB.
1433   while (!Worklist.empty()) {
1434     auto *BB = Worklist.pop_back_val();
1435     Set.insert(BB);
1436     for (auto *Pred : predecessors(BB))
1437       if (!Set.contains(Pred))
1438         Worklist.push_back(Pred);
1439   }
1440 
1441   // SaveBB and ResDesBB are checked separately in hasCallsBetween.
1442   Set.erase(SaveBB);
1443   Set.erase(ResDesBB);
1444 
1445   for (auto *BB : Set)
1446     if (hasCallsInBlockBetween(BB->getFirstNonPHI(), nullptr))
1447       return true;
1448 
1449   return false;
1450 }
1451 
1452 static bool hasCallsBetween(Instruction *Save, Instruction *ResumeOrDestroy) {
1453   auto *SaveBB = Save->getParent();
1454   auto *ResumeOrDestroyBB = ResumeOrDestroy->getParent();
1455 
1456   if (SaveBB == ResumeOrDestroyBB)
1457     return hasCallsInBlockBetween(Save->getNextNode(), ResumeOrDestroy);
1458 
1459   // Any calls from Save to the end of the block?
1460   if (hasCallsInBlockBetween(Save->getNextNode(), nullptr))
1461     return true;
1462 
1463   // Any calls from the beginning of the block up to ResumeOrDestroy?
1464   if (hasCallsInBlockBetween(ResumeOrDestroyBB->getFirstNonPHI(),
1465                              ResumeOrDestroy))
1466     return true;
1467 
1468   // Any calls in all of the blocks between SaveBB and ResumeOrDestroyBB?
1469   if (hasCallsInBlocksBetween(SaveBB, ResumeOrDestroyBB))
1470     return true;
1471 
1472   return false;
1473 }
1474 
1475 // If a suspend intrinsic is preceded by a Resume or Destroy call, we can
1476 // eliminate the suspend point and replace it with normal control flow.
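//
// A sketch of the pattern being folded (illustrative IR; index 0 selects the
// resume sub-function):
//
//   %addr = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
//   %fn = bitcast i8* %addr to void (i8*)*
//   call fastcc void %fn(i8* %hdl)
//   %susp = call i8 @llvm.coro.suspend(token %save, i1 false)
//
// Since the coroutine is resumed right before it suspends, the calls are
// removed and %susp is replaced with the raw sub-function index so that
// control simply continues along the resume (or cleanup) path.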
1477 static bool simplifySuspendPoint(CoroSuspendInst *Suspend,
1478                                  CoroBeginInst *CoroBegin) {
1479   Instruction *Prev = Suspend->getPrevNode();
1480   if (!Prev) {
1481     auto *Pred = Suspend->getParent()->getSinglePredecessor();
1482     if (!Pred)
1483       return false;
1484     Prev = Pred->getTerminator();
1485   }
1486 
1487   CallBase *CB = dyn_cast<CallBase>(Prev);
1488   if (!CB)
1489     return false;
1490 
1491   auto *Callee = CB->getCalledOperand()->stripPointerCasts();
1492 
1493   // See if the callsite is for resumption or destruction of the coroutine.
1494   auto *SubFn = dyn_cast<CoroSubFnInst>(Callee);
1495   if (!SubFn)
1496     return false;
1497 
1498   // If it does not refer to the current coroutine, we cannot do anything.
1499   if (SubFn->getFrame() != CoroBegin)
1500     return false;
1501 
1502   // See if the transformation is safe. Specifically, see if there are any
1503   // calls in between Save and CallInstr. They can potentially resume the
1504   // coroutine, rendering this optimization unsafe.
1505   auto *Save = Suspend->getCoroSave();
1506   if (hasCallsBetween(Save, CB))
1507     return false;
1508 
1509   // Replace llvm.coro.suspend with the value that results in resumption over
1510   // the resume or cleanup path.
1511   Suspend->replaceAllUsesWith(SubFn->getRawIndex());
1512   Suspend->eraseFromParent();
1513   Save->eraseFromParent();
1514 
1515   // No longer need a call to coro.resume or coro.destroy.
1516   if (auto *Invoke = dyn_cast<InvokeInst>(CB)) {
1517     BranchInst::Create(Invoke->getNormalDest(), Invoke);
1518   }
1519 
1520   // Grab the CalledValue from CB before erasing the CallInstr.
1521   auto *CalledValue = CB->getCalledOperand();
1522   CB->eraseFromParent();
1523 
1524   // If there are no more users, remove it. Usually it is a bitcast of SubFn.
1525   if (CalledValue != SubFn && CalledValue->user_empty())
1526     if (auto *I = dyn_cast<Instruction>(CalledValue))
1527       I->eraseFromParent();
1528 
1529   // Now we are good to remove SubFn.
1530   if (SubFn->user_empty())
1531     SubFn->eraseFromParent();
1532 
1533   return true;
1534 }
1535 
1536 // Remove suspend points that can be simplified.
1537 static void simplifySuspendPoints(coro::Shape &Shape) {
1538   // Currently, the only simplification we do is switch-lowering-specific.
1539   if (Shape.ABI != coro::ABI::Switch)
1540     return;
1541 
1542   auto &S = Shape.CoroSuspends;
1543   size_t I = 0, N = S.size();
1544   if (N == 0)
1545     return;
1546   while (true) {
1547     auto SI = cast<CoroSuspendInst>(S[I]);
1548     // Leave final.suspend to handleFinalSuspend since it is undefined behavior
1549     // to resume a coroutine suspended at the final suspend point.
1550     if (!SI->isFinal() && simplifySuspendPoint(SI, Shape.CoroBegin)) {
1551       if (--N == I)
1552         break;
1553       std::swap(S[I], S[N]);
1554       continue;
1555     }
1556     if (++I == N)
1557       break;
1558   }
1559   S.resize(N);
1560 }
1561 
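// For example, splitting a switch-ABI coroutine @f (illustrative name)
// produces the clones @f.resume, @f.destroy and @f.cleanup, whose addresses
// are stored in the coroutine frame and recorded for CoroElide below.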
1562 static void splitSwitchCoroutine(Function &F, coro::Shape &Shape,
1563                                  SmallVectorImpl<Function *> &Clones) {
1564   assert(Shape.ABI == coro::ABI::Switch);
1565 
1566   createResumeEntryBlock(F, Shape);
1567   auto ResumeClone = createClone(F, ".resume", Shape,
1568                                  CoroCloner::Kind::SwitchResume);
1569   auto DestroyClone = createClone(F, ".destroy", Shape,
1570                                   CoroCloner::Kind::SwitchUnwind);
1571   auto CleanupClone = createClone(F, ".cleanup", Shape,
1572                                   CoroCloner::Kind::SwitchCleanup);
1573 
1574   postSplitCleanup(*ResumeClone);
1575   postSplitCleanup(*DestroyClone);
1576   postSplitCleanup(*CleanupClone);
1577 
1578   addMustTailToCoroResumes(*ResumeClone);
1579 
1580   // Store addresses of the resume/destroy/cleanup functions in the coroutine frame.
1581   updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone);
1582 
1583   assert(Clones.empty());
1584   Clones.push_back(ResumeClone);
1585   Clones.push_back(DestroyClone);
1586   Clones.push_back(CleanupClone);
1587 
1588   // Create a constant array referring to the resume/destroy/cleanup functions,
1589   // pointed to by the last argument of @llvm.coro.info, so that the CoroElide
1590   // pass can determine the correct function to call.
1591   setCoroInfo(F, Shape, Clones);
1592 }
1593 
1594 static void replaceAsyncResumeFunction(CoroSuspendAsyncInst *Suspend,
1595                                        Value *Continuation) {
1596   auto *ResumeIntrinsic = Suspend->getResumeFunction();
1597   auto &Context = Suspend->getParent()->getParent()->getContext();
1598   auto *Int8PtrTy = Type::getInt8PtrTy(Context);
1599 
1600   IRBuilder<> Builder(ResumeIntrinsic);
1601   auto *Val = Builder.CreateBitOrPointerCast(Continuation, Int8PtrTy);
1602   ResumeIntrinsic->replaceAllUsesWith(Val);
1603   ResumeIntrinsic->eraseFromParent();
1604   Suspend->setOperand(CoroSuspendAsyncInst::ResumeFunctionArg,
1605                       UndefValue::get(Int8PtrTy));
1606 }
1607 
1608 /// Coerce the arguments in \p FnArgs according to \p FnTy, appending to \p CallArgs.
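///
/// A minimal sketch of the effect (illustrative types):
///   FnTy:     void (i8*, i64)
///   FnArgs:   %ctx* %c, i64 %n
///   CallArgs: (%ctx* %c cast to i8*), i64 %n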
1609 static void coerceArguments(IRBuilder<> &Builder, FunctionType *FnTy,
1610                             ArrayRef<Value *> FnArgs,
1611                             SmallVectorImpl<Value *> &CallArgs) {
1612   size_t ArgIdx = 0;
1613   for (auto paramTy : FnTy->params()) {
1614     assert(ArgIdx < FnArgs.size());
1615     if (paramTy != FnArgs[ArgIdx]->getType())
1616       CallArgs.push_back(
1617           Builder.CreateBitOrPointerCast(FnArgs[ArgIdx], paramTy));
1618     else
1619       CallArgs.push_back(FnArgs[ArgIdx]);
1620     ++ArgIdx;
1621   }
1622 }
1623 
1624 CallInst *coro::createMustTailCall(DebugLoc Loc, Function *MustTailCallFn,
1625                                    ArrayRef<Value *> Arguments,
1626                                    IRBuilder<> &Builder) {
1627   auto *FnTy = MustTailCallFn->getFunctionType();
1628   // Coerce the arguments. LLVM optimizations seem to ignore the types in
1629   // vararg functions and throw away casts in optimized mode.
1630   SmallVector<Value *, 8> CallArgs;
1631   coerceArguments(Builder, FnTy, Arguments, CallArgs);
1632 
1633   auto *TailCall = Builder.CreateCall(FnTy, MustTailCallFn, CallArgs);
1634   TailCall->setTailCallKind(CallInst::TCK_MustTail);
1635   TailCall->setDebugLoc(Loc);
1636   TailCall->setCallingConv(MustTailCallFn->getCallingConv());
1637   return TailCall;
1638 }
1639 
1640 static void splitAsyncCoroutine(Function &F, coro::Shape &Shape,
1641                                 SmallVectorImpl<Function *> &Clones) {
1642   assert(Shape.ABI == coro::ABI::Async);
1643   assert(Clones.empty());
1644   // Reset various things that the optimizer might have decided it
1645   // "knows" about the coroutine function due to not seeing a return.
1646   F.removeFnAttr(Attribute::NoReturn);
1647   F.removeRetAttr(Attribute::NoAlias);
1648   F.removeRetAttr(Attribute::NonNull);
1649 
1650   auto &Context = F.getContext();
1651   auto *Int8PtrTy = Type::getInt8PtrTy(Context);
1652 
1653   auto *Id = cast<CoroIdAsyncInst>(Shape.CoroBegin->getId());
1654   IRBuilder<> Builder(Id);
1655 
1656   auto *FramePtr = Id->getStorage();
1657   FramePtr = Builder.CreateBitOrPointerCast(FramePtr, Int8PtrTy);
1658   FramePtr = Builder.CreateConstInBoundsGEP1_32(
1659       Type::getInt8Ty(Context), FramePtr, Shape.AsyncLowering.FrameOffset,
1660       "async.ctx.frameptr");
1661 
1662   // Map all uses of llvm.coro.begin to the allocated frame pointer.
1663   {
1664     // Make sure we don't invalidate Shape.FramePtr.
1665     TrackingVH<Instruction> Handle(Shape.FramePtr);
1666     Shape.CoroBegin->replaceAllUsesWith(FramePtr);
1667     Shape.FramePtr = Handle.getValPtr();
1668   }
1669 
1670   // Create all the functions in order after the main function.
1671   auto NextF = std::next(F.getIterator());
1672 
1673   // Create a continuation function for each of the suspend points.
1674   Clones.reserve(Shape.CoroSuspends.size());
1675   for (size_t Idx = 0, End = Shape.CoroSuspends.size(); Idx != End; ++Idx) {
1676     auto *Suspend = cast<CoroSuspendAsyncInst>(Shape.CoroSuspends[Idx]);
1677 
1678     // Create the clone declaration.
1679     auto ResumeNameSuffix = ".resume.";
1680     auto ProjectionFunctionName =
1681         Suspend->getAsyncContextProjectionFunction()->getName();
1682     bool UseSwiftMangling = false;
1683     if (ProjectionFunctionName.equals("__swift_async_resume_project_context")) {
1684       ResumeNameSuffix = "TQ";
1685       UseSwiftMangling = true;
1686     } else if (ProjectionFunctionName.equals(
1687                    "__swift_async_resume_get_context")) {
1688       ResumeNameSuffix = "TY";
1689       UseSwiftMangling = true;
1690     }
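    // E.g. (illustrative), with the __swift_async_resume_project_context
    // projection above and suspend index 1, the continuation created below
    // ends up named "<original name>TQ1_", matching Swift's async mangling.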
1691     auto *Continuation = createCloneDeclaration(
1692         F, Shape,
1693         UseSwiftMangling ? ResumeNameSuffix + Twine(Idx) + "_"
1694                          : ResumeNameSuffix + Twine(Idx),
1695         NextF, Suspend);
1696     Clones.push_back(Continuation);
1697 
1698     // Insert a branch to a new return block immediately before the suspend
1699     // point.
1700     auto *SuspendBB = Suspend->getParent();
1701     auto *NewSuspendBB = SuspendBB->splitBasicBlock(Suspend);
1702     auto *Branch = cast<BranchInst>(SuspendBB->getTerminator());
1703 
1704     // Place the return block before the split-off suspend block.
1705     auto *ReturnBB =
1706         BasicBlock::Create(F.getContext(), "coro.return", &F, NewSuspendBB);
1707     Branch->setSuccessor(0, ReturnBB);
1708 
1709     IRBuilder<> Builder(ReturnBB);
1710 
1711     // Insert the call to the tail call function and inline it.
1712     auto *Fn = Suspend->getMustTailCallFunction();
1713     SmallVector<Value *, 8> Args(Suspend->args());
1714     auto FnArgs = ArrayRef<Value *>(Args).drop_front(
1715         CoroSuspendAsyncInst::MustTailCallFuncArg + 1);
1716     auto *TailCall =
1717         coro::createMustTailCall(Suspend->getDebugLoc(), Fn, FnArgs, Builder);
1718     Builder.CreateRetVoid();
1719     InlineFunctionInfo FnInfo;
1720     auto InlineRes = InlineFunction(*TailCall, FnInfo);
1721     assert(InlineRes.isSuccess() && "Expected inlining to succeed");
1722     (void)InlineRes;
1723 
1724     // Replace the llvm.coro.async.resume intrinsic call.
1725     replaceAsyncResumeFunction(Suspend, Continuation);
1726   }
1727 
1728   assert(Clones.size() == Shape.CoroSuspends.size());
1729   for (size_t Idx = 0, End = Shape.CoroSuspends.size(); Idx != End; ++Idx) {
1730     auto *Suspend = Shape.CoroSuspends[Idx];
1731     auto *Clone = Clones[Idx];
1732 
1733     CoroCloner(F, "resume." + Twine(Idx), Shape, Clone, Suspend).create();
1734   }
1735 }
1736 
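// For retcon/retcon.once coroutines, the unified return block built below
// returns the next continuation function together with any directly-yielded
// values packed into the function's aggregate return type; e.g. a coroutine
// yielding an i32 might return { i8*, i32 } (illustrative types).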
1737 static void splitRetconCoroutine(Function &F, coro::Shape &Shape,
1738                                  SmallVectorImpl<Function *> &Clones) {
1739   assert(Shape.ABI == coro::ABI::Retcon ||
1740          Shape.ABI == coro::ABI::RetconOnce);
1741   assert(Clones.empty());
1742 
1743   // Reset various things that the optimizer might have decided it
1744   // "knows" about the coroutine function due to not seeing a return.
1745   F.removeFnAttr(Attribute::NoReturn);
1746   F.removeRetAttr(Attribute::NoAlias);
1747   F.removeRetAttr(Attribute::NonNull);
1748 
1749   // Allocate the frame.
1750   auto *Id = cast<AnyCoroIdRetconInst>(Shape.CoroBegin->getId());
1751   Value *RawFramePtr;
1752   if (Shape.RetconLowering.IsFrameInlineInStorage) {
1753     RawFramePtr = Id->getStorage();
1754   } else {
1755     IRBuilder<> Builder(Id);
1756 
1757     // Determine the size of the frame.
1758     const DataLayout &DL = F.getParent()->getDataLayout();
1759     auto Size = DL.getTypeAllocSize(Shape.FrameTy);
1760 
1761     // Allocate.  We don't need to update the call graph node because we're
1762     // going to recompute it from scratch after splitting.
1763     // FIXME: pass the required alignment
1764     RawFramePtr = Shape.emitAlloc(Builder, Builder.getInt64(Size), nullptr);
1765     RawFramePtr =
1766       Builder.CreateBitCast(RawFramePtr, Shape.CoroBegin->getType());
1767 
1768     // Stash the allocated frame pointer in the continuation storage.
1769     auto Dest = Builder.CreateBitCast(Id->getStorage(),
1770                                       RawFramePtr->getType()->getPointerTo());
1771     Builder.CreateStore(RawFramePtr, Dest);
1772   }
1773 
1774   // Map all uses of llvm.coro.begin to the allocated frame pointer.
1775   {
1776     // Make sure we don't invalidate Shape.FramePtr.
1777     TrackingVH<Instruction> Handle(Shape.FramePtr);
1778     Shape.CoroBegin->replaceAllUsesWith(RawFramePtr);
1779     Shape.FramePtr = Handle.getValPtr();
1780   }
1781 
1782   // Create a unique return block.
1783   BasicBlock *ReturnBB = nullptr;
1784   SmallVector<PHINode *, 4> ReturnPHIs;
1785 
1786   // Create all the functions in order after the main function.
1787   auto NextF = std::next(F.getIterator());
1788 
1789   // Create a continuation function for each of the suspend points.
1790   Clones.reserve(Shape.CoroSuspends.size());
1791   for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) {
1792     auto Suspend = cast<CoroSuspendRetconInst>(Shape.CoroSuspends[i]);
1793 
1794     // Create the clone declaration.
1795     auto Continuation =
1796         createCloneDeclaration(F, Shape, ".resume." + Twine(i), NextF, nullptr);
1797     Clones.push_back(Continuation);
1798 
1799     // Insert a branch to the unified return block immediately before
1800     // the suspend point.
1801     auto SuspendBB = Suspend->getParent();
1802     auto NewSuspendBB = SuspendBB->splitBasicBlock(Suspend);
1803     auto Branch = cast<BranchInst>(SuspendBB->getTerminator());
1804 
1805     // Create the unified return block.
1806     if (!ReturnBB) {
1807       // Place it before the first suspend.
1808       ReturnBB = BasicBlock::Create(F.getContext(), "coro.return", &F,
1809                                     NewSuspendBB);
1810       Shape.RetconLowering.ReturnBlock = ReturnBB;
1811 
1812       IRBuilder<> Builder(ReturnBB);
1813 
1814       // Create PHIs for all the return values.
1815       assert(ReturnPHIs.empty());
1816 
1817       // First, the continuation.
1818       ReturnPHIs.push_back(Builder.CreatePHI(Continuation->getType(),
1819                                              Shape.CoroSuspends.size()));
1820 
1821       // Next, all the directly-yielded values.
1822       for (auto ResultTy : Shape.getRetconResultTypes())
1823         ReturnPHIs.push_back(Builder.CreatePHI(ResultTy,
1824                                                Shape.CoroSuspends.size()));
1825 
1826       // Build the return value.
1827       auto RetTy = F.getReturnType();
1828 
1829       // Cast the continuation value if necessary.
1830       // We can't rely on the types matching up because that type would
1831       // have to be infinite.
1832       auto CastedContinuationTy =
1833         (ReturnPHIs.size() == 1 ? RetTy : RetTy->getStructElementType(0));
1834       auto *CastedContinuation =
1835         Builder.CreateBitCast(ReturnPHIs[0], CastedContinuationTy);
1836 
1837       Value *RetV;
1838       if (ReturnPHIs.size() == 1) {
1839         RetV = CastedContinuation;
1840       } else {
1841         RetV = UndefValue::get(RetTy);
1842         RetV = Builder.CreateInsertValue(RetV, CastedContinuation, 0);
1843         for (size_t I = 1, E = ReturnPHIs.size(); I != E; ++I)
1844           RetV = Builder.CreateInsertValue(RetV, ReturnPHIs[I], I);
1845       }
1846 
1847       Builder.CreateRet(RetV);
1848     }
1849 
1850     // Branch to the return block.
1851     Branch->setSuccessor(0, ReturnBB);
1852     ReturnPHIs[0]->addIncoming(Continuation, SuspendBB);
1853     size_t NextPHIIndex = 1;
1854     for (auto &VUse : Suspend->value_operands())
1855       ReturnPHIs[NextPHIIndex++]->addIncoming(&*VUse, SuspendBB);
1856     assert(NextPHIIndex == ReturnPHIs.size());
1857   }
1858 
1859   assert(Clones.size() == Shape.CoroSuspends.size());
1860   for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) {
1861     auto Suspend = Shape.CoroSuspends[i];
1862     auto Clone = Clones[i];
1863 
1864     CoroCloner(F, "resume." + Twine(i), Shape, Clone, Suspend).create();
1865   }
1866 }
1867 
1868 namespace {
1869   class PrettyStackTraceFunction : public PrettyStackTraceEntry {
1870     Function &F;
1871   public:
1872     PrettyStackTraceFunction(Function &F) : F(F) {}
1873     void print(raw_ostream &OS) const override {
1874       OS << "While splitting coroutine ";
1875       F.printAsOperand(OS, /*print type*/ false, F.getParent());
1876       OS << "\n";
1877     }
1878   };
1879 }
1880 
1881 static coro::Shape splitCoroutine(Function &F,
1882                                   SmallVectorImpl<Function *> &Clones,
1883                                   bool OptimizeFrame) {
1884   PrettyStackTraceFunction prettyStackTrace(F);
1885 
1886   // The suspend-crossing algorithm in buildCoroutineFrame gets tripped
1887   // up by uses in unreachable blocks, so remove them as a first pass.
1888   removeUnreachableBlocks(F);
1889 
1890   coro::Shape Shape(F, OptimizeFrame);
1891   if (!Shape.CoroBegin)
1892     return Shape;
1893 
1894   simplifySuspendPoints(Shape);
1895   buildCoroutineFrame(F, Shape);
1896   replaceFrameSizeAndAlignment(Shape);
1897 
1898   // If there are no suspend points, no split is required; just remove
1899   // the allocation and deallocation blocks, as they are not needed.
1900   if (Shape.CoroSuspends.empty()) {
1901     handleNoSuspendCoroutine(Shape);
1902   } else {
1903     switch (Shape.ABI) {
1904     case coro::ABI::Switch:
1905       splitSwitchCoroutine(F, Shape, Clones);
1906       break;
1907     case coro::ABI::Async:
1908       splitAsyncCoroutine(F, Shape, Clones);
1909       break;
1910     case coro::ABI::Retcon:
1911     case coro::ABI::RetconOnce:
1912       splitRetconCoroutine(F, Shape, Clones);
1913       break;
1914     }
1915   }
1916 
1917   // Replace all the swifterror operations in the original function.
1918   // This invalidates SwiftErrorOps in the Shape.
1919   replaceSwiftErrorOps(F, Shape, nullptr);
1920 
1921   return Shape;
1922 }
1923 
1924 static void
1925 updateCallGraphAfterCoroutineSplit(Function &F, const coro::Shape &Shape,
1926                                    const SmallVectorImpl<Function *> &Clones,
1927                                    CallGraph &CG, CallGraphSCC &SCC) {
1928   if (!Shape.CoroBegin)
1929     return;
1930 
1931   removeCoroEnds(Shape, &CG);
1932   postSplitCleanup(F);
1933 
1934   // Update call graph and add the functions we created to the SCC.
1935   coro::updateCallGraph(F, Clones, CG, SCC);
1936 }
1937 
1938 static void updateCallGraphAfterCoroutineSplit(
1939     LazyCallGraph::Node &N, const coro::Shape &Shape,
1940     const SmallVectorImpl<Function *> &Clones, LazyCallGraph::SCC &C,
1941     LazyCallGraph &CG, CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR,
1942     FunctionAnalysisManager &FAM) {
1943   if (!Shape.CoroBegin)
1944     return;
1945 
1946   for (llvm::AnyCoroEndInst *End : Shape.CoroEnds) {
1947     auto &Context = End->getContext();
1948     End->replaceAllUsesWith(ConstantInt::getFalse(Context));
1949     End->eraseFromParent();
1950   }
1951 
1952   if (!Clones.empty()) {
1953     switch (Shape.ABI) {
1954     case coro::ABI::Switch:
1955       // Each clone in the Switch lowering is independent of the other clones.
1956       // Let the LazyCallGraph know about each one separately.
1957       for (Function *Clone : Clones)
1958         CG.addSplitFunction(N.getFunction(), *Clone);
1959       break;
1960     case coro::ABI::Async:
1961     case coro::ABI::Retcon:
1962     case coro::ABI::RetconOnce:
1963       // Each clone in the Async/Retcon lowering references the other clones.
1964       // Let the LazyCallGraph know about all of them at once.
1965       if (!Clones.empty())
1966         CG.addSplitRefRecursiveFunctions(N.getFunction(), Clones);
1967       break;
1968     }
1969 
1970     // Let the CGSCC infra handle the changes to the original function.
1971     updateCGAndAnalysisManagerForCGSCCPass(CG, C, N, AM, UR, FAM);
1972   }
1973 
1974   // Do some cleanup and let the CGSCC infra see if we've cleaned up any edges
1975   // to the split functions.
1976   postSplitCleanup(N.getFunction());
1977   updateCGAndAnalysisManagerForFunctionPass(CG, C, N, AM, UR, FAM);
1978 }
1979 
1980 // When we see the coroutine for the first time, we insert an indirect call to
1981 // a devirt trigger function and mark the coroutine as being ready for
1982 // split.
1983 // Async lowering uses this after it has split the function to restart the
1984 // pipeline.
1985 static void prepareForSplit(Function &F, CallGraph &CG,
1986                             bool MarkForAsyncRestart = false) {
1987   Module &M = *F.getParent();
1988   LLVMContext &Context = F.getContext();
1989 #ifndef NDEBUG
1990   Function *DevirtFn = M.getFunction(CORO_DEVIRT_TRIGGER_FN);
1991   assert(DevirtFn && "coro.devirt.trigger function not found");
1992 #endif
1993 
1994   F.addFnAttr(CORO_PRESPLIT_ATTR, MarkForAsyncRestart
1995                                       ? ASYNC_RESTART_AFTER_SPLIT
1996                                       : PREPARED_FOR_SPLIT);
1997 
1998   // Insert an indirect call sequence that will be devirtualized by CoroElide
1999   // pass:
2000   //    %0 = call i8* @llvm.coro.subfn.addr(i8* null, i8 -1)
2001   //    %1 = bitcast i8* %0 to void(i8*)*
2002   //    call void %1(i8* null)
2003   coro::LowererBase Lowerer(M);
2004   Instruction *InsertPt =
2005       MarkForAsyncRestart ? F.getEntryBlock().getFirstNonPHIOrDbgOrLifetime()
2006                           : F.getEntryBlock().getTerminator();
2007   auto *Null = ConstantPointerNull::get(Type::getInt8PtrTy(Context));
2008   auto *DevirtFnAddr =
2009       Lowerer.makeSubFnCall(Null, CoroSubFnInst::RestartTrigger, InsertPt);
2010   FunctionType *FnTy = FunctionType::get(Type::getVoidTy(Context),
2011                                          {Type::getInt8PtrTy(Context)}, false);
2012   auto *IndirectCall = CallInst::Create(FnTy, DevirtFnAddr, Null, "", InsertPt);
2013 
2014   // Update the call graph with the indirect call we just added.
2015   CG[&F]->addCalledFunction(IndirectCall, CG.getCallsExternalNode());
2016 }
2017 
2018 // Make sure that there is a devirtualization trigger function that the
2019 // coro-split pass uses to force a restart of the CGSCC pipeline. If the devirt
2020 // trigger function is not found, we will create one and add it to the current
2021 // SCC.
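//
// The trigger created below is trivial (a sketch; the exact symbol comes from
// CORO_DEVIRT_TRIGGER_FN):
//
//   define private void @coro.devirt.trigger(i8*) alwaysinline {
//   entry:
//     ret void
//   }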
2022 static void createDevirtTriggerFunc(CallGraph &CG, CallGraphSCC &SCC) {
2023   Module &M = CG.getModule();
2024   if (M.getFunction(CORO_DEVIRT_TRIGGER_FN))
2025     return;
2026 
2027   LLVMContext &C = M.getContext();
2028   auto *FnTy = FunctionType::get(Type::getVoidTy(C), Type::getInt8PtrTy(C),
2029                                  /*isVarArg=*/false);
2030   Function *DevirtFn =
2031       Function::Create(FnTy, GlobalValue::LinkageTypes::PrivateLinkage,
2032                        CORO_DEVIRT_TRIGGER_FN, &M);
2033   DevirtFn->addFnAttr(Attribute::AlwaysInline);
2034   auto *Entry = BasicBlock::Create(C, "entry", DevirtFn);
2035   ReturnInst::Create(C, Entry);
2036 
2037   auto *Node = CG.getOrInsertFunction(DevirtFn);
2038 
2039   SmallVector<CallGraphNode *, 8> Nodes(SCC.begin(), SCC.end());
2040   Nodes.push_back(Node);
2041   SCC.initialize(Nodes);
2042 }
2043 
2044 /// Replace a call to llvm.coro.prepare.retcon.
2045 static void replacePrepare(CallInst *Prepare, LazyCallGraph &CG,
2046                            LazyCallGraph::SCC &C) {
2047   auto CastFn = Prepare->getArgOperand(0); // as an i8*
2048   auto Fn = CastFn->stripPointerCasts();   // as its original type
2049 
2050   // Attempt to peephole this pattern:
2051   //    %0 = bitcast [[TYPE]] @some_function to i8*
2052   //    %1 = call @llvm.coro.prepare.retcon(i8* %0)
2053   //    %2 = bitcast %1 to [[TYPE]]
2054   // ==>
2055   //    %2 = @some_function
2056   for (Use &U : llvm::make_early_inc_range(Prepare->uses())) {
2057     // Look for bitcasts back to the original function type.
2058     auto *Cast = dyn_cast<BitCastInst>(U.getUser());
2059     if (!Cast || Cast->getType() != Fn->getType())
2060       continue;
2061 
2062     // Replace and remove the cast.
2063     Cast->replaceAllUsesWith(Fn);
2064     Cast->eraseFromParent();
2065   }
2066 
2067   // Replace any remaining uses with the function as an i8*.
2068   // This can never directly be a callee, so we don't need to update CG.
2069   Prepare->replaceAllUsesWith(CastFn);
2070   Prepare->eraseFromParent();
2071 
2072   // Kill dead bitcasts.
2073   while (auto *Cast = dyn_cast<BitCastInst>(CastFn)) {
2074     if (!Cast->use_empty())
2075       break;
2076     CastFn = Cast->getOperand(0);
2077     Cast->eraseFromParent();
2078   }
2079 }
2080 /// Replace a call to llvm.coro.prepare.retcon.
2081 static void replacePrepare(CallInst *Prepare, CallGraph &CG) {
2082   auto CastFn = Prepare->getArgOperand(0); // as an i8*
2083   auto Fn = CastFn->stripPointerCasts(); // as its original type
2084 
2085   // Find call graph nodes for the preparation.
2086   CallGraphNode *PrepareUserNode = nullptr, *FnNode = nullptr;
2087   if (auto ConcreteFn = dyn_cast<Function>(Fn)) {
2088     PrepareUserNode = CG[Prepare->getFunction()];
2089     FnNode = CG[ConcreteFn];
2090   }
2091 
2092   // Attempt to peephole this pattern:
2093   //    %0 = bitcast [[TYPE]] @some_function to i8*
2094   //    %1 = call @llvm.coro.prepare.retcon(i8* %0)
2095   //    %2 = bitcast %1 to [[TYPE]]
2096   // ==>
2097   //    %2 = @some_function
2098   for (Use &U : llvm::make_early_inc_range(Prepare->uses())) {
2099     // Look for bitcasts back to the original function type.
2100     auto *Cast = dyn_cast<BitCastInst>(U.getUser());
2101     if (!Cast || Cast->getType() != Fn->getType()) continue;
2102 
2103     // Check whether the replacement will introduce new direct calls.
2104     // If so, we'll need to update the call graph.
2105     if (PrepareUserNode) {
2106       for (auto &Use : Cast->uses()) {
2107         if (auto *CB = dyn_cast<CallBase>(Use.getUser())) {
2108           if (!CB->isCallee(&Use))
2109             continue;
2110           PrepareUserNode->removeCallEdgeFor(*CB);
2111           PrepareUserNode->addCalledFunction(CB, FnNode);
2112         }
2113       }
2114     }
2115 
2116     // Replace and remove the cast.
2117     Cast->replaceAllUsesWith(Fn);
2118     Cast->eraseFromParent();
2119   }
2120 
2121   // Replace any remaining uses with the function as an i8*.
2122   // This can never directly be a callee, so we don't need to update CG.
2123   Prepare->replaceAllUsesWith(CastFn);
2124   Prepare->eraseFromParent();
2125 
2126   // Kill dead bitcasts.
2127   while (auto *Cast = dyn_cast<BitCastInst>(CastFn)) {
2128     if (!Cast->use_empty()) break;
2129     CastFn = Cast->getOperand(0);
2130     Cast->eraseFromParent();
2131   }
2132 }
2133 
2134 static bool replaceAllPrepares(Function *PrepareFn, LazyCallGraph &CG,
2135                                LazyCallGraph::SCC &C) {
2136   bool Changed = false;
2137   for (Use &P : llvm::make_early_inc_range(PrepareFn->uses())) {
2138     // Intrinsics can only be used in calls.
2139     auto *Prepare = cast<CallInst>(P.getUser());
2140     replacePrepare(Prepare, CG, C);
2141     Changed = true;
2142   }
2143 
2144   return Changed;
2145 }
2146 
2147 /// Remove calls to llvm.coro.prepare.retcon, a barrier meant to prevent
2148 /// IPO from operating on calls to a retcon coroutine before it's been
2149 /// split.  This is only safe to do after we've split all retcon
2150 /// coroutines in the module.  We can do this in this pass because
2151 /// this pass does promise to split all retcon coroutines (as opposed to
2152 /// switch coroutines, which are lowered in multiple stages).
2153 static bool replaceAllPrepares(Function *PrepareFn, CallGraph &CG) {
2154   bool Changed = false;
2155   for (Use &P : llvm::make_early_inc_range(PrepareFn->uses())) {
2156     // Intrinsics can only be used in calls.
2157     auto *Prepare = cast<CallInst>(P.getUser());
2158     replacePrepare(Prepare, CG);
2159     Changed = true;
2160   }
2161 
2162   return Changed;
2163 }
2164 
2165 static bool declaresCoroSplitIntrinsics(const Module &M) {
2166   return coro::declaresIntrinsics(M, {"llvm.coro.begin",
2167                                       "llvm.coro.prepare.retcon",
2168                                       "llvm.coro.prepare.async"});
2169 }
2170 
2171 static void addPrepareFunction(const Module &M,
2172                                SmallVectorImpl<Function *> &Fns,
2173                                StringRef Name) {
2174   auto *PrepareFn = M.getFunction(Name);
2175   if (PrepareFn && !PrepareFn->use_empty())
2176     Fns.push_back(PrepareFn);
2177 }
2178 
2179 PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
2180                                      CGSCCAnalysisManager &AM,
2181                                      LazyCallGraph &CG, CGSCCUpdateResult &UR) {
2182   // NB: One invariant of a valid LazyCallGraph::SCC is that it must contain a
2183   //     non-zero number of nodes, so we assume that here and grab the first
2184   //     node's function's module.
2185   Module &M = *C.begin()->getFunction().getParent();
2186   auto &FAM =
2187       AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
2188 
2189   if (!declaresCoroSplitIntrinsics(M))
2190     return PreservedAnalyses::all();
2191 
2192   // Check for uses of llvm.coro.prepare.retcon/async.
2193   SmallVector<Function *, 2> PrepareFns;
2194   addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.retcon");
2195   addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.async");
2196 
2197   // Find coroutines for processing.
2198   SmallVector<LazyCallGraph::Node *, 4> Coroutines;
2199   for (LazyCallGraph::Node &N : C)
2200     if (N.getFunction().hasFnAttribute(CORO_PRESPLIT_ATTR))
2201       Coroutines.push_back(&N);
2202 
2203   if (Coroutines.empty() && PrepareFns.empty())
2204     return PreservedAnalyses::all();
2205 
2206   if (Coroutines.empty()) {
2207     for (auto *PrepareFn : PrepareFns) {
2208       replaceAllPrepares(PrepareFn, CG, C);
2209     }
2210   }
2211 
2212   // Split all the coroutines.
2213   for (LazyCallGraph::Node *N : Coroutines) {
2214     Function &F = N->getFunction();
2215     LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F.getName()
2216                       << "' state: "
2217                       << F.getFnAttribute(CORO_PRESPLIT_ATTR).getValueAsString()
2218                       << "\n");
2219     F.removeFnAttr(CORO_PRESPLIT_ATTR);
2220 
2221     SmallVector<Function *, 4> Clones;
2222     const coro::Shape Shape = splitCoroutine(F, Clones, OptimizeFrame);
2223     updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM);
2224 
2225     if (!Shape.CoroSuspends.empty()) {
2226       // Run the CGSCC pipeline on the original and newly split functions.
2227       UR.CWorklist.insert(&C);
2228       for (Function *Clone : Clones)
2229         UR.CWorklist.insert(CG.lookupSCC(CG.get(*Clone)));
2230     }
2231   }
2232 
2233   if (!PrepareFns.empty()) {
2234     for (auto *PrepareFn : PrepareFns) {
2235       replaceAllPrepares(PrepareFn, CG, C);
2236     }
2237   }
2238 
2239   return PreservedAnalyses::none();
2240 }
2241 
2242 namespace {
2243 
2244 // We present a coroutine to LLVM as an ordinary function with suspension
2245 // points marked up with intrinsics. We let the optimizer party on the coroutine
2246 // as a single function for as long as possible. Shortly before the coroutine is
2247 // eligible to be inlined into its callers, we split up the coroutine into parts
2248 // corresponding to initial, resume and destroy invocations of the coroutine,
2249 // add them to the current SCC and restart the IPO pipeline to optimize the
2250 // coroutine subfunctions we extracted before proceeding to the caller of the
2251 // coroutine.
2252 struct CoroSplitLegacy : public CallGraphSCCPass {
2253   static char ID; // Pass identification, replacement for typeid
2254 
2255   CoroSplitLegacy(bool OptimizeFrame = false)
2256       : CallGraphSCCPass(ID), OptimizeFrame(OptimizeFrame) {
2257     initializeCoroSplitLegacyPass(*PassRegistry::getPassRegistry());
2258   }
2259 
2260   bool Run = false;
2261   bool OptimizeFrame;
2262 
2263   // A coroutine is identified by the presence of the coro.begin intrinsic;
2264   // if we don't have any, this pass has nothing to do.
2265   bool doInitialization(CallGraph &CG) override {
2266     Run = declaresCoroSplitIntrinsics(CG.getModule());
2267     return CallGraphSCCPass::doInitialization(CG);
2268   }
2269 
2270   bool runOnSCC(CallGraphSCC &SCC) override {
2271     if (!Run)
2272       return false;
2273 
2274     // Check for uses of llvm.coro.prepare.retcon/async.
2275     SmallVector<Function *, 2> PrepareFns;
2276     auto &M = SCC.getCallGraph().getModule();
2277     addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.retcon");
2278     addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.async");
2279 
2280     // Find coroutines for processing.
2281     SmallVector<Function *, 4> Coroutines;
2282     for (CallGraphNode *CGN : SCC)
2283       if (auto *F = CGN->getFunction())
2284         if (F->hasFnAttribute(CORO_PRESPLIT_ATTR))
2285           Coroutines.push_back(F);
2286 
2287     if (Coroutines.empty() && PrepareFns.empty())
2288       return false;
2289 
2290     CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
2291 
2292     if (Coroutines.empty()) {
2293       bool Changed = false;
2294       for (auto *PrepareFn : PrepareFns)
2295         Changed |= replaceAllPrepares(PrepareFn, CG);
2296       return Changed;
2297     }
2298 
2299     createDevirtTriggerFunc(CG, SCC);
2300 
2301     // Split all the coroutines.
2302     for (Function *F : Coroutines) {
2303       Attribute Attr = F->getFnAttribute(CORO_PRESPLIT_ATTR);
2304       StringRef Value = Attr.getValueAsString();
2305       LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F->getName()
2306                         << "' state: " << Value << "\n");
2307       // Async lowering marks coroutines to trigger a restart of the pipeline
2308       // after it has split them.
2309       if (Value == ASYNC_RESTART_AFTER_SPLIT) {
2310         F->removeFnAttr(CORO_PRESPLIT_ATTR);
2311         continue;
2312       }
2313       if (Value == UNPREPARED_FOR_SPLIT) {
2314         prepareForSplit(*F, CG);
2315         continue;
2316       }
2317       F->removeFnAttr(CORO_PRESPLIT_ATTR);
2318 
2319       SmallVector<Function *, 4> Clones;
2320       const coro::Shape Shape = splitCoroutine(*F, Clones, OptimizeFrame);
2321       updateCallGraphAfterCoroutineSplit(*F, Shape, Clones, CG, SCC);
2322       if (Shape.ABI == coro::ABI::Async) {
2323         // Restart SCC passes.
2324         // Mark function for CoroElide pass. It will devirtualize causing a
2325         // restart of the SCC pipeline.
2326         prepareForSplit(*F, CG, true /*MarkForAsyncRestart*/);
2327       }
2328     }
2329 
2330     for (auto *PrepareFn : PrepareFns)
2331       replaceAllPrepares(PrepareFn, CG);
2332 
2333     return true;
2334   }
2335 
2336   void getAnalysisUsage(AnalysisUsage &AU) const override {
2337     CallGraphSCCPass::getAnalysisUsage(AU);
2338   }
2339 
2340   StringRef getPassName() const override { return "Coroutine Splitting"; }
2341 };
2342 
2343 } // end anonymous namespace
2344 
2345 char CoroSplitLegacy::ID = 0;
2346 
2347 INITIALIZE_PASS_BEGIN(
2348     CoroSplitLegacy, "coro-split",
2349     "Split coroutine into a set of functions driving its state machine", false,
2350     false)
2351 INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
2352 INITIALIZE_PASS_END(
2353     CoroSplitLegacy, "coro-split",
2354     "Split coroutine into a set of functions driving its state machine", false,
2355     false)
2356 
2357 Pass *llvm::createCoroSplitLegacyPass(bool OptimizeFrame) {
2358   return new CoroSplitLegacy(OptimizeFrame);
2359 }
2360