1 //===- CoroFrame.cpp - Builds and manipulates coroutine frame -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // This file contains classes used to discover if for a particular value
// there is a path from its definition to a use that crosses a suspend point.
10 //
11 // Using the information discovered we form a Coroutine Frame structure to
12 // contain those values. All uses of those values are replaced with appropriate
13 // GEP + load from the coroutine frame. At the point of the definition we spill
14 // the value into the coroutine frame.
15 //===----------------------------------------------------------------------===//
16 
17 #include "CoroInternal.h"
18 #include "llvm/ADT/BitVector.h"
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/Analysis/PtrUseVisitor.h"
21 #include "llvm/Analysis/StackLifetime.h"
22 #include "llvm/Config/llvm-config.h"
23 #include "llvm/IR/CFG.h"
24 #include "llvm/IR/DIBuilder.h"
25 #include "llvm/IR/Dominators.h"
26 #include "llvm/IR/IRBuilder.h"
27 #include "llvm/IR/InstIterator.h"
28 #include "llvm/Support/CommandLine.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Support/MathExtras.h"
31 #include "llvm/Support/OptimizedStructLayout.h"
32 #include "llvm/Support/circular_raw_ostream.h"
33 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
34 #include "llvm/Transforms/Utils/Local.h"
35 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
36 #include <algorithm>
37 
38 using namespace llvm;
39 
40 // The "coro-suspend-crossing" flag is very noisy. There is another debug type,
41 // "coro-frame", which results in leaner debug spew.
42 #define DEBUG_TYPE "coro-suspend-crossing"
43 
static cl::opt<bool> EnableReuseStorageInFrame(
    "reuse-storage-in-coroutine-frame", cl::Hidden,
    cl::desc(
        "Enable the optimization which would reuse the storage in the coroutine "
        "frame for allocas whose lifetime ranges are not overlapped, for testing "
        "purposes"),
    llvm::cl::init(false));
50 
51 enum { SmallVectorThreshold = 32 };
52 
// Provides a two-way mapping between basic blocks and their indices.
54 namespace {
55 class BlockToIndexMapping {
56   SmallVector<BasicBlock *, SmallVectorThreshold> V;
57 
58 public:
59   size_t size() const { return V.size(); }
60 
61   BlockToIndexMapping(Function &F) {
62     for (BasicBlock &BB : F)
63       V.push_back(&BB);
64     llvm::sort(V);
65   }
66 
67   size_t blockToIndex(BasicBlock *BB) const {
68     auto *I = llvm::lower_bound(V, BB);
    assert(I != V.end() && *I == BB && "BasicBlockNumbering: Unknown block");
70     return I - V.begin();
71   }
72 
73   BasicBlock *indexToBlock(unsigned Index) const { return V[Index]; }
74 };
75 } // end anonymous namespace
76 
// The SuspendCrossingInfo maintains data that allows us to answer the
// question of whether, given two BasicBlocks A and B, there is a path from A
// to B that passes through a suspend point.
80 //
81 // For every basic block 'i' it maintains a BlockData that consists of:
82 //   Consumes:  a bit vector which contains a set of indices of blocks that can
83 //              reach block 'i'
84 //   Kills: a bit vector which contains a set of indices of blocks that can
//          reach block 'i', where at least one of the paths crosses a
//          suspend point
86 //   Suspend: a boolean indicating whether block 'i' contains a suspend point.
87 //   End: a boolean indicating whether block 'i' contains a coro.end intrinsic.
88 //
89 namespace {
90 struct SuspendCrossingInfo {
91   BlockToIndexMapping Mapping;
92 
93   struct BlockData {
94     BitVector Consumes;
95     BitVector Kills;
96     bool Suspend = false;
97     bool End = false;
98   };
99   SmallVector<BlockData, SmallVectorThreshold> Block;
100 
101   iterator_range<succ_iterator> successors(BlockData const &BD) const {
102     BasicBlock *BB = Mapping.indexToBlock(&BD - &Block[0]);
103     return llvm::successors(BB);
104   }
105 
106   BlockData &getBlockData(BasicBlock *BB) {
107     return Block[Mapping.blockToIndex(BB)];
108   }
109 
110   void dump() const;
111   void dump(StringRef Label, BitVector const &BV) const;
112 
113   SuspendCrossingInfo(Function &F, coro::Shape &Shape);
114 
115   bool hasPathCrossingSuspendPoint(BasicBlock *DefBB, BasicBlock *UseBB) const {
116     size_t const DefIndex = Mapping.blockToIndex(DefBB);
117     size_t const UseIndex = Mapping.blockToIndex(UseBB);
118 
119     bool const Result = Block[UseIndex].Kills[DefIndex];
120     LLVM_DEBUG(dbgs() << UseBB->getName() << " => " << DefBB->getName()
121                       << " answer is " << Result << "\n");
122     return Result;
123   }
124 
125   bool isDefinitionAcrossSuspend(BasicBlock *DefBB, User *U) const {
126     auto *I = cast<Instruction>(U);
127 
128     // We rewrote PHINodes, so that only the ones with exactly one incoming
129     // value need to be analyzed.
130     if (auto *PN = dyn_cast<PHINode>(I))
131       if (PN->getNumIncomingValues() > 1)
132         return false;
133 
134     BasicBlock *UseBB = I->getParent();
135 
136     // As a special case, treat uses by an llvm.coro.suspend.retcon or an
137     // llvm.coro.suspend.async as if they were uses in the suspend's single
138     // predecessor: the uses conceptually occur before the suspend.
139     if (isa<CoroSuspendRetconInst>(I) || isa<CoroSuspendAsyncInst>(I)) {
140       UseBB = UseBB->getSinglePredecessor();
141       assert(UseBB && "should have split coro.suspend into its own block");
142     }
143 
144     return hasPathCrossingSuspendPoint(DefBB, UseBB);
145   }
146 
147   bool isDefinitionAcrossSuspend(Argument &A, User *U) const {
148     return isDefinitionAcrossSuspend(&A.getParent()->getEntryBlock(), U);
149   }
150 
151   bool isDefinitionAcrossSuspend(Instruction &I, User *U) const {
152     auto *DefBB = I.getParent();
153 
154     // As a special case, treat values produced by an llvm.coro.suspend.*
155     // as if they were defined in the single successor: the uses
156     // conceptually occur after the suspend.
157     if (isa<AnyCoroSuspendInst>(I)) {
158       DefBB = DefBB->getSingleSuccessor();
159       assert(DefBB && "should have split coro.suspend into its own block");
160     }
161 
162     return isDefinitionAcrossSuspend(DefBB, U);
163   }
164 };
165 } // end anonymous namespace
166 
167 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
168 LLVM_DUMP_METHOD void SuspendCrossingInfo::dump(StringRef Label,
169                                                 BitVector const &BV) const {
170   dbgs() << Label << ":";
171   for (size_t I = 0, N = BV.size(); I < N; ++I)
172     if (BV[I])
173       dbgs() << " " << Mapping.indexToBlock(I)->getName();
174   dbgs() << "\n";
175 }
176 
177 LLVM_DUMP_METHOD void SuspendCrossingInfo::dump() const {
178   for (size_t I = 0, N = Block.size(); I < N; ++I) {
179     BasicBlock *const B = Mapping.indexToBlock(I);
180     dbgs() << B->getName() << ":\n";
181     dump("   Consumes", Block[I].Consumes);
182     dump("      Kills", Block[I].Kills);
183   }
184   dbgs() << "\n";
185 }
186 #endif
187 
188 SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
189     : Mapping(F) {
190   const size_t N = Mapping.size();
191   Block.resize(N);
192 
193   // Initialize every block so that it consumes itself
194   for (size_t I = 0; I < N; ++I) {
195     auto &B = Block[I];
196     B.Consumes.resize(N);
197     B.Kills.resize(N);
198     B.Consumes.set(I);
199   }
200 
201   // Mark all CoroEnd Blocks. We do not propagate Kills beyond coro.ends as
202   // the code beyond coro.end is reachable during initial invocation of the
203   // coroutine.
204   for (auto *CE : Shape.CoroEnds)
205     getBlockData(CE->getParent()).End = true;
206 
207   // Mark all suspend blocks and indicate that they kill everything they
  // consume. Note that crossing coro.save also requires a spill, as any code
209   // between coro.save and coro.suspend may resume the coroutine and all of the
210   // state needs to be saved by that time.
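  // For illustration, a hypothetical pattern (the callee name is made up):
  //   %save = call token @llvm.coro.save(i8* %hdl)
  //   call void @schedule_resumption(...) ; may resume the coroutine elsewhere
  //   %s = call i8 @llvm.coro.suspend(token %save, i1 false)
  // Any value live across %save must already be in the frame before the call
  // that may resume the coroutine, so coro.save blocks are treated like
  // suspend blocks here.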
211   auto markSuspendBlock = [&](IntrinsicInst *BarrierInst) {
212     BasicBlock *SuspendBlock = BarrierInst->getParent();
213     auto &B = getBlockData(SuspendBlock);
214     B.Suspend = true;
215     B.Kills |= B.Consumes;
216   };
217   for (auto *CSI : Shape.CoroSuspends) {
218     markSuspendBlock(CSI);
219     if (auto *Save = CSI->getCoroSave())
220       markSuspendBlock(Save);
221   }
222 
223   // Iterate propagating consumes and kills until they stop changing.
224   int Iteration = 0;
225   (void)Iteration;
226 
227   bool Changed;
228   do {
229     LLVM_DEBUG(dbgs() << "iteration " << ++Iteration);
230     LLVM_DEBUG(dbgs() << "==============\n");
231 
232     Changed = false;
233     for (size_t I = 0; I < N; ++I) {
234       auto &B = Block[I];
235       for (BasicBlock *SI : successors(B)) {
236 
237         auto SuccNo = Mapping.blockToIndex(SI);
238 
        // Save the Consumes and Kills bitsets so that it is easy to see
        // if anything changed after propagation.
241         auto &S = Block[SuccNo];
242         auto SavedConsumes = S.Consumes;
243         auto SavedKills = S.Kills;
244 
245         // Propagate Kills and Consumes from block B into its successor S.
246         S.Consumes |= B.Consumes;
247         S.Kills |= B.Kills;
248 
        // If block B is a suspend block, it should propagate kills into its
        // successor for every block B consumes.
251         if (B.Suspend) {
252           S.Kills |= B.Consumes;
253         }
254         if (S.Suspend) {
255           // If block S is a suspend block, it should kill all of the blocks it
256           // consumes.
257           S.Kills |= S.Consumes;
258         } else if (S.End) {
259           // If block S is an end block, it should not propagate kills as the
260           // blocks following coro.end() are reached during initial invocation
261           // of the coroutine while all the data are still available on the
262           // stack or in the registers.
263           S.Kills.reset();
264         } else {
          // This is reached when block S is neither a suspend block nor a
          // coro.end block, and we need to make sure that it is not in its
          // own kill set.
267           S.Kills.reset(SuccNo);
268         }
269 
270         // See if anything changed.
271         Changed |= (S.Kills != SavedKills) || (S.Consumes != SavedConsumes);
272 
273         if (S.Kills != SavedKills) {
274           LLVM_DEBUG(dbgs() << "\nblock " << I << " follower " << SI->getName()
275                             << "\n");
276           LLVM_DEBUG(dump("S.Kills", S.Kills));
277           LLVM_DEBUG(dump("SavedKills", SavedKills));
278         }
279         if (S.Consumes != SavedConsumes) {
280           LLVM_DEBUG(dbgs() << "\nblock " << I << " follower " << SI << "\n");
281           LLVM_DEBUG(dump("S.Consume", S.Consumes));
282           LLVM_DEBUG(dump("SavedCons", SavedConsumes));
283         }
284       }
285     }
286   } while (Changed);
287   LLVM_DEBUG(dump());
288 }
289 
290 #undef DEBUG_TYPE // "coro-suspend-crossing"
291 #define DEBUG_TYPE "coro-frame"
292 
293 namespace {
294 class FrameTypeBuilder;
295 // Mapping from the to-be-spilled value to all the users that need reload.
296 using SpillInfo = SmallMapVector<Value *, SmallVector<Instruction *, 2>, 8>;
297 struct AllocaInfo {
298   AllocaInst *Alloca;
299   DenseMap<Instruction *, llvm::Optional<APInt>> Aliases;
300   bool MayWriteBeforeCoroBegin;
301   AllocaInfo(AllocaInst *Alloca,
302              DenseMap<Instruction *, llvm::Optional<APInt>> Aliases,
303              bool MayWriteBeforeCoroBegin)
304       : Alloca(Alloca), Aliases(std::move(Aliases)),
305         MayWriteBeforeCoroBegin(MayWriteBeforeCoroBegin) {}
306 };
307 struct FrameDataInfo {
  // All the values (that are not allocas) that need to be spilled to the
  // frame.
310   SpillInfo Spills;
311   // Allocas contains all values defined as allocas that need to live in the
312   // frame.
313   SmallVector<AllocaInfo, 8> Allocas;
314 
315   SmallVector<Value *, 8> getAllDefs() const {
316     SmallVector<Value *, 8> Defs;
317     for (const auto &P : Spills)
318       Defs.push_back(P.first);
319     for (const auto &A : Allocas)
320       Defs.push_back(A.Alloca);
321     return Defs;
322   }
323 
324   uint32_t getFieldIndex(Value *V) const {
325     auto Itr = FieldIndexMap.find(V);
326     assert(Itr != FieldIndexMap.end() &&
327            "Value does not have a frame field index");
328     return Itr->second;
329   }
330 
331   void setFieldIndex(Value *V, uint32_t Index) {
332     assert((LayoutIndexUpdateStarted || FieldIndexMap.count(V) == 0) &&
333            "Cannot set the index for the same field twice.");
334     FieldIndexMap[V] = Index;
335   }
336 
337   // Remap the index of every field in the frame, using the final layout index.
338   void updateLayoutIndex(FrameTypeBuilder &B);
339 
340 private:
341   // LayoutIndexUpdateStarted is used to avoid updating the index of any field
342   // twice by mistake.
343   bool LayoutIndexUpdateStarted = false;
344   // Map from values to their slot indexes on the frame. They will be first set
345   // with their original insertion field index. After the frame is built, their
346   // indexes will be updated into the final layout index.
347   DenseMap<Value *, uint32_t> FieldIndexMap;
348 };
349 } // namespace
350 
351 #ifndef NDEBUG
352 static void dumpSpills(StringRef Title, const SpillInfo &Spills) {
353   dbgs() << "------------- " << Title << "--------------\n";
354   for (const auto &E : Spills) {
355     E.first->dump();
356     dbgs() << "   user: ";
357     for (auto *I : E.second)
358       I->dump();
359   }
360 }
361 
362 static void dumpAllocas(const SmallVectorImpl<AllocaInfo> &Allocas) {
363   dbgs() << "------------- Allocas --------------\n";
364   for (const auto &A : Allocas) {
365     A.Alloca->dump();
366   }
367 }
368 #endif
369 
370 namespace {
371 using FieldIDType = size_t;
// We cannot rely solely on the natural alignment of a type when building a
// coroutine frame: if the alignment specified on the alloca instruction
// differs from the natural alignment of the allocated type, we will need to
// insert padding.
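// For example (a hypothetical alloca, not taken from any test): for
//   %x = alloca i32, align 64
// the natural alignment of i32 is only 4, but the corresponding frame field
// must start at a 64-byte boundary, which may require padding fields before
// it in the frame layout.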
376 class FrameTypeBuilder {
377 private:
378   struct Field {
379     uint64_t Size;
380     uint64_t Offset;
381     Type *Ty;
382     FieldIDType LayoutFieldIndex;
383     Align Alignment;
384     Align TyAlignment;
385   };
386 
387   const DataLayout &DL;
388   LLVMContext &Context;
389   uint64_t StructSize = 0;
390   Align StructAlign;
391   bool IsFinished = false;
392 
393   SmallVector<Field, 8> Fields;
394   DenseMap<Value*, unsigned> FieldIndexByKey;
395 
396 public:
397   FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL)
398       : DL(DL), Context(Context) {}
399 
400   /// Add a field to this structure for the storage of an `alloca`
401   /// instruction.
402   LLVM_NODISCARD FieldIDType addFieldForAlloca(AllocaInst *AI,
403                                                bool IsHeader = false) {
404     Type *Ty = AI->getAllocatedType();
405 
406     // Make an array type if this is a static array allocation.
407     if (AI->isArrayAllocation()) {
408       if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
409         Ty = ArrayType::get(Ty, CI->getValue().getZExtValue());
410       else
411         report_fatal_error("Coroutines cannot handle non static allocas yet");
412     }
413 
414     return addField(Ty, AI->getAlign(), IsHeader);
415   }
416 
  /// We want to put the allocas whose lifetime ranges do not overlap
  /// into one slot of the coroutine frame.
  /// Consider the example at: https://bugs.llvm.org/show_bug.cgi?id=45566
420   ///
421   ///     cppcoro::task<void> alternative_paths(bool cond) {
422   ///         if (cond) {
423   ///             big_structure a;
424   ///             process(a);
425   ///             co_await something();
426   ///         } else {
427   ///             big_structure b;
428   ///             process2(b);
429   ///             co_await something();
430   ///         }
431   ///     }
432   ///
  /// We want to put variable a and variable b in the same slot to
  /// reduce the size of the coroutine frame.
435   ///
  /// This function uses the StackLifetime algorithm to partition the
  /// AllocaInsts in FrameData.Allocas into non-overlapping sets, so that
  /// allocas in the same set can share one slot in the coroutine frame. It
  /// then adds a field for each set, using the type of the largest alloca in
  /// the set as the field type.
  ///
  /// Side effects: because we sort the allocas, the order of the allocas in
  /// the frame may differ from their order in the source code.
444   void addFieldForAllocas(const Function &F, FrameDataInfo &FrameData,
445                           coro::Shape &Shape);
446 
447   /// Add a field to this structure.
448   LLVM_NODISCARD FieldIDType addField(Type *Ty, MaybeAlign FieldAlignment,
449                                       bool IsHeader = false) {
450     assert(!IsFinished && "adding fields to a finished builder");
451     assert(Ty && "must provide a type for a field");
452 
453     // The field size is always the alloc size of the type.
454     uint64_t FieldSize = DL.getTypeAllocSize(Ty);
455 
    // For an alloca with size=0, we don't need to add a field; it can
    // just point to any index in the frame. Use index 0.
458     if (FieldSize == 0) {
459       return 0;
460     }
461 
462     // The field alignment might not be the type alignment, but we need
463     // to remember the type alignment anyway to build the type.
464     Align TyAlignment = DL.getABITypeAlign(Ty);
465     if (!FieldAlignment) FieldAlignment = TyAlignment;
466 
467     // Lay out header fields immediately.
468     uint64_t Offset;
469     if (IsHeader) {
470       Offset = alignTo(StructSize, FieldAlignment);
471       StructSize = Offset + FieldSize;
472 
473     // Everything else has a flexible offset.
474     } else {
475       Offset = OptimizedStructLayoutField::FlexibleOffset;
476     }
477 
478     Fields.push_back({FieldSize, Offset, Ty, 0, *FieldAlignment, TyAlignment});
479     return Fields.size() - 1;
480   }
481 
482   /// Finish the layout and set the body on the given type.
483   void finish(StructType *Ty);
484 
485   uint64_t getStructSize() const {
486     assert(IsFinished && "not yet finished!");
487     return StructSize;
488   }
489 
490   Align getStructAlign() const {
491     assert(IsFinished && "not yet finished!");
492     return StructAlign;
493   }
494 
495   FieldIDType getLayoutFieldIndex(FieldIDType Id) const {
496     assert(IsFinished && "not yet finished!");
497     return Fields[Id].LayoutFieldIndex;
498   }
499 };
500 } // namespace
501 
502 void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) {
503   auto Updater = [&](Value *I) {
504     setFieldIndex(I, B.getLayoutFieldIndex(getFieldIndex(I)));
505   };
506   LayoutIndexUpdateStarted = true;
507   for (auto &S : Spills)
508     Updater(S.first);
509   for (const auto &A : Allocas)
510     Updater(A.Alloca);
511   LayoutIndexUpdateStarted = false;
512 }
513 
514 void FrameTypeBuilder::addFieldForAllocas(const Function &F,
515                                           FrameDataInfo &FrameData,
516                                           coro::Shape &Shape) {
517   using AllocaSetType = SmallVector<AllocaInst *, 4>;
518   SmallVector<AllocaSetType, 4> NonOverlapedAllocas;
519 
  // We need to add fields for the allocas at the end of this function.
  // However, this function has multiple exits, so we use this RAII helper to
  // avoid redundant code.
  struct RAIIHelper {
    std::function<void()> func;
    RAIIHelper(std::function<void()> &&func) : func(func) {}
    ~RAIIHelper() { func(); }
526   } Helper([&]() {
527     for (auto AllocaList : NonOverlapedAllocas) {
528       auto *LargestAI = *AllocaList.begin();
529       FieldIDType Id = addFieldForAlloca(LargestAI);
530       for (auto *Alloca : AllocaList)
531         FrameData.setFieldIndex(Alloca, Id);
532     }
533   });
534 
535   if (!Shape.ReuseFrameSlot && !EnableReuseStorageInFrame) {
536     for (const auto &A : FrameData.Allocas) {
537       AllocaInst *Alloca = A.Alloca;
538       NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
539     }
540     return;
541   }
542 
  // Because there are paths from the lifetime.start to coro.end
  // for each alloca, the live ranges of all allocas overlap
  // in the blocks that contain coro.end and in their successor blocks.
  // So we choose to skip these blocks when we calculate the live range
  // of each alloca. This should be reasonable since there shouldn't be uses
  // in these blocks and the coroutine frame shouldn't be used outside the
  // coroutine body.
  //
  // Note that the user of coro.suspend may not be a SwitchInst. However, this
  // case seems too complex to handle. It is also harmless to skip these
  // patterns, since doing so merely prevents the allocas from sharing a slot.
555   DenseMap<SwitchInst *, BasicBlock *> DefaultSuspendDest;
556   for (auto CoroSuspendInst : Shape.CoroSuspends) {
557     for (auto U : CoroSuspendInst->users()) {
558       if (auto *ConstSWI = dyn_cast<SwitchInst>(U)) {
559         auto *SWI = const_cast<SwitchInst *>(ConstSWI);
560         DefaultSuspendDest[SWI] = SWI->getDefaultDest();
561         SWI->setDefaultDest(SWI->getSuccessor(1));
562       }
563     }
564   }
565 
566   auto ExtractAllocas = [&]() {
567     AllocaSetType Allocas;
568     Allocas.reserve(FrameData.Allocas.size());
569     for (const auto &A : FrameData.Allocas)
570       Allocas.push_back(A.Alloca);
571     return Allocas;
572   };
573   StackLifetime StackLifetimeAnalyzer(F, ExtractAllocas(),
574                                       StackLifetime::LivenessType::May);
575   StackLifetimeAnalyzer.run();
  auto DoAllocasInterfere = [&](const AllocaInst *AI1, const AllocaInst *AI2) {
577     return StackLifetimeAnalyzer.getLiveRange(AI1).overlaps(
578         StackLifetimeAnalyzer.getLiveRange(AI2));
579   };
580   auto GetAllocaSize = [&](const AllocaInfo &A) {
581     Optional<TypeSize> RetSize = A.Alloca->getAllocationSizeInBits(DL);
582     assert(RetSize && "Variable Length Arrays (VLA) are not supported.\n");
583     assert(!RetSize->isScalable() && "Scalable vectors are not yet supported");
584     return RetSize->getFixedSize();
585   };
  // Put larger allocas in the front, so that they have higher priority to be
  // merged, which can potentially save more space. Each AllocaSet is also
  // kept ordered, so we can easily get the largest alloca in a set.
590   sort(FrameData.Allocas, [&](const auto &Iter1, const auto &Iter2) {
591     return GetAllocaSize(Iter1) > GetAllocaSize(Iter2);
592   });
593   for (const auto &A : FrameData.Allocas) {
594     AllocaInst *Alloca = A.Alloca;
595     bool Merged = false;
    // Try to find a NonOverlappedAllocaSet that the Alloca does not interfere
    // with. If one exists, insert the alloca into that set.
599     for (auto &AllocaSet : NonOverlapedAllocas) {
600       assert(!AllocaSet.empty() && "Processing Alloca Set is not empty.\n");
      bool NoInterference = none_of(AllocaSet, [&](auto Iter) {
        return DoAllocasInterfere(Alloca, Iter);
      });
      // If the alignment of A is a multiple of the alignment of B, the
      // address of A satisfies the alignment requirement of B.
      //
      // There may be other, more fine-grained strategies for handling the
      // alignment information during the merging process, but they seem hard
      // to implement and would provide little benefit.
610       bool Alignable = [&]() -> bool {
611         auto *LargestAlloca = *AllocaSet.begin();
612         return LargestAlloca->getAlign().value() % Alloca->getAlign().value() ==
613                0;
614       }();
      bool CouldMerge = NoInterference && Alignable;
616       if (!CouldMerge)
617         continue;
618       AllocaSet.push_back(Alloca);
619       Merged = true;
620       break;
621     }
622     if (!Merged) {
623       NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
624     }
625   }
  // Restore the default destination of each switch instruction that we
  // modified above.
628   for (auto SwitchAndDefaultDest : DefaultSuspendDest) {
629     SwitchInst *SWI = SwitchAndDefaultDest.first;
630     BasicBlock *DestBB = SwitchAndDefaultDest.second;
631     SWI->setDefaultDest(DestBB);
632   }
  // This debug output tells us which allocas are merged into one slot.
634   LLVM_DEBUG(for (auto &AllocaSet
635                   : NonOverlapedAllocas) {
636     if (AllocaSet.size() > 1) {
637       dbgs() << "In Function:" << F.getName() << "\n";
638       dbgs() << "Find Union Set "
639              << "\n";
640       dbgs() << "\tAllocas are \n";
641       for (auto Alloca : AllocaSet)
642         dbgs() << "\t\t" << *Alloca << "\n";
643     }
644   });
645 }
646 
647 void FrameTypeBuilder::finish(StructType *Ty) {
648   assert(!IsFinished && "already finished!");
649 
650   // Prepare the optimal-layout field array.
651   // The Id in the layout field is a pointer to our Field for it.
652   SmallVector<OptimizedStructLayoutField, 8> LayoutFields;
653   LayoutFields.reserve(Fields.size());
654   for (auto &Field : Fields) {
655     LayoutFields.emplace_back(&Field, Field.Size, Field.Alignment,
656                               Field.Offset);
657   }
658 
659   // Perform layout.
660   auto SizeAndAlign = performOptimizedStructLayout(LayoutFields);
661   StructSize = SizeAndAlign.first;
662   StructAlign = SizeAndAlign.second;
663 
664   auto getField = [](const OptimizedStructLayoutField &LayoutField) -> Field & {
665     return *static_cast<Field *>(const_cast<void*>(LayoutField.Id));
666   };
667 
668   // We need to produce a packed struct type if there's a field whose
669   // assigned offset isn't a multiple of its natural type alignment.
670   bool Packed = [&] {
671     for (auto &LayoutField : LayoutFields) {
672       auto &F = getField(LayoutField);
673       if (!isAligned(F.TyAlignment, LayoutField.Offset))
674         return true;
675     }
676     return false;
677   }();
678 
679   // Build the struct body.
680   SmallVector<Type*, 16> FieldTypes;
681   FieldTypes.reserve(LayoutFields.size() * 3 / 2);
682   uint64_t LastOffset = 0;
683   for (auto &LayoutField : LayoutFields) {
684     auto &F = getField(LayoutField);
685 
686     auto Offset = LayoutField.Offset;
687 
688     // Add a padding field if there's a padding gap and we're either
689     // building a packed struct or the padding gap is more than we'd
690     // get from aligning to the field type's natural alignment.
691     assert(Offset >= LastOffset);
692     if (Offset != LastOffset) {
693       if (Packed || alignTo(LastOffset, F.TyAlignment) != Offset)
694         FieldTypes.push_back(ArrayType::get(Type::getInt8Ty(Context),
695                                             Offset - LastOffset));
696     }
697 
698     F.Offset = Offset;
699     F.LayoutFieldIndex = FieldTypes.size();
700 
701     FieldTypes.push_back(F.Ty);
702     LastOffset = Offset + F.Size;
703   }
704 
705   Ty->setBody(FieldTypes, Packed);
706 
707 #ifndef NDEBUG
708   // Check that the IR layout matches the offsets we expect.
709   auto Layout = DL.getStructLayout(Ty);
710   for (auto &F : Fields) {
711     assert(Ty->getElementType(F.LayoutFieldIndex) == F.Ty);
712     assert(Layout->getElementOffset(F.LayoutFieldIndex) == F.Offset);
713   }
714 #endif
715 
716   IsFinished = true;
717 }
718 
719 // Build a struct that will keep state for an active coroutine.
720 //   struct f.frame {
721 //     ResumeFnTy ResumeFnAddr;
722 //     ResumeFnTy DestroyFnAddr;
723 //     int ResumeIndex;
724 //     ... promise (if present) ...
725 //     ... spills ...
726 //   };
727 static StructType *buildFrameType(Function &F, coro::Shape &Shape,
728                                   FrameDataInfo &FrameData) {
729   LLVMContext &C = F.getContext();
730   const DataLayout &DL = F.getParent()->getDataLayout();
731   StructType *FrameTy = [&] {
732     SmallString<32> Name(F.getName());
733     Name.append(".Frame");
734     return StructType::create(C, Name);
735   }();
736 
737   FrameTypeBuilder B(C, DL);
738 
739   AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
740   Optional<FieldIDType> SwitchIndexFieldId;
741 
742   if (Shape.ABI == coro::ABI::Switch) {
743     auto *FramePtrTy = FrameTy->getPointerTo();
744     auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy,
745                                    /*IsVarArg=*/false);
746     auto *FnPtrTy = FnTy->getPointerTo();
747 
748     // Add header fields for the resume and destroy functions.
749     // We can rely on these being perfectly packed.
750     (void)B.addField(FnPtrTy, None, /*header*/ true);
751     (void)B.addField(FnPtrTy, None, /*header*/ true);
752 
753     // PromiseAlloca field needs to be explicitly added here because it's
754     // a header field with a fixed offset based on its alignment. Hence it
755     // needs special handling and cannot be added to FrameData.Allocas.
756     if (PromiseAlloca)
757       FrameData.setFieldIndex(
758           PromiseAlloca, B.addFieldForAlloca(PromiseAlloca, /*header*/ true));
759 
760     // Add a field to store the suspend index.  This doesn't need to
761     // be in the header.
762     unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size()));
763     Type *IndexType = Type::getIntNTy(C, IndexBits);
764 
765     SwitchIndexFieldId = B.addField(IndexType, None);
766   } else {
767     assert(PromiseAlloca == nullptr && "lowering doesn't support promises");
768   }
769 
  // Because multiple allocas may share the same field slot,
  // we add the allocas to fields here.
772   B.addFieldForAllocas(F, FrameData, Shape);
773   // Add PromiseAlloca to Allocas list so that
774   // 1. updateLayoutIndex could update its index after
775   // `performOptimizedStructLayout`
776   // 2. it is processed in insertSpills.
777   if (Shape.ABI == coro::ABI::Switch && PromiseAlloca)
    // We assume that the promise alloca won't be modified before
    // CoroBegin and that no alias will be created before CoroBegin.
780     FrameData.Allocas.emplace_back(
781         PromiseAlloca, DenseMap<Instruction *, llvm::Optional<APInt>>{}, false);
782   // Create an entry for every spilled value.
783   for (auto &S : FrameData.Spills) {
784     FieldIDType Id = B.addField(S.first->getType(), None);
785     FrameData.setFieldIndex(S.first, Id);
786   }
787 
788   B.finish(FrameTy);
789   FrameData.updateLayoutIndex(B);
790   Shape.FrameAlign = B.getStructAlign();
791   Shape.FrameSize = B.getStructSize();
792 
793   switch (Shape.ABI) {
794   case coro::ABI::Switch:
795     // In the switch ABI, remember the switch-index field.
796     Shape.SwitchLowering.IndexField =
797         B.getLayoutFieldIndex(*SwitchIndexFieldId);
798 
799     // Also round the frame size up to a multiple of its alignment, as is
800     // generally expected in C/C++.
801     Shape.FrameSize = alignTo(Shape.FrameSize, Shape.FrameAlign);
802     break;
803 
804   // In the retcon ABI, remember whether the frame is inline in the storage.
805   case coro::ABI::Retcon:
806   case coro::ABI::RetconOnce: {
807     auto Id = Shape.getRetconCoroId();
808     Shape.RetconLowering.IsFrameInlineInStorage
809       = (B.getStructSize() <= Id->getStorageSize() &&
810          B.getStructAlign() <= Id->getStorageAlignment());
811     break;
812   }
813   case coro::ABI::Async: {
814     Shape.AsyncLowering.FrameOffset =
815         alignTo(Shape.AsyncLowering.ContextHeaderSize, Shape.FrameAlign);
816     // Also make the final context size a multiple of the context alignment to
817     // make allocation easier for allocators.
818     Shape.AsyncLowering.ContextSize =
819         alignTo(Shape.AsyncLowering.FrameOffset + Shape.FrameSize,
820                 Shape.AsyncLowering.getContextAlignment());
821     if (Shape.AsyncLowering.getContextAlignment() < Shape.FrameAlign) {
822       report_fatal_error(
823           "The alignment requirment of frame variables cannot be higher than "
824           "the alignment of the async function context");
825     }
826     break;
827   }
828   }
829 
830   return FrameTy;
831 }
832 
// We use a pointer use visitor to track how an alloca is being used.
// The goal is to be able to answer the following three questions:
//   1. Should this alloca be allocated on the frame instead?
//   2. Could the content of the alloca be modified prior to CoroBegin, which
//      would require copying the data from the alloca to the frame after
//      CoroBegin?
//   3. Are there any aliases created for this alloca prior to CoroBegin but
//      used after CoroBegin? In that case, we will need to recreate the
//      aliases after CoroBegin based off the frame.
//
// To answer question 1, we track two things:
//   a. The list of all BasicBlocks that use this alloca or any of its
//      aliases. In the end, we check if there exists any pair of basic blocks
//      that crosses a suspension point. If so, this alloca must be put on the
//      frame.
//   b. Whether the alloca or any alias of the alloca escapes at some point,
//      either by having its address stored somewhere or by being passed to a
//      function call that might capture it. If it ever escapes, this alloca
//      must be put on the frame conservatively.
//
// To answer question 2, we track writes through the variable
// MayWriteBeforeCoroBegin. Whenever a potential write happens, either through
// a store instruction, a function call or any of the memory intrinsics, we
// check whether this instruction is prior to CoroBegin.
//
// To answer question 3, we track the offsets of all aliases created for the
// alloca prior to CoroBegin but used after CoroBegin. llvm::Optional is used
// to be able to represent the case when the offset is unknown (e.g. when you
// have a PHINode that takes in different offset values). We cannot handle
// unknown offsets and will assert. This is the one potential issue left
// unresolved; an ideal solution would likely require a significant redesign.
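//
// For illustration, a hypothetical snippet (not from any test):
//   %a     = alloca i64
//   %alias = bitcast i64* %a to i8*     ; alias created before CoroBegin
//   %hdl   = call i8* @llvm.coro.begin(...)
//   ...
//   store i8 0, i8* %alias              ; alias used after CoroBegin
// Here the visitor records %alias with offset 0 so that, once %a is moved to
// the frame, %alias can be recreated from the frame address after CoroBegin.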
858 namespace {
859 struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
860   using Base = PtrUseVisitor<AllocaUseVisitor>;
861   AllocaUseVisitor(const DataLayout &DL, const DominatorTree &DT,
862                    const CoroBeginInst &CB, const SuspendCrossingInfo &Checker)
863       : PtrUseVisitor(DL), DT(DT), CoroBegin(CB), Checker(Checker) {}
864 
865   void visit(Instruction &I) {
866     Users.insert(&I);
867     Base::visit(I);
868     // If the pointer is escaped prior to CoroBegin, we have to assume it would
869     // be written into before CoroBegin as well.
870     if (PI.isEscaped() && !DT.dominates(&CoroBegin, PI.getEscapingInst())) {
871       MayWriteBeforeCoroBegin = true;
872     }
873   }
874   // We need to provide this overload as PtrUseVisitor uses a pointer based
875   // visiting function.
876   void visit(Instruction *I) { return visit(*I); }
877 
878   void visitPHINode(PHINode &I) {
879     enqueueUsers(I);
880     handleAlias(I);
881   }
882 
883   void visitSelectInst(SelectInst &I) {
884     enqueueUsers(I);
885     handleAlias(I);
886   }
887 
888   void visitStoreInst(StoreInst &SI) {
    // Regardless of whether the alias of the alloca is the value operand or
    // the pointer operand, we need to assume the alloca has been written to.
891     handleMayWrite(SI);
892 
893     if (SI.getValueOperand() != U->get())
894       return;
895 
896     // We are storing the pointer into a memory location, potentially escaping.
897     // As an optimization, we try to detect simple cases where it doesn't
898     // actually escape, for example:
899     //   %ptr = alloca ..
900     //   %addr = alloca ..
901     //   store %ptr, %addr
902     //   %x = load %addr
903     //   ..
    // If %addr is only used by loading from it, we could simply treat %x as
    // another alias of %ptr, and not consider %ptr to have escaped.
906     auto IsSimpleStoreThenLoad = [&]() {
907       auto *AI = dyn_cast<AllocaInst>(SI.getPointerOperand());
908       // If the memory location we are storing to is not an alloca, it
909       // could be an alias of some other memory locations, which is difficult
910       // to analyze.
911       if (!AI)
912         return false;
913       // StoreAliases contains aliases of the memory location stored into.
914       SmallVector<Instruction *, 4> StoreAliases = {AI};
915       while (!StoreAliases.empty()) {
916         Instruction *I = StoreAliases.pop_back_val();
917         for (User *U : I->users()) {
918           // If we are loading from the memory location, we are creating an
919           // alias of the original pointer.
920           if (auto *LI = dyn_cast<LoadInst>(U)) {
921             enqueueUsers(*LI);
922             handleAlias(*LI);
923             continue;
924           }
          // If we are overwriting the memory location, the pointer certainly
          // won't escape.
927           if (auto *S = dyn_cast<StoreInst>(U))
928             if (S->getPointerOperand() == I)
929               continue;
930           if (auto *II = dyn_cast<IntrinsicInst>(U))
931             if (II->isLifetimeStartOrEnd())
932               continue;
          // BitCastInst creates aliases of the memory location being stored
          // into.
935           if (auto *BI = dyn_cast<BitCastInst>(U)) {
936             StoreAliases.push_back(BI);
937             continue;
938           }
939           return false;
940         }
941       }
942 
943       return true;
944     };
945 
946     if (!IsSimpleStoreThenLoad())
947       PI.setEscaped(&SI);
948   }
949 
950   // All mem intrinsics modify the data.
951   void visitMemIntrinsic(MemIntrinsic &MI) { handleMayWrite(MI); }
952 
953   void visitBitCastInst(BitCastInst &BC) {
954     Base::visitBitCastInst(BC);
955     handleAlias(BC);
956   }
957 
958   void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
959     Base::visitAddrSpaceCastInst(ASC);
960     handleAlias(ASC);
961   }
962 
963   void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
964     // The base visitor will adjust Offset accordingly.
965     Base::visitGetElementPtrInst(GEPI);
966     handleAlias(GEPI);
967   }
968 
969   void visitIntrinsicInst(IntrinsicInst &II) {
970     if (II.getIntrinsicID() != Intrinsic::lifetime_start)
971       return Base::visitIntrinsicInst(II);
972     LifetimeStarts.insert(&II);
973   }
974 
975   void visitCallBase(CallBase &CB) {
976     for (unsigned Op = 0, OpCount = CB.getNumArgOperands(); Op < OpCount; ++Op)
977       if (U->get() == CB.getArgOperand(Op) && !CB.doesNotCapture(Op))
978         PI.setEscaped(&CB);
979     handleMayWrite(CB);
980   }
981 
982   bool getShouldLiveOnFrame() const {
983     if (!ShouldLiveOnFrame)
984       ShouldLiveOnFrame = computeShouldLiveOnFrame();
985     return ShouldLiveOnFrame.getValue();
986   }
987 
988   bool getMayWriteBeforeCoroBegin() const { return MayWriteBeforeCoroBegin; }
989 
990   DenseMap<Instruction *, llvm::Optional<APInt>> getAliasesCopy() const {
991     assert(getShouldLiveOnFrame() && "This method should only be called if the "
992                                      "alloca needs to live on the frame.");
993     for (const auto &P : AliasOffetMap)
994       if (!P.second)
995         report_fatal_error("Unable to handle an alias with unknown offset "
996                            "created before CoroBegin.");
997     return AliasOffetMap;
998   }
999 
1000 private:
1001   const DominatorTree &DT;
1002   const CoroBeginInst &CoroBegin;
1003   const SuspendCrossingInfo &Checker;
  // All aliases of the original AllocaInst that are created before CoroBegin
  // and used after CoroBegin. Each entry contains the instruction and its
  // offset into the original alloca. They need to be recreated after
  // CoroBegin off the frame.
1007   DenseMap<Instruction *, llvm::Optional<APInt>> AliasOffetMap{};
1008   SmallPtrSet<Instruction *, 4> Users{};
1009   SmallPtrSet<IntrinsicInst *, 2> LifetimeStarts{};
1010   bool MayWriteBeforeCoroBegin{false};
1011 
1012   mutable llvm::Optional<bool> ShouldLiveOnFrame{};
1013 
1014   bool computeShouldLiveOnFrame() const {
    // If lifetime information is available, we check it first since it's
    // more precise. We look at every pair of a lifetime.start intrinsic and
    // a user of the pointer to see if they cross suspension points. The uses
    // cover both direct uses as well as indirect uses.
1019     if (!LifetimeStarts.empty()) {
1020       for (auto *I : Users)
1021         for (auto *S : LifetimeStarts)
1022           if (Checker.isDefinitionAcrossSuspend(*S, I))
1023             return true;
1024       return false;
1025     }
1026     // FIXME: Ideally the isEscaped check should come at the beginning.
1027     // However there are a few loose ends that need to be fixed first before
1028     // we can do that. We need to make sure we are not over-conservative, so
1029     // that the data accessed in-between await_suspend and symmetric transfer
1030     // is always put on the stack, and also data accessed after coro.end is
1031     // always put on the stack (esp the return object). To fix that, we need
1032     // to:
1033     //  1) Potentially treat sret as nocapture in calls
1034     //  2) Special handle the return object and put it on the stack
1035     //  3) Utilize lifetime.end intrinsic
1036     if (PI.isEscaped())
1037       return true;
1038 
1039     for (auto *U1 : Users)
1040       for (auto *U2 : Users)
1041         if (Checker.isDefinitionAcrossSuspend(*U1, U2))
1042           return true;
1043 
1044     return false;
1045   }
1046 
1047   void handleMayWrite(const Instruction &I) {
1048     if (!DT.dominates(&CoroBegin, &I))
1049       MayWriteBeforeCoroBegin = true;
1050   }
1051 
1052   bool usedAfterCoroBegin(Instruction &I) {
1053     for (auto &U : I.uses())
1054       if (DT.dominates(&CoroBegin, U))
1055         return true;
1056     return false;
1057   }
1058 
1059   void handleAlias(Instruction &I) {
1060     // We track all aliases created prior to CoroBegin but used after.
1061     // These aliases may need to be recreated after CoroBegin if the alloca
    // needs to live on the frame.
1063     if (DT.dominates(&CoroBegin, &I) || !usedAfterCoroBegin(I))
1064       return;
1065 
1066     if (!IsOffsetKnown) {
1067       AliasOffetMap[&I].reset();
1068     } else {
1069       auto Itr = AliasOffetMap.find(&I);
1070       if (Itr == AliasOffetMap.end()) {
1071         AliasOffetMap[&I] = Offset;
1072       } else if (Itr->second.hasValue() && Itr->second.getValue() != Offset) {
1073         // If we have seen two different possible values for this alias, we set
1074         // it to empty.
1075         AliasOffetMap[&I].reset();
1076       }
1077     }
1078   }
1079 };
1080 } // namespace
1081 
// We need to make room to insert a spill after the initial PHIs, but before
// the catchswitch instruction. Placing the spill before the catchswitch would
// violate the requirement that a catchswitch, like all other EH pads, must be
// the first non-PHI in a block.
1085 //
1086 // Split away catchswitch into a separate block and insert in its place:
1087 //
1088 //   cleanuppad <InsertPt> cleanupret.
1089 //
// The cleanupret instruction will act as an insert point for the spill.
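//
// For illustration (hypothetical block and value names), a block
//
//   bb:
//     %v = phi ...
//     catchswitch within %parent [...] unwind ...
//
// becomes
//
//   bb:
//     %v = phi ...
//     %pad = cleanuppad within %parent []
//     cleanupret from %pad unwind label %bb.split
//   bb.split:
//     catchswitch within %parent [...] unwind ...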
1091 static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) {
1092   BasicBlock *CurrentBlock = CatchSwitch->getParent();
1093   BasicBlock *NewBlock = CurrentBlock->splitBasicBlock(CatchSwitch);
1094   CurrentBlock->getTerminator()->eraseFromParent();
1095 
1096   auto *CleanupPad =
1097       CleanupPadInst::Create(CatchSwitch->getParentPad(), {}, "", CurrentBlock);
1098   auto *CleanupRet =
1099       CleanupReturnInst::Create(CleanupPad, NewBlock, CurrentBlock);
1100   return CleanupRet;
1101 }
1102 
1103 // Replace all alloca and SSA values that are accessed across suspend points
1104 // with GetElementPointer from coroutine frame + loads and stores. Create an
1105 // AllocaSpillBB that will become the new entry block for the resume parts of
1106 // the coroutine:
1107 //
1108 //    %hdl = coro.begin(...)
1109 //    whatever
1110 //
1111 // becomes:
1112 //
1113 //    %hdl = coro.begin(...)
1114 //    %FramePtr = bitcast i8* hdl to %f.frame*
1115 //    br label %AllocaSpillBB
1116 //
1117 //  AllocaSpillBB:
1118 //    ; geps corresponding to allocas that were moved to coroutine frame
1119 //    br label PostSpill
1120 //
1121 //  PostSpill:
1122 //    whatever
1123 //
1124 //
1125 static Instruction *insertSpills(const FrameDataInfo &FrameData,
1126                                  coro::Shape &Shape) {
1127   auto *CB = Shape.CoroBegin;
1128   LLVMContext &C = CB->getContext();
1129   IRBuilder<> Builder(CB->getNextNode());
1130   StructType *FrameTy = Shape.FrameTy;
1131   PointerType *FramePtrTy = FrameTy->getPointerTo();
1132   auto *FramePtr =
1133       cast<Instruction>(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr"));
1134   DominatorTree DT(*CB->getFunction());
1135   SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> DbgPtrAllocaCache;
1136 
1137   // Create a GEP with the given index into the coroutine frame for the original
1138   // value Orig. Appends an extra 0 index for array-allocas, preserving the
1139   // original type.
1140   auto GetFramePointer = [&](Value *Orig) -> Value * {
1141     FieldIDType Index = FrameData.getFieldIndex(Orig);
1142     SmallVector<Value *, 3> Indices = {
1143         ConstantInt::get(Type::getInt32Ty(C), 0),
1144         ConstantInt::get(Type::getInt32Ty(C), Index),
1145     };
1146 
1147     if (auto *AI = dyn_cast<AllocaInst>(Orig)) {
1148       if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) {
1149         auto Count = CI->getValue().getZExtValue();
1150         if (Count > 1) {
1151           Indices.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
1152         }
1153       } else {
1154         report_fatal_error("Coroutines cannot handle non static allocas yet");
1155       }
1156     }
1157 
1158     auto GEP = cast<GetElementPtrInst>(
1159         Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices));
1160     if (isa<AllocaInst>(Orig)) {
      // If the type of the GEP is not equal to the type of the AllocaInst,
      // it implies that the AllocaInst may share its frame slot with another
      // AllocaInst. So we cast the GEP to the AllocaInst's type here to
      // reuse the frame storage.
      //
      // Note: if we change the strategy for dealing with alignment, we need
      // to refine this cast.
1168       if (GEP->getResultElementType() != Orig->getType())
1169         return Builder.CreateBitCast(GEP, Orig->getType(),
1170                                      Orig->getName() + Twine(".cast"));
1171     }
1172     return GEP;
1173   };
1174 
1175   for (auto const &E : FrameData.Spills) {
1176     Value *Def = E.first;
1177     // Create a store instruction storing the value into the
1178     // coroutine frame.
1179     Instruction *InsertPt = nullptr;
1180     if (auto *Arg = dyn_cast<Argument>(Def)) {
1181       // For arguments, we will place the store instruction right after
1182       // the coroutine frame pointer instruction, i.e. bitcast of
1183       // coro.begin from i8* to %f.frame*.
1184       InsertPt = FramePtr->getNextNode();
1185 
1186       // If we're spilling an Argument, make sure we clear 'nocapture'
1187       // from the coroutine function.
1188       Arg->getParent()->removeParamAttr(Arg->getArgNo(), Attribute::NoCapture);
1189 
1190     } else if (auto *CSI = dyn_cast<AnyCoroSuspendInst>(Def)) {
1191       // Don't spill immediately after a suspend; splitting assumes
1192       // that the suspend will be followed by a branch.
1193       InsertPt = CSI->getParent()->getSingleSuccessor()->getFirstNonPHI();
1194     } else {
1195       auto *I = cast<Instruction>(Def);
1196       if (!DT.dominates(CB, I)) {
        // If it is not dominated by CoroBegin, then the spill should be
        // inserted immediately after the frame pointer is computed.
1199         InsertPt = FramePtr->getNextNode();
1200       } else if (auto *II = dyn_cast<InvokeInst>(I)) {
1201         // If we are spilling the result of the invoke instruction, split
1202         // the normal edge and insert the spill in the new block.
1203         auto *NewBB = SplitEdge(II->getParent(), II->getNormalDest());
1204         InsertPt = NewBB->getTerminator();
1205       } else if (isa<PHINode>(I)) {
        // Skip the PHINodes and EH pad instructions.
1207         BasicBlock *DefBlock = I->getParent();
1208         if (auto *CSI = dyn_cast<CatchSwitchInst>(DefBlock->getTerminator()))
1209           InsertPt = splitBeforeCatchSwitch(CSI);
1210         else
1211           InsertPt = &*DefBlock->getFirstInsertionPt();
1212       } else {
1213         assert(!I->isTerminator() && "unexpected terminator");
1214         // For all other values, the spill is placed immediately after
1215         // the definition.
1216         InsertPt = I->getNextNode();
1217       }
1218     }
1219 
1220     auto Index = FrameData.getFieldIndex(Def);
1221     Builder.SetInsertPoint(InsertPt);
1222     auto *G = Builder.CreateConstInBoundsGEP2_32(
1223         FrameTy, FramePtr, 0, Index, Def->getName() + Twine(".spill.addr"));
1224     Builder.CreateStore(Def, G);
1225 
1226     BasicBlock *CurrentBlock = nullptr;
1227     Value *CurrentReload = nullptr;
1228     for (auto *U : E.second) {
      // If we have not seen the use block yet, create a load instruction in
      // it to reload the spilled value from the coroutine frame.
1232       if (CurrentBlock != U->getParent()) {
1233         CurrentBlock = U->getParent();
1234         Builder.SetInsertPoint(&*CurrentBlock->getFirstInsertionPt());
1235 
1236         auto *GEP = GetFramePointer(E.first);
1237         GEP->setName(E.first->getName() + Twine(".reload.addr"));
1238         CurrentReload = Builder.CreateLoad(
1239             FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP,
1240             E.first->getName() + Twine(".reload"));
1241 
1242         TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(Def);
1243         for (DbgDeclareInst *DDI : DIs) {
1244           bool AllowUnresolved = false;
1245           // This dbg.declare is preserved for all coro-split function
1246           // fragments. It will be unreachable in the main function, and
1247           // processed by coro::salvageDebugInfo() by CoroCloner.
1248           DIBuilder(*CurrentBlock->getParent()->getParent(), AllowUnresolved)
1249               .insertDeclare(CurrentReload, DDI->getVariable(),
1250                              DDI->getExpression(), DDI->getDebugLoc(),
1251                              &*Builder.GetInsertPoint());
1252           // This dbg.declare is for the main function entry point.  It
1253           // will be deleted in all coro-split functions.
1254           coro::salvageDebugInfo(DbgPtrAllocaCache, DDI, Shape.ReuseFrameSlot);
1255         }
1256       }
1257 
1258       // If we have a single edge PHINode, remove it and replace it with a
1259       // reload from the coroutine frame. (We already took care of multi edge
1260       // PHINodes by rewriting them in the rewritePHIs function).
1261       if (auto *PN = dyn_cast<PHINode>(U)) {
1262         assert(PN->getNumIncomingValues() == 1 &&
1263                "unexpected number of incoming "
1264                "values in the PHINode");
1265         PN->replaceAllUsesWith(CurrentReload);
1266         PN->eraseFromParent();
1267         continue;
1268       }
1269 
      // Replace all uses of Def in the current instruction with the reload.
1272       U->replaceUsesOfWith(Def, CurrentReload);
1273     }
1274   }
1275 
1276   BasicBlock *FramePtrBB = FramePtr->getParent();
1277 
1278   auto SpillBlock =
1279       FramePtrBB->splitBasicBlock(FramePtr->getNextNode(), "AllocaSpillBB");
1280   SpillBlock->splitBasicBlock(&SpillBlock->front(), "PostSpill");
1281   Shape.AllocaSpillBlock = SpillBlock;
1282 
1283   // retcon and retcon.once lowering assumes all uses have been sunk.
1284   if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
1285       Shape.ABI == coro::ABI::Async) {
    // If we found any allocas, replace all of their remaining uses with GEPs.
1287     Builder.SetInsertPoint(&SpillBlock->front());
1288     for (const auto &P : FrameData.Allocas) {
1289       AllocaInst *Alloca = P.Alloca;
1290       auto *G = GetFramePointer(Alloca);
1291 
      // We are not using ReplaceInstWithInst(P.Alloca, cast<Instruction>(G))
      // here, as we are changing the location of the instruction.
1294       G->takeName(Alloca);
1295       Alloca->replaceAllUsesWith(G);
1296       Alloca->eraseFromParent();
1297     }
1298     return FramePtr;
1299   }
1300 
  // If we found any allocas, replace all of their remaining uses with GEP
  // instructions. Because new dbg.declares have been created for these
  // allocas, we also delete the original dbg.declares and replace other uses
  // with undef.
1304   // Note: We cannot replace the alloca with GEP instructions indiscriminately,
1305   // as some of the uses may not be dominated by CoroBegin.
1306   Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front());
1307   SmallVector<Instruction *, 4> UsersToUpdate;
1308   for (const auto &A : FrameData.Allocas) {
1309     AllocaInst *Alloca = A.Alloca;
1310     UsersToUpdate.clear();
1311     for (User *U : Alloca->users()) {
1312       auto *I = cast<Instruction>(U);
1313       if (DT.dominates(CB, I))
1314         UsersToUpdate.push_back(I);
1315     }
1316     if (UsersToUpdate.empty())
1317       continue;
1318     auto *G = GetFramePointer(Alloca);
1319     G->setName(Alloca->getName() + Twine(".reload.addr"));
1320 
1321     TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(Alloca);
1322     if (!DIs.empty())
1323       DIBuilder(*Alloca->getModule(),
1324                 /*AllowUnresolved*/ false)
1325           .insertDeclare(G, DIs.front()->getVariable(),
1326                          DIs.front()->getExpression(),
1327                          DIs.front()->getDebugLoc(), DIs.front());
1328     for (auto *DI : FindDbgDeclareUses(Alloca))
1329       DI->eraseFromParent();
1330     replaceDbgUsesWithUndef(Alloca);
1331 
1332     for (Instruction *I : UsersToUpdate)
1333       I->replaceUsesOfWith(Alloca, G);
1334   }
1335   Builder.SetInsertPoint(FramePtr->getNextNode());
1336   for (const auto &A : FrameData.Allocas) {
1337     AllocaInst *Alloca = A.Alloca;
1338     if (A.MayWriteBeforeCoroBegin) {
      // MayWriteBeforeCoroBegin really means the alloca is potentially
      // modified before CoroBegin, so we must copy its current contents into
      // the frame slot.
1340       if (Alloca->isArrayAllocation())
1341         report_fatal_error(
1342             "Coroutines cannot handle copying of array allocas yet");
1343 
1344       auto *G = GetFramePointer(Alloca);
1345       auto *Value = Builder.CreateLoad(Alloca->getAllocatedType(), Alloca);
1346       Builder.CreateStore(Value, G);
1347     }
    // For each alias of Alloca created before CoroBegin but used after
    // CoroBegin, we recreate it after CoroBegin by applying the offset
    // to the pointer in the frame.
1351     for (const auto &Alias : A.Aliases) {
1352       auto *FramePtr = GetFramePointer(Alloca);
1353       auto *FramePtrRaw =
1354           Builder.CreateBitCast(FramePtr, Type::getInt8PtrTy(C));
1355       auto *AliasPtr = Builder.CreateGEP(
1356           FramePtrRaw,
1357           ConstantInt::get(Type::getInt64Ty(C), Alias.second.getValue()));
1358       auto *AliasPtrTyped =
1359           Builder.CreateBitCast(AliasPtr, Alias.first->getType());
1360       Alias.first->replaceUsesWithIf(
1361           AliasPtrTyped, [&](Use &U) { return DT.dominates(CB, U); });
1362     }
1363   }
1364   return FramePtr;
1365 }
1366 
// Moves the values in the PHIs in SuccBB that correspond to PredBB into a new
// PHI in InsertedBB.
1369 static void movePHIValuesToInsertedBlock(BasicBlock *SuccBB,
1370                                          BasicBlock *InsertedBB,
1371                                          BasicBlock *PredBB,
1372                                          PHINode *UntilPHI = nullptr) {
1373   auto *PN = cast<PHINode>(&SuccBB->front());
1374   do {
1375     int Index = PN->getBasicBlockIndex(InsertedBB);
1376     Value *V = PN->getIncomingValue(Index);
1377     PHINode *InputV = PHINode::Create(
1378         V->getType(), 1, V->getName() + Twine(".") + SuccBB->getName(),
1379         &InsertedBB->front());
1380     InputV->addIncoming(V, PredBB);
1381     PN->setIncomingValue(Index, InputV);
1382     PN = dyn_cast<PHINode>(PN->getNextNode());
1383   } while (PN != UntilPHI);
1384 }
1385 
1386 // Rewrites the PHI Nodes in a cleanuppad.
1387 static void rewritePHIsForCleanupPad(BasicBlock *CleanupPadBB,
1388                                      CleanupPadInst *CleanupPad) {
  // For every incoming edge to a CleanupPad we will create a new block holding
  // all incoming values in single-value PHI nodes. We will then create another
  // block to act as a dispatcher (as all unwind edges for related EH blocks
  // must be the same).
1393   //
1394   // cleanuppad:
1395   //    %2 = phi i32[%0, %catchswitch], [%1, %catch.1]
1396   //    %3 = cleanuppad within none []
1397   //
1398   // It will create:
1399   //
1400   // cleanuppad.corodispatch
1401   //    %2 = phi i8[0, %catchswitch], [1, %catch.1]
1402   //    %3 = cleanuppad within none []
  //    switch i8 %2, label %unreachable
1404   //            [i8 0, label %cleanuppad.from.catchswitch
1405   //             i8 1, label %cleanuppad.from.catch.1]
1406   // cleanuppad.from.catchswitch:
1407   //    %4 = phi i32 [%0, %catchswitch]
  //    br label %cleanuppad
1409   // cleanuppad.from.catch.1:
1410   //    %6 = phi i32 [%1, %catch.1]
  //    br label %cleanuppad
1412   // cleanuppad:
1413   //    %8 = phi i32 [%4, %cleanuppad.from.catchswitch],
1414   //                 [%6, %cleanuppad.from.catch.1]
1415 
1416   // Unreachable BB, in case switching on an invalid value in the dispatcher.
1417   auto *UnreachBB = BasicBlock::Create(
1418       CleanupPadBB->getContext(), "unreachable", CleanupPadBB->getParent());
1419   IRBuilder<> Builder(UnreachBB);
1420   Builder.CreateUnreachable();
1421 
1422   // Create a new cleanuppad which will be the dispatcher.
1423   auto *NewCleanupPadBB =
1424       BasicBlock::Create(CleanupPadBB->getContext(),
1425                          CleanupPadBB->getName() + Twine(".corodispatch"),
1426                          CleanupPadBB->getParent(), CleanupPadBB);
1427   Builder.SetInsertPoint(NewCleanupPadBB);
1428   auto *SwitchType = Builder.getInt8Ty();
1429   auto *SetDispatchValuePN =
1430       Builder.CreatePHI(SwitchType, pred_size(CleanupPadBB));
1431   CleanupPad->removeFromParent();
1432   CleanupPad->insertAfter(SetDispatchValuePN);
1433   auto *SwitchOnDispatch = Builder.CreateSwitch(SetDispatchValuePN, UnreachBB,
1434                                                 pred_size(CleanupPadBB));
1435 
1436   int SwitchIndex = 0;
1437   SmallVector<BasicBlock *, 8> Preds(predecessors(CleanupPadBB));
1438   for (BasicBlock *Pred : Preds) {
1439     // Create a new cleanuppad and move the PHI values to there.
1440     auto *CaseBB = BasicBlock::Create(CleanupPadBB->getContext(),
1441                                       CleanupPadBB->getName() +
1442                                           Twine(".from.") + Pred->getName(),
1443                                       CleanupPadBB->getParent(), CleanupPadBB);
1444     updatePhiNodes(CleanupPadBB, Pred, CaseBB);
1445     CaseBB->setName(CleanupPadBB->getName() + Twine(".from.") +
1446                     Pred->getName());
1447     Builder.SetInsertPoint(CaseBB);
1448     Builder.CreateBr(CleanupPadBB);
1449     movePHIValuesToInsertedBlock(CleanupPadBB, CaseBB, NewCleanupPadBB);
1450 
1451     // Update this Pred to the new unwind point.
1452     setUnwindEdgeTo(Pred->getTerminator(), NewCleanupPadBB);
1453 
1454     // Setup the switch in the dispatcher.
1455     auto *SwitchConstant = ConstantInt::get(SwitchType, SwitchIndex);
1456     SetDispatchValuePN->addIncoming(SwitchConstant, Pred);
1457     SwitchOnDispatch->addCase(SwitchConstant, CaseBB);
1458     SwitchIndex++;
1459   }
1460 }
1461 
1462 static void rewritePHIs(BasicBlock &BB) {
  // For every incoming edge we will create a block holding all
  // incoming values in single-value PHI nodes.
1465   //
1466   // loop:
1467   //    %n.val = phi i32[%n, %entry], [%inc, %loop]
1468   //
1469   // It will create:
1470   //
1471   // loop.from.entry:
1472   //    %n.loop.pre = phi i32 [%n, %entry]
  //    br label %loop
1474   // loop.from.loop:
1475   //    %inc.loop.pre = phi i32 [%inc, %loop]
  //    br label %loop
1477   //
1478   // After this rewrite, further analysis will ignore any phi nodes with more
1479   // than one incoming edge.
1480 
1481   // TODO: Simplify PHINodes in the basic block to remove duplicate
1482   // predecessors.
1483 
1484   // Special case for CleanupPad: all EH blocks must have the same unwind edge
1485   // so we need to create an additional "dispatcher" block.
1486   if (auto *CleanupPad =
1487           dyn_cast_or_null<CleanupPadInst>(BB.getFirstNonPHI())) {
1488     SmallVector<BasicBlock *, 8> Preds(predecessors(&BB));
1489     for (BasicBlock *Pred : Preds) {
1490       if (CatchSwitchInst *CS =
1491               dyn_cast<CatchSwitchInst>(Pred->getTerminator())) {
        // CleanupPad with a CatchSwitch predecessor: therefore this is an
        // unwind destination that needs to be handled specially.
1494         assert(CS->getUnwindDest() == &BB);
1495         (void)CS;
1496         rewritePHIsForCleanupPad(&BB, CleanupPad);
1497         return;
1498       }
1499     }
1500   }
1501 
1502   LandingPadInst *LandingPad = nullptr;
1503   PHINode *ReplPHI = nullptr;
1504   if ((LandingPad = dyn_cast_or_null<LandingPadInst>(BB.getFirstNonPHI()))) {
1505     // ehAwareSplitEdge will clone the LandingPad in all the edge blocks.
1506     // We replace the original landing pad with a PHINode that will collect the
1507     // results from all of them.
1508     ReplPHI = PHINode::Create(LandingPad->getType(), 1, "", LandingPad);
1509     ReplPHI->takeName(LandingPad);
1510     LandingPad->replaceAllUsesWith(ReplPHI);
1511     // We will erase the original landing pad at the end of this function after
1512     // ehAwareSplitEdge cloned it in the transition blocks.
1513   }
1514 
1515   SmallVector<BasicBlock *, 8> Preds(predecessors(&BB));
1516   for (BasicBlock *Pred : Preds) {
1517     auto *IncomingBB = ehAwareSplitEdge(Pred, &BB, LandingPad, ReplPHI);
1518     IncomingBB->setName(BB.getName() + Twine(".from.") + Pred->getName());
1519 
1520     // Stop the moving of values at ReplPHI, as this is either null or the PHI
1521     // that replaced the landing pad.
1522     movePHIValuesToInsertedBlock(&BB, IncomingBB, Pred, ReplPHI);
1523   }
1524 
1525   if (LandingPad) {
    // Calls to ehAwareSplitEdge cloned the original landing pad; we no longer
    // need it.
1528     LandingPad->eraseFromParent();
1529   }
1530 }
1531 
1532 static void rewritePHIs(Function &F) {
1533   SmallVector<BasicBlock *, 8> WorkList;
1534 
1535   for (BasicBlock &BB : F)
1536     if (auto *PN = dyn_cast<PHINode>(&BB.front()))
1537       if (PN->getNumIncomingValues() > 1)
1538         WorkList.push_back(&BB);
1539 
1540   for (BasicBlock *BB : WorkList)
1541     rewritePHIs(*BB);
1542 }
1543 
// Check for instructions that we can recreate on resume as opposed to spilling
// their results into the coroutine frame.
1546 static bool materializable(Instruction &V) {
1547   return isa<CastInst>(&V) || isa<GetElementPtrInst>(&V) ||
1548          isa<BinaryOperator>(&V) || isa<CmpInst>(&V) || isa<SelectInst>(&V);
1549 }
1550 
1551 // Check for structural coroutine intrinsics that should not be spilled into
1552 // the coroutine frame.
1553 static bool isCoroutineStructureIntrinsic(Instruction &I) {
1554   return isa<CoroIdInst>(&I) || isa<CoroSaveInst>(&I) ||
1555          isa<CoroSuspendInst>(&I);
1556 }
1557 
// For every use of a value that crosses a suspend point, recreate that value
// after the suspend point.
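//
// A minimal sketch (value names are invented): rather than spilling %gep, it
// is recomputed in each block that uses it across a suspend:
//
//   entry:
//     %gep = getelementptr inbounds i32, i32* %p, i64 4
//     ...suspend...
//   resume:
//     %v = load i32, i32* %gep
//
// becomes
//
//   resume:
//     %gep.copy = getelementptr inbounds i32, i32* %p, i64 4
//     %v = load i32, i32* %gep.copy
//
// (this assumes %p itself is still available after the suspend, e.g. because
// it was spilled to the frame).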
1560 static void rewriteMaterializableInstructions(IRBuilder<> &IRB,
1561                                               const SpillInfo &Spills) {
1562   for (const auto &E : Spills) {
1563     Value *Def = E.first;
1564     BasicBlock *CurrentBlock = nullptr;
1565     Instruction *CurrentMaterialization = nullptr;
1566     for (Instruction *U : E.second) {
1567       // If we have not seen this block, materialize the value.
1568       if (CurrentBlock != U->getParent()) {
1569         CurrentBlock = U->getParent();
1570         CurrentMaterialization = cast<Instruction>(Def)->clone();
1571         CurrentMaterialization->setName(Def->getName());
1572         CurrentMaterialization->insertBefore(
1573             &*CurrentBlock->getFirstInsertionPt());
1574       }
1575       if (auto *PN = dyn_cast<PHINode>(U)) {
1576         assert(PN->getNumIncomingValues() == 1 &&
1577                "unexpected number of incoming "
1578                "values in the PHINode");
1579         PN->replaceAllUsesWith(CurrentMaterialization);
1580         PN->eraseFromParent();
1581         continue;
1582       }
1583       // Replace all uses of Def in the current instruction with the
1584       // CurrentMaterialization for the block.
1585       U->replaceUsesOfWith(Def, CurrentMaterialization);
1586     }
1587   }
1588 }
1589 
1590 // Splits the block at a particular instruction unless it is the first
1591 // instruction in the block with a single predecessor.
1592 static BasicBlock *splitBlockIfNotFirst(Instruction *I, const Twine &Name) {
1593   auto *BB = I->getParent();
1594   if (&BB->front() == I) {
1595     if (BB->getSinglePredecessor()) {
1596       BB->setName(Name);
1597       return BB;
1598     }
1599   }
1600   return BB->splitBasicBlock(I, Name);
1601 }
1602 
// Split above and below a particular instruction so that it ends up alone
// in its own block.
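//
// A rough sketch (block and value names are invented, and the coro.save is
// assumed to immediately precede the coro.suspend).  Before:
//
//   bb:
//     ...
//     %save = call token @llvm.coro.save(i8* null)
//     %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
//     switch i8 %suspend, ...
//
// After splitAround(%save, "CoroSave") and splitAround(%suspend, "CoroSuspend"):
//
//   bb:
//     ...
//     br label %CoroSave
//   CoroSave:
//     %save = call token @llvm.coro.save(i8* null)
//     br label %CoroSuspend
//   CoroSuspend:
//     %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
//     br label %AfterCoroSuspend
//   AfterCoroSuspend:
//     switch i8 %suspend, ...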
1605 static void splitAround(Instruction *I, const Twine &Name) {
1606   splitBlockIfNotFirst(I, Name);
1607   splitBlockIfNotFirst(I->getNextNode(), "After" + Name);
1608 }
1609 
1610 static bool isSuspendBlock(BasicBlock *BB) {
1611   return isa<AnyCoroSuspendInst>(BB->front());
1612 }
1613 
1614 typedef SmallPtrSet<BasicBlock*, 8> VisitedBlocksSet;
1615 
1616 /// Does control flow starting at the given block ever reach a suspend
1617 /// instruction before reaching a block in VisitedOrFreeBBs?
1618 static bool isSuspendReachableFrom(BasicBlock *From,
1619                                    VisitedBlocksSet &VisitedOrFreeBBs) {
1620   // Eagerly try to add this block to the visited set.  If it's already
1621   // there, stop recursing; this path doesn't reach a suspend before
1622   // either looping or reaching a freeing block.
1623   if (!VisitedOrFreeBBs.insert(From).second)
1624     return false;
1625 
1626   // We assume that we'll already have split suspends into their own blocks.
1627   if (isSuspendBlock(From))
1628     return true;
1629 
1630   // Recurse on the successors.
1631   for (auto Succ : successors(From)) {
1632     if (isSuspendReachableFrom(Succ, VisitedOrFreeBBs))
1633       return true;
1634   }
1635 
1636   return false;
1637 }
1638 
1639 /// Is the given alloca "local", i.e. bounded in lifetime to not cross a
1640 /// suspend point?
1641 static bool isLocalAlloca(CoroAllocaAllocInst *AI) {
  // Seed the visited set with all the basic blocks containing a free
  // so that we won't search past them.
1644   VisitedBlocksSet VisitedOrFreeBBs;
1645   for (auto User : AI->users()) {
1646     if (auto FI = dyn_cast<CoroAllocaFreeInst>(User))
1647       VisitedOrFreeBBs.insert(FI->getParent());
1648   }
1649 
1650   return !isSuspendReachableFrom(AI->getParent(), VisitedOrFreeBBs);
1651 }
1652 
1653 /// After we split the coroutine, will the given basic block be along
1654 /// an obvious exit path for the resumption function?
1655 static bool willLeaveFunctionImmediatelyAfter(BasicBlock *BB,
1656                                               unsigned depth = 3) {
1657   // If we've bottomed out our depth count, stop searching and assume
1658   // that the path might loop back.
1659   if (depth == 0) return false;
1660 
1661   // If this is a suspend block, we're about to exit the resumption function.
1662   if (isSuspendBlock(BB)) return true;
1663 
1664   // Recurse into the successors.
1665   for (auto Succ : successors(BB)) {
1666     if (!willLeaveFunctionImmediatelyAfter(Succ, depth - 1))
1667       return false;
1668   }
1669 
1670   // If none of the successors leads back in a loop, we're on an exit/abort.
1671   return true;
1672 }
1673 
1674 static bool localAllocaNeedsStackSave(CoroAllocaAllocInst *AI) {
1675   // Look for a free that isn't sufficiently obviously followed by
1676   // either a suspend or a termination, i.e. something that will leave
1677   // the coro resumption frame.
1678   for (auto U : AI->users()) {
1679     auto FI = dyn_cast<CoroAllocaFreeInst>(U);
1680     if (!FI) continue;
1681 
1682     if (!willLeaveFunctionImmediatelyAfter(FI->getParent()))
1683       return true;
1684   }
1685 
1686   // If we never found one, we don't need a stack save.
1687   return false;
1688 }
1689 
1690 /// Turn each of the given local allocas into a normal (dynamic) alloca
1691 /// instruction.
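///
/// Hedged sketch (sizes, alignment and names are invented): a bounded-lifetime
///
///   %alloc = call token @llvm.coro.alloca.alloc(i64 16, i32 8)
///   %ptr   = call i8* @llvm.coro.alloca.get(token %alloc)
///   ...
///   call void @llvm.coro.alloca.free(token %alloc)
///
/// is lowered to an ordinary dynamic alloca, optionally bracketed by
/// stacksave/stackrestore:
///
///   %save = call i8* @llvm.stacksave()      ; only if a stack save is needed
///   %ptr  = alloca i8, i64 16, align 8
///   ...
///   call void @llvm.stackrestore(i8* %save)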
1692 static void lowerLocalAllocas(ArrayRef<CoroAllocaAllocInst*> LocalAllocas,
1693                               SmallVectorImpl<Instruction*> &DeadInsts) {
1694   for (auto AI : LocalAllocas) {
1695     auto M = AI->getModule();
1696     IRBuilder<> Builder(AI);
1697 
    // Save the stack pointer.  Try to avoid doing this if the stackrestore
    // would immediately precede a return or another function exit.
1700     Value *StackSave = nullptr;
1701     if (localAllocaNeedsStackSave(AI))
1702       StackSave = Builder.CreateCall(
1703                             Intrinsic::getDeclaration(M, Intrinsic::stacksave));
1704 
1705     // Allocate memory.
1706     auto Alloca = Builder.CreateAlloca(Builder.getInt8Ty(), AI->getSize());
1707     Alloca->setAlignment(Align(AI->getAlignment()));
1708 
1709     for (auto U : AI->users()) {
1710       // Replace gets with the allocation.
1711       if (isa<CoroAllocaGetInst>(U)) {
1712         U->replaceAllUsesWith(Alloca);
1713 
1714       // Replace frees with stackrestores.  This is safe because
1715       // alloca.alloc is required to obey a stack discipline, although we
1716       // don't enforce that structurally.
1717       } else {
1718         auto FI = cast<CoroAllocaFreeInst>(U);
1719         if (StackSave) {
1720           Builder.SetInsertPoint(FI);
1721           Builder.CreateCall(
1722                     Intrinsic::getDeclaration(M, Intrinsic::stackrestore),
1723                              StackSave);
1724         }
1725       }
1726       DeadInsts.push_back(cast<Instruction>(U));
1727     }
1728 
1729     DeadInsts.push_back(AI);
1730   }
1731 }
1732 
1733 /// Turn the given coro.alloca.alloc call into a dynamic allocation.
1734 /// This happens during the all-instructions iteration, so it must not
1735 /// delete the call.
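///
/// Hedged sketch (the allocator names are hypothetical): the same pattern as
/// above, but the storage must outlive the stack frame, so it roughly becomes
///
///   %mem = call i8* @<abi_allocator>(i32 16)        ; via Shape.emitAlloc
///   ...uses of coro.alloca.get replaced with %mem...
///   call void @<abi_deallocator>(i8* %mem)          ; via Shape.emitDealloc
///
/// where the actual allocation and deallocation calls are whatever the
/// coroutine ABI's emitAlloc/emitDealloc produce.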
1736 static Instruction *lowerNonLocalAlloca(CoroAllocaAllocInst *AI,
1737                                         coro::Shape &Shape,
1738                                    SmallVectorImpl<Instruction*> &DeadInsts) {
1739   IRBuilder<> Builder(AI);
1740   auto Alloc = Shape.emitAlloc(Builder, AI->getSize(), nullptr);
1741 
1742   for (User *U : AI->users()) {
1743     if (isa<CoroAllocaGetInst>(U)) {
1744       U->replaceAllUsesWith(Alloc);
1745     } else {
1746       auto FI = cast<CoroAllocaFreeInst>(U);
1747       Builder.SetInsertPoint(FI);
1748       Shape.emitDealloc(Builder, Alloc, nullptr);
1749     }
1750     DeadInsts.push_back(cast<Instruction>(U));
1751   }
1752 
1753   // Push this on last so that it gets deleted after all the others.
1754   DeadInsts.push_back(AI);
1755 
1756   // Return the new allocation value so that we can check for needed spills.
1757   return cast<Instruction>(Alloc);
1758 }
1759 
1760 /// Get the current swifterror value.
1761 static Value *emitGetSwiftErrorValue(IRBuilder<> &Builder, Type *ValueTy,
1762                                      coro::Shape &Shape) {
1763   // Make a fake function pointer as a sort of intrinsic.
1764   auto FnTy = FunctionType::get(ValueTy, {}, false);
1765   auto Fn = ConstantPointerNull::get(FnTy->getPointerTo());
1766 
1767   auto Call = Builder.CreateCall(FnTy, Fn, {});
1768   Shape.SwiftErrorOps.push_back(Call);
1769 
1770   return Call;
1771 }
1772 
1773 /// Set the given value as the current swifterror value.
1774 ///
1775 /// Returns a slot that can be used as a swifterror slot.
1776 static Value *emitSetSwiftErrorValue(IRBuilder<> &Builder, Value *V,
1777                                      coro::Shape &Shape) {
1778   // Make a fake function pointer as a sort of intrinsic.
1779   auto FnTy = FunctionType::get(V->getType()->getPointerTo(),
1780                                 {V->getType()}, false);
1781   auto Fn = ConstantPointerNull::get(FnTy->getPointerTo());
1782 
1783   auto Call = Builder.CreateCall(FnTy, Fn, { V });
1784   Shape.SwiftErrorOps.push_back(Call);
1785 
1786   return Call;
1787 }
1788 
/// Set the swifterror value from the given alloca before a call,
/// then put it back in the alloca afterwards.
1791 ///
1792 /// Returns an address that will stand in for the swifterror slot
1793 /// until splitting.
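///
/// Illustrative sketch only (names are invented; the "get"/"set" operations
/// are the fake null-function-pointer calls recorded in Shape.SwiftErrorOps):
///
///   %v0   = load %swift.error*, %swift.error** %alloca
///   %slot = <set swifterror value %v0>    ; yields an address usable as slot
///   call void @fn(%swift.error** swifterror %slot, ...)
///   %v1   = <get swifterror value>
///   store %swift.error* %v1, %swift.error** %alloca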
1794 static Value *emitSetAndGetSwiftErrorValueAround(Instruction *Call,
1795                                                  AllocaInst *Alloca,
1796                                                  coro::Shape &Shape) {
1797   auto ValueTy = Alloca->getAllocatedType();
1798   IRBuilder<> Builder(Call);
1799 
1800   // Load the current value from the alloca and set it as the
1801   // swifterror value.
1802   auto ValueBeforeCall = Builder.CreateLoad(ValueTy, Alloca);
1803   auto Addr = emitSetSwiftErrorValue(Builder, ValueBeforeCall, Shape);
1804 
1805   // Move to after the call.  Since swifterror only has a guaranteed
1806   // value on normal exits, we can ignore implicit and explicit unwind
1807   // edges.
1808   if (isa<CallInst>(Call)) {
1809     Builder.SetInsertPoint(Call->getNextNode());
1810   } else {
1811     auto Invoke = cast<InvokeInst>(Call);
1812     Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstNonPHIOrDbg());
1813   }
1814 
1815   // Get the current swifterror value and store it to the alloca.
1816   auto ValueAfterCall = emitGetSwiftErrorValue(Builder, ValueTy, Shape);
1817   Builder.CreateStore(ValueAfterCall, Alloca);
1818 
1819   return Addr;
1820 }
1821 
1822 /// Eliminate a formerly-swifterror alloca by inserting the get/set
1823 /// intrinsics and attempting to MemToReg the alloca away.
1824 static void eliminateSwiftErrorAlloca(Function &F, AllocaInst *Alloca,
1825                                       coro::Shape &Shape) {
1826   for (auto UI = Alloca->use_begin(), UE = Alloca->use_end(); UI != UE; ) {
1827     // We're likely changing the use list, so use a mutation-safe
1828     // iteration pattern.
1829     auto &Use = *UI;
1830     ++UI;
1831 
1832     // swifterror values can only be used in very specific ways.
1833     // We take advantage of that here.
1834     auto User = Use.getUser();
1835     if (isa<LoadInst>(User) || isa<StoreInst>(User))
1836       continue;
1837 
1838     assert(isa<CallInst>(User) || isa<InvokeInst>(User));
1839     auto Call = cast<Instruction>(User);
1840 
1841     auto Addr = emitSetAndGetSwiftErrorValueAround(Call, Alloca, Shape);
1842 
1843     // Use the returned slot address as the call argument.
1844     Use.set(Addr);
1845   }
1846 
1847   // All the uses should be loads and stores now.
1848   assert(isAllocaPromotable(Alloca));
1849 }
1850 
1851 /// "Eliminate" a swifterror argument by reducing it to the alloca case
/// and then loading and storing in the prologue and epilogue.
1853 ///
1854 /// The argument keeps the swifterror flag.
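///
/// A hedged sketch (names are invented; the swifterror get/set placeholders
/// stand for the fake calls emitted by emitGetSwiftErrorValue and
/// emitSetSwiftErrorValue):
///
///   %err.slot = alloca %swift.error*
///   store %swift.error* null, %swift.error** %err.slot   ; null on entry
///   ...
///   ; around every suspend: save/restore via
///   ;   emitSetAndGetSwiftErrorValueAround(<suspend>, %err.slot, Shape)
///   ...
///   ; at every coro.end:
///   %final = load %swift.error*, %swift.error** %err.slot
///   ; <set swifterror value to %final>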
1855 static void eliminateSwiftErrorArgument(Function &F, Argument &Arg,
1856                                         coro::Shape &Shape,
1857                              SmallVectorImpl<AllocaInst*> &AllocasToPromote) {
1858   IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());
1859 
1860   auto ArgTy = cast<PointerType>(Arg.getType());
1861   auto ValueTy = ArgTy->getElementType();
1862 
1863   // Reduce to the alloca case:
1864 
1865   // Create an alloca and replace all uses of the arg with it.
1866   auto Alloca = Builder.CreateAlloca(ValueTy, ArgTy->getAddressSpace());
1867   Arg.replaceAllUsesWith(Alloca);
1868 
1869   // Set an initial value in the alloca.  swifterror is always null on entry.
1870   auto InitialValue = Constant::getNullValue(ValueTy);
1871   Builder.CreateStore(InitialValue, Alloca);
1872 
1873   // Find all the suspends in the function and save and restore around them.
1874   for (auto Suspend : Shape.CoroSuspends) {
1875     (void) emitSetAndGetSwiftErrorValueAround(Suspend, Alloca, Shape);
1876   }
1877 
1878   // Find all the coro.ends in the function and restore the error value.
1879   for (auto End : Shape.CoroEnds) {
1880     Builder.SetInsertPoint(End);
1881     auto FinalValue = Builder.CreateLoad(ValueTy, Alloca);
1882     (void) emitSetSwiftErrorValue(Builder, FinalValue, Shape);
1883   }
1884 
1885   // Now we can use the alloca logic.
1886   AllocasToPromote.push_back(Alloca);
1887   eliminateSwiftErrorAlloca(F, Alloca, Shape);
1888 }
1889 
1890 /// Eliminate all problematic uses of swifterror arguments and allocas
1891 /// from the function.  We'll fix them up later when splitting the function.
1892 static void eliminateSwiftError(Function &F, coro::Shape &Shape) {
1893   SmallVector<AllocaInst*, 4> AllocasToPromote;
1894 
1895   // Look for a swifterror argument.
1896   for (auto &Arg : F.args()) {
1897     if (!Arg.hasSwiftErrorAttr()) continue;
1898 
1899     eliminateSwiftErrorArgument(F, Arg, Shape, AllocasToPromote);
1900     break;
1901   }
1902 
1903   // Look for swifterror allocas.
1904   for (auto &Inst : F.getEntryBlock()) {
1905     auto Alloca = dyn_cast<AllocaInst>(&Inst);
1906     if (!Alloca || !Alloca->isSwiftError()) continue;
1907 
1908     // Clear the swifterror flag.
1909     Alloca->setSwiftError(false);
1910 
1911     AllocasToPromote.push_back(Alloca);
1912     eliminateSwiftErrorAlloca(F, Alloca, Shape);
1913   }
1914 
1915   // If we have any allocas to promote, compute a dominator tree and
1916   // promote them en masse.
1917   if (!AllocasToPromote.empty()) {
1918     DominatorTree DT(F);
1919     PromoteMemToReg(AllocasToPromote, DT);
1920   }
1921 }
1922 
1923 /// retcon and retcon.once conventions assume that all spill uses can be sunk
1924 /// after the coro.begin intrinsic.
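///
/// Rough sketch (names are invented): a use of a future frame value that sits
/// before coro.begin in the same block, e.g.
///
///   %addr = bitcast i8* %x to i64*          ; user of the spilled value %x
///   %hdl  = call i8* @llvm.coro.begin(token %id, i8* %mem)
///
/// is moved, along with any transitive users not already dominated by
/// coro.begin, to just after the coro.begin call, in dominance order.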
1925 static void sinkSpillUsesAfterCoroBegin(Function &F,
1926                                         const FrameDataInfo &FrameData,
1927                                         CoroBeginInst *CoroBegin) {
1928   DominatorTree Dom(F);
1929 
1930   SmallSetVector<Instruction *, 32> ToMove;
1931   SmallVector<Instruction *, 32> Worklist;
1932 
1933   // Collect all users that precede coro.begin.
1934   for (auto *Def : FrameData.getAllDefs()) {
1935     for (User *U : Def->users()) {
1936       auto Inst = cast<Instruction>(U);
1937       if (Inst->getParent() != CoroBegin->getParent() ||
1938           Dom.dominates(CoroBegin, Inst))
1939         continue;
1940       if (ToMove.insert(Inst))
1941         Worklist.push_back(Inst);
1942     }
1943   }
1944   // Recursively collect users before coro.begin.
1945   while (!Worklist.empty()) {
1946     auto *Def = Worklist.pop_back_val();
1947     for (User *U : Def->users()) {
1948       auto Inst = cast<Instruction>(U);
1949       if (Dom.dominates(CoroBegin, Inst))
1950         continue;
1951       if (ToMove.insert(Inst))
1952         Worklist.push_back(Inst);
1953     }
1954   }
1955 
1956   // Sort by dominance.
1957   SmallVector<Instruction *, 64> InsertionList(ToMove.begin(), ToMove.end());
1958   llvm::sort(InsertionList, [&Dom](Instruction *A, Instruction *B) -> bool {
    // If a dominates b, it should precede (<) b.
1960     return Dom.dominates(A, B);
1961   });
1962 
1963   Instruction *InsertPt = CoroBegin->getNextNode();
1964   for (Instruction *Inst : InsertionList)
1965     Inst->moveBefore(InsertPt);
1966 }
1967 
/// For each local variable whose users all lie within a single suspended
/// region, sink its lifetime.start marker to a point just after the suspend
/// block. Doing so minimizes the lifetime of each variable, and hence the
/// amount of data we end up putting on the frame.
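///
/// A hedged sketch (names are invented): if every use of %x other than its
/// lifetime markers sits after the first suspend, then
///
///   entry:
///     %x = alloca i64
///     %x.i8 = bitcast i64* %x to i8*
///     call void @llvm.lifetime.start.p0i8(i64 8, i8* %x.i8)
///     ...suspend...
///   await.ready:                            ; block following the suspend
///     store i64 0, i64* %x
///
/// has its lifetime.start sunk into await.ready, so %x no longer appears to be
/// live across the suspend and need not be placed in the frame.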
1972 static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape,
1973                                      SuspendCrossingInfo &Checker) {
1974   DominatorTree DT(F);
1975 
1976   // Collect all possible basic blocks which may dominate all uses of allocas.
1977   SmallPtrSet<BasicBlock *, 4> DomSet;
1978   DomSet.insert(&F.getEntryBlock());
1979   for (auto *CSI : Shape.CoroSuspends) {
1980     BasicBlock *SuspendBlock = CSI->getParent();
1981     assert(isSuspendBlock(SuspendBlock) && SuspendBlock->getSingleSuccessor() &&
1982            "should have split coro.suspend into its own block");
1983     DomSet.insert(SuspendBlock->getSingleSuccessor());
1984   }
1985 
1986   for (Instruction &I : instructions(F)) {
1987     AllocaInst* AI = dyn_cast<AllocaInst>(&I);
1988     if (!AI)
1989       continue;
1990 
1991     for (BasicBlock *DomBB : DomSet) {
1992       bool Valid = true;
1993       SmallVector<Instruction *, 1> Lifetimes;
1994 
1995       auto isLifetimeStart = [](Instruction* I) {
1996         if (auto* II = dyn_cast<IntrinsicInst>(I))
1997           return II->getIntrinsicID() == Intrinsic::lifetime_start;
1998         return false;
1999       };
2000 
2001       auto collectLifetimeStart = [&](Instruction *U, AllocaInst *AI) {
2002         if (isLifetimeStart(U)) {
2003           Lifetimes.push_back(U);
2004           return true;
2005         }
2006         if (!U->hasOneUse() || U->stripPointerCasts() != AI)
2007           return false;
2008         if (isLifetimeStart(U->user_back())) {
2009           Lifetimes.push_back(U->user_back());
2010           return true;
2011         }
2012         return false;
2013       };
2014 
2015       for (User *U : AI->users()) {
2016         Instruction *UI = cast<Instruction>(U);
        // For all users except lifetime.start markers: if they are all
        // dominated by one of the candidate basic blocks and do not cross
        // suspend points, then there is no need to spill the instruction.
2021         if (!DT.dominates(DomBB, UI->getParent()) ||
2022             Checker.isDefinitionAcrossSuspend(DomBB, UI)) {
2023           // Skip lifetime.start, GEP and bitcast used by lifetime.start
2024           // markers.
2025           if (collectLifetimeStart(UI, AI))
2026             continue;
2027           Valid = false;
2028           break;
2029         }
2030       }
      // Sink the lifetime.start markers to the dominating block when the
      // lifetime markers are the only users outside that region.
2033       if (Valid && Lifetimes.size() != 0) {
        // This may be AI itself when AI's type is already i8*.
2035         auto *NewBitCast = [&](AllocaInst *AI) -> Value* {
2036           if (isa<AllocaInst>(Lifetimes[0]->getOperand(1)))
2037             return AI;
2038           auto *Int8PtrTy = Type::getInt8PtrTy(F.getContext());
2039           return CastInst::Create(Instruction::BitCast, AI, Int8PtrTy, "",
2040                                   DomBB->getTerminator());
2041         }(AI);
2042 
2043         auto *NewLifetime = Lifetimes[0]->clone();
2044         NewLifetime->replaceUsesOfWith(NewLifetime->getOperand(1), NewBitCast);
2045         NewLifetime->insertBefore(DomBB->getTerminator());
2046 
        // All the lifetime.start markers outside the region are no longer
        // necessary.
2048         for (Instruction *S : Lifetimes)
2049           S->eraseFromParent();
2050 
2051         break;
2052       }
2053     }
2054   }
2055 }
2056 
2057 static void collectFrameAllocas(Function &F, coro::Shape &Shape,
2058                                 const SuspendCrossingInfo &Checker,
2059                                 SmallVectorImpl<AllocaInfo> &Allocas) {
2060   for (Instruction &I : instructions(F)) {
2061     auto *AI = dyn_cast<AllocaInst>(&I);
2062     if (!AI)
2063       continue;
2064     // The PromiseAlloca will be specially handled since it needs to be in a
2065     // fixed position in the frame.
2066     if (AI == Shape.SwitchLowering.PromiseAlloca) {
2067       continue;
2068     }
2069     DominatorTree DT(F);
2070     AllocaUseVisitor Visitor{F.getParent()->getDataLayout(), DT,
2071                              *Shape.CoroBegin, Checker};
2072     Visitor.visitPtr(*AI);
2073     if (!Visitor.getShouldLiveOnFrame())
2074       continue;
2075     Allocas.emplace_back(AI, Visitor.getAliasesCopy(),
2076                          Visitor.getMayWriteBeforeCoroBegin());
2077   }
2078 }
2079 
2080 void coro::salvageDebugInfo(
2081     SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> &DbgPtrAllocaCache,
2082     DbgDeclareInst *DDI, bool ReuseFrameSlot) {
2083   Function *F = DDI->getFunction();
2084   IRBuilder<> Builder(F->getContext());
2085   auto InsertPt = F->getEntryBlock().getFirstInsertionPt();
2086   while (isa<IntrinsicInst>(InsertPt))
2087     ++InsertPt;
2088   Builder.SetInsertPoint(&F->getEntryBlock(), InsertPt);
2089   DIExpression *Expr = DDI->getExpression();
2090   // Follow the pointer arithmetic all the way to the incoming
2091   // function argument and convert into a DIExpression.
2092   bool OutermostLoad = true;
2093   Value *Storage = DDI->getAddress();
2094   Value *OriginalStorage = Storage;
2095   while (Storage) {
2096     if (auto *LdInst = dyn_cast<LoadInst>(Storage)) {
2097       Storage = LdInst->getOperand(0);
2098       // FIXME: This is a heuristic that works around the fact that
2099       // LLVM IR debug intrinsics cannot yet distinguish between
2100       // memory and value locations: Because a dbg.declare(alloca) is
2101       // implicitly a memory location no DW_OP_deref operation for the
2102       // last direct load from an alloca is necessary.  This condition
2103       // effectively drops the *last* DW_OP_deref in the expression.
2104       if (!OutermostLoad)
2105         Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
2106       OutermostLoad = false;
2107     } else if (auto *StInst = dyn_cast<StoreInst>(Storage)) {
2108       Storage = StInst->getOperand(0);
2109     } else if (auto *GEPInst = dyn_cast<GetElementPtrInst>(Storage)) {
2110       Expr = llvm::salvageDebugInfoImpl(*GEPInst, Expr,
2111                                         /*WithStackValue=*/false, 0);
2112       if (!Expr)
2113         return;
2114       Storage = GEPInst->getOperand(0);
2115     } else if (auto *BCInst = dyn_cast<llvm::BitCastInst>(Storage))
2116       Storage = BCInst->getOperand(0);
2117     else
2118       break;
2119   }
2120   if (!Storage)
2121     return;
2122 
2123   // Store a pointer to the coroutine frame object in an alloca so it
2124   // is available throughout the function when producing unoptimized
2125   // code. Extending the lifetime this way is correct because the
2126   // variable has been declared by a dbg.declare intrinsic.
2127   //
  // Avoid creating the alloca when it would be eliminated by optimization
  // passes, which would leave the corresponding dbg.declares invalid.
2130   if (!ReuseFrameSlot && !EnableReuseStorageInFrame)
2131     if (auto *Arg = dyn_cast<llvm::Argument>(Storage)) {
2132       auto &Cached = DbgPtrAllocaCache[Storage];
2133       if (!Cached) {
2134         Cached = Builder.CreateAlloca(Storage->getType(), 0, nullptr,
2135                                       Arg->getName() + ".debug");
2136         Builder.CreateStore(Storage, Cached);
2137       }
2138       Storage = Cached;
2139       // FIXME: LLVM lacks nuanced semantics to differentiate between
2140       // memory and direct locations at the IR level. The backend will
2141       // turn a dbg.declare(alloca, ..., DIExpression()) into a memory
2142       // location. Thus, if there are deref and offset operations in the
2143       // expression, we need to add a DW_OP_deref at the *start* of the
2144       // expression to first load the contents of the alloca before
2145       // adjusting it with the expression.
2146       if (Expr && Expr->isComplex())
2147         Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
2148     }
2149   DDI->replaceVariableLocationOp(OriginalStorage, Storage);
2150   DDI->setExpression(Expr);
2151   if (auto *InsertPt = dyn_cast<Instruction>(Storage))
2152     DDI->moveAfter(InsertPt);
2153   else if (isa<Argument>(Storage))
2154     DDI->moveAfter(F->getEntryBlock().getFirstNonPHI());
2155 }
2156 
2157 void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
2158   // Don't eliminate swifterror in async functions that won't be split.
2159   if (Shape.ABI != coro::ABI::Async || !Shape.CoroSuspends.empty())
2160     eliminateSwiftError(F, Shape);
2161 
2162   if (Shape.ABI == coro::ABI::Switch &&
2163       Shape.SwitchLowering.PromiseAlloca) {
2164     Shape.getSwitchCoroId()->clearPromise();
2165   }
2166 
2167   // Make sure that all coro.save, coro.suspend and the fallthrough coro.end
2168   // intrinsics are in their own blocks to simplify the logic of building up
2169   // SuspendCrossing data.
2170   for (auto *CSI : Shape.CoroSuspends) {
2171     if (auto *Save = CSI->getCoroSave())
2172       splitAround(Save, "CoroSave");
2173     splitAround(CSI, "CoroSuspend");
2174   }
2175 
2176   // Put CoroEnds into their own blocks.
2177   for (AnyCoroEndInst *CE : Shape.CoroEnds) {
2178     splitAround(CE, "CoroEnd");
2179 
2180     // Emit the musttail call function in a new block before the CoroEnd.
    // We do this here so that the right suspend crossing info is computed for
    // the uses of the musttail call. (Arguments to the coro.end instruction
    // would otherwise be ignored.)
2184     if (auto *AsyncEnd = dyn_cast<CoroAsyncEndInst>(CE)) {
2185       auto *MustTailCallFn = AsyncEnd->getMustTailCallFunction();
2186       if (!MustTailCallFn)
2187         continue;
2188       IRBuilder<> Builder(AsyncEnd);
2189       SmallVector<Value *, 8> Args(AsyncEnd->args());
2190       auto Arguments = ArrayRef<Value *>(Args).drop_front(3);
2191       auto *Call = createMustTailCall(AsyncEnd->getDebugLoc(), MustTailCallFn,
2192                                       Arguments, Builder);
2193       splitAround(Call, "MustTailCall.Before.CoroEnd");
2194     }
2195   }
2196 
  // Transforms multi-edge PHI Nodes, so that any value feeding into a PHI will
  // never have its definition separated from the PHI by a suspend point.
2199   rewritePHIs(F);
2200 
2201   // Build suspend crossing info.
2202   SuspendCrossingInfo Checker(F, Shape);
2203 
2204   IRBuilder<> Builder(F.getContext());
2205   FrameDataInfo FrameData;
2206   SmallVector<CoroAllocaAllocInst*, 4> LocalAllocas;
2207   SmallVector<Instruction*, 4> DeadInstructions;
2208 
2209   {
2210     SpillInfo Spills;
2211     for (int Repeat = 0; Repeat < 4; ++Repeat) {
2212       // See if there are materializable instructions across suspend points.
2213       for (Instruction &I : instructions(F))
2214         if (materializable(I))
2215           for (User *U : I.users())
2216             if (Checker.isDefinitionAcrossSuspend(I, U))
2217               Spills[&I].push_back(cast<Instruction>(U));
2218 
2219       if (Spills.empty())
2220         break;
2221 
2222       // Rewrite materializable instructions to be materialized at the use
2223       // point.
2224       LLVM_DEBUG(dumpSpills("Materializations", Spills));
2225       rewriteMaterializableInstructions(Builder, Spills);
2226       Spills.clear();
2227     }
2228   }
2229 
2230   sinkLifetimeStartMarkers(F, Shape, Checker);
2231   if (Shape.ABI != coro::ABI::Async || !Shape.CoroSuspends.empty())
2232     collectFrameAllocas(F, Shape, Checker, FrameData.Allocas);
2233   LLVM_DEBUG(dumpAllocas(FrameData.Allocas));
2234 
  // Collect the spills for arguments and other non-materializable values.
2236   for (Argument &A : F.args())
2237     for (User *U : A.users())
2238       if (Checker.isDefinitionAcrossSuspend(A, U))
2239         FrameData.Spills[&A].push_back(cast<Instruction>(U));
2240 
2241   for (Instruction &I : instructions(F)) {
2242     // Values returned from coroutine structure intrinsics should not be part
2243     // of the Coroutine Frame.
2244     if (isCoroutineStructureIntrinsic(I) || &I == Shape.CoroBegin)
2245       continue;
2246 
    // The coroutine promise is always included in the coroutine frame, so
    // there is no need to check for suspend crossing.
2249     if (Shape.ABI == coro::ABI::Switch &&
2250         Shape.SwitchLowering.PromiseAlloca == &I)
2251       continue;
2252 
2253     // Handle alloca.alloc specially here.
2254     if (auto AI = dyn_cast<CoroAllocaAllocInst>(&I)) {
2255       // Check whether the alloca's lifetime is bounded by suspend points.
2256       if (isLocalAlloca(AI)) {
2257         LocalAllocas.push_back(AI);
2258         continue;
2259       }
2260 
2261       // If not, do a quick rewrite of the alloca and then add spills of
2262       // the rewritten value.  The rewrite doesn't invalidate anything in
2263       // Spills because the other alloca intrinsics have no other operands
2264       // besides AI, and it doesn't invalidate the iteration because we delay
2265       // erasing AI.
2266       auto Alloc = lowerNonLocalAlloca(AI, Shape, DeadInstructions);
2267 
2268       for (User *U : Alloc->users()) {
2269         if (Checker.isDefinitionAcrossSuspend(*Alloc, U))
2270           FrameData.Spills[Alloc].push_back(cast<Instruction>(U));
2271       }
2272       continue;
2273     }
2274 
2275     // Ignore alloca.get; we process this as part of coro.alloca.alloc.
2276     if (isa<CoroAllocaGetInst>(I))
2277       continue;
2278 
2279     if (isa<AllocaInst>(I))
2280       continue;
2281 
2282     for (User *U : I.users())
2283       if (Checker.isDefinitionAcrossSuspend(I, U)) {
2284         // We cannot spill a token.
2285         if (I.getType()->isTokenTy())
2286           report_fatal_error(
2287               "token definition is separated from the use by a suspend point");
2288         FrameData.Spills[&I].push_back(cast<Instruction>(U));
2289       }
2290   }
2291   LLVM_DEBUG(dumpSpills("Spills", FrameData.Spills));
2292   if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
2293       Shape.ABI == coro::ABI::Async)
2294     sinkSpillUsesAfterCoroBegin(F, FrameData, Shape.CoroBegin);
2295   Shape.FrameTy = buildFrameType(F, Shape, FrameData);
2296   Shape.FramePtr = insertSpills(FrameData, Shape);
2297   lowerLocalAllocas(LocalAllocas, DeadInstructions);
2298 
2299   for (auto I : DeadInstructions)
2300     I->eraseFromParent();
2301 }
2302