1 //===- CoroFrame.cpp - Builds and manipulates coroutine frame -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
// This file contains classes used to discover if, for a particular value,
// there is a path from its definition to a use that crosses a suspend point.
10 //
11 // Using the information discovered we form a Coroutine Frame structure to
12 // contain those values. All uses of those values are replaced with appropriate
13 // GEP + load from the coroutine frame. At the point of the definition we spill
14 // the value into the coroutine frame.
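//
// For example (an illustrative sketch, not taken from any particular test),
// a value that is live across a suspend point:
//
//   %x = call i64 @compute()
//   %sp = call i8 @llvm.coro.suspend(token %save, i1 false)
//   ...
//   call void @use(i64 %x)
//
// becomes, conceptually:
//
//   %x = call i64 @compute()
//   %x.spill.addr = getelementptr inbounds %f.Frame, ptr %hdl, i32 0, i32 3
//   store i64 %x, ptr %x.spill.addr
//   %sp = call i8 @llvm.coro.suspend(token %save, i1 false)
//   ...
//   %x.reload = load i64, ptr %x.spill.addr
//   call void @use(i64 %x.reload)
//
// where %f.Frame and field index 3 are hypothetical names used only for
// illustration.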
15 //===----------------------------------------------------------------------===//
16 
17 #include "CoroInternal.h"
18 #include "llvm/ADT/BitVector.h"
19 #include "llvm/ADT/ScopeExit.h"
20 #include "llvm/ADT/SmallString.h"
21 #include "llvm/Analysis/PtrUseVisitor.h"
22 #include "llvm/Analysis/StackLifetime.h"
23 #include "llvm/Config/llvm-config.h"
24 #include "llvm/IR/CFG.h"
25 #include "llvm/IR/DIBuilder.h"
26 #include "llvm/IR/DebugInfo.h"
27 #include "llvm/IR/Dominators.h"
28 #include "llvm/IR/IRBuilder.h"
29 #include "llvm/IR/InstIterator.h"
30 #include "llvm/IR/IntrinsicInst.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/MathExtras.h"
33 #include "llvm/Support/OptimizedStructLayout.h"
34 #include "llvm/Support/circular_raw_ostream.h"
35 #include "llvm/Support/raw_ostream.h"
36 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
37 #include "llvm/Transforms/Utils/Local.h"
38 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
39 #include <algorithm>
40 
41 using namespace llvm;
42 
43 // The "coro-suspend-crossing" flag is very noisy. There is another debug type,
44 // "coro-frame", which results in leaner debug spew.
45 #define DEBUG_TYPE "coro-suspend-crossing"
46 
47 enum { SmallVectorThreshold = 32 };
48 
// Provides a two-way mapping between blocks and their indices.
50 namespace {
51 class BlockToIndexMapping {
52   SmallVector<BasicBlock *, SmallVectorThreshold> V;
53 
54 public:
55   size_t size() const { return V.size(); }
56 
57   BlockToIndexMapping(Function &F) {
58     for (BasicBlock &BB : F)
59       V.push_back(&BB);
60     llvm::sort(V);
61   }
62 
63   size_t blockToIndex(BasicBlock *BB) const {
64     auto *I = llvm::lower_bound(V, BB);
    assert(I != V.end() && *I == BB && "BasicBlockNumbering: Unknown block");
66     return I - V.begin();
67   }
68 
69   BasicBlock *indexToBlock(unsigned Index) const { return V[Index]; }
70 };
71 } // end anonymous namespace
72 
// The SuspendCrossingInfo maintains data that allows us to answer the question
// whether, given two BasicBlocks A and B, there is a path from A to B that
// passes through a suspend point.
76 //
77 // For every basic block 'i' it maintains a BlockData that consists of:
78 //   Consumes:  a bit vector which contains a set of indices of blocks that can
79 //              reach block 'i'
//   Kills: a bit vector which contains a set of indices of blocks that can
//          reach block 'i' along at least one path that crosses a suspend
//          point
82 //   Suspend: a boolean indicating whether block 'i' contains a suspend point.
83 //   End: a boolean indicating whether block 'i' contains a coro.end intrinsic.
84 //
85 namespace {
86 struct SuspendCrossingInfo {
87   BlockToIndexMapping Mapping;
88 
89   struct BlockData {
90     BitVector Consumes;
91     BitVector Kills;
92     bool Suspend = false;
93     bool End = false;
94   };
95   SmallVector<BlockData, SmallVectorThreshold> Block;
96 
97   iterator_range<succ_iterator> successors(BlockData const &BD) const {
98     BasicBlock *BB = Mapping.indexToBlock(&BD - &Block[0]);
99     return llvm::successors(BB);
100   }
101 
102   BlockData &getBlockData(BasicBlock *BB) {
103     return Block[Mapping.blockToIndex(BB)];
104   }
105 
106   void dump() const;
107   void dump(StringRef Label, BitVector const &BV) const;
108 
109   SuspendCrossingInfo(Function &F, coro::Shape &Shape);
110 
111   bool hasPathCrossingSuspendPoint(BasicBlock *DefBB, BasicBlock *UseBB) const {
112     size_t const DefIndex = Mapping.blockToIndex(DefBB);
113     size_t const UseIndex = Mapping.blockToIndex(UseBB);
114 
115     bool const Result = Block[UseIndex].Kills[DefIndex];
116     LLVM_DEBUG(dbgs() << UseBB->getName() << " => " << DefBB->getName()
117                       << " answer is " << Result << "\n");
118     return Result;
119   }
120 
121   bool isDefinitionAcrossSuspend(BasicBlock *DefBB, User *U) const {
122     auto *I = cast<Instruction>(U);
123 
124     // We rewrote PHINodes, so that only the ones with exactly one incoming
125     // value need to be analyzed.
126     if (auto *PN = dyn_cast<PHINode>(I))
127       if (PN->getNumIncomingValues() > 1)
128         return false;
129 
130     BasicBlock *UseBB = I->getParent();
131 
132     // As a special case, treat uses by an llvm.coro.suspend.retcon or an
133     // llvm.coro.suspend.async as if they were uses in the suspend's single
134     // predecessor: the uses conceptually occur before the suspend.
135     if (isa<CoroSuspendRetconInst>(I) || isa<CoroSuspendAsyncInst>(I)) {
136       UseBB = UseBB->getSinglePredecessor();
137       assert(UseBB && "should have split coro.suspend into its own block");
138     }
139 
140     return hasPathCrossingSuspendPoint(DefBB, UseBB);
141   }
142 
143   bool isDefinitionAcrossSuspend(Argument &A, User *U) const {
144     return isDefinitionAcrossSuspend(&A.getParent()->getEntryBlock(), U);
145   }
146 
147   bool isDefinitionAcrossSuspend(Instruction &I, User *U) const {
148     auto *DefBB = I.getParent();
149 
150     // As a special case, treat values produced by an llvm.coro.suspend.*
151     // as if they were defined in the single successor: the uses
152     // conceptually occur after the suspend.
153     if (isa<AnyCoroSuspendInst>(I)) {
154       DefBB = DefBB->getSingleSuccessor();
155       assert(DefBB && "should have split coro.suspend into its own block");
156     }
157 
158     return isDefinitionAcrossSuspend(DefBB, U);
159   }
160 
161   bool isDefinitionAcrossSuspend(Value &V, User *U) const {
162     if (auto *Arg = dyn_cast<Argument>(&V))
163       return isDefinitionAcrossSuspend(*Arg, U);
164     if (auto *Inst = dyn_cast<Instruction>(&V))
165       return isDefinitionAcrossSuspend(*Inst, U);
166 
    llvm_unreachable(
        "Coroutines can only collect Argument and Instruction values now.");
169   }
170 };
171 } // end anonymous namespace
172 
173 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
174 LLVM_DUMP_METHOD void SuspendCrossingInfo::dump(StringRef Label,
175                                                 BitVector const &BV) const {
176   dbgs() << Label << ":";
177   for (size_t I = 0, N = BV.size(); I < N; ++I)
178     if (BV[I])
179       dbgs() << " " << Mapping.indexToBlock(I)->getName();
180   dbgs() << "\n";
181 }
182 
183 LLVM_DUMP_METHOD void SuspendCrossingInfo::dump() const {
184   for (size_t I = 0, N = Block.size(); I < N; ++I) {
185     BasicBlock *const B = Mapping.indexToBlock(I);
186     dbgs() << B->getName() << ":\n";
187     dump("   Consumes", Block[I].Consumes);
188     dump("      Kills", Block[I].Kills);
189   }
190   dbgs() << "\n";
191 }
192 #endif
193 
194 SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
195     : Mapping(F) {
196   const size_t N = Mapping.size();
197   Block.resize(N);
198 
199   // Initialize every block so that it consumes itself
200   for (size_t I = 0; I < N; ++I) {
201     auto &B = Block[I];
202     B.Consumes.resize(N);
203     B.Kills.resize(N);
204     B.Consumes.set(I);
205   }
206 
207   // Mark all CoroEnd Blocks. We do not propagate Kills beyond coro.ends as
208   // the code beyond coro.end is reachable during initial invocation of the
209   // coroutine.
210   for (auto *CE : Shape.CoroEnds)
211     getBlockData(CE->getParent()).End = true;
212 
  // Mark all suspend blocks and indicate that they kill everything they
  // consume. Note that crossing coro.save also requires a spill, as any code
  // between coro.save and coro.suspend may resume the coroutine and all of the
  // state needs to be saved by that time.
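  //
  // For example (an illustrative sketch; @enqueue and %hdl are hypothetical):
  //
  //   %save = call token @llvm.coro.save(ptr null)
  //   call void @enqueue(ptr %hdl) ; may resume the coroutine on another thread
  //   %sp = call i8 @llvm.coro.suspend(token %save, i1 false)
  //
  // Anything still live at the call to @enqueue must already have been spilled
  // to the frame, which is why the coro.save block kills everything it
  // consumes just like the coro.suspend block does.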
217   auto markSuspendBlock = [&](IntrinsicInst *BarrierInst) {
218     BasicBlock *SuspendBlock = BarrierInst->getParent();
219     auto &B = getBlockData(SuspendBlock);
220     B.Suspend = true;
221     B.Kills |= B.Consumes;
222   };
223   for (auto *CSI : Shape.CoroSuspends) {
224     markSuspendBlock(CSI);
225     if (auto *Save = CSI->getCoroSave())
226       markSuspendBlock(Save);
227   }
228 
229   // Iterate propagating consumes and kills until they stop changing.
230   int Iteration = 0;
231   (void)Iteration;
232 
233   bool Changed;
234   do {
235     LLVM_DEBUG(dbgs() << "iteration " << ++Iteration);
236     LLVM_DEBUG(dbgs() << "==============\n");
237 
238     Changed = false;
239     for (size_t I = 0; I < N; ++I) {
240       auto &B = Block[I];
241       for (BasicBlock *SI : successors(B)) {
242 
243         auto SuccNo = Mapping.blockToIndex(SI);
244 
        // Save the Consumes and Kills bitsets so that it is easy to see
        // if anything changed after propagation.
247         auto &S = Block[SuccNo];
248         auto SavedConsumes = S.Consumes;
249         auto SavedKills = S.Kills;
250 
251         // Propagate Kills and Consumes from block B into its successor S.
252         S.Consumes |= B.Consumes;
253         S.Kills |= B.Kills;
254 
        // If block B is a suspend block, it should propagate kills into its
        // successor for every block B consumes.
257         if (B.Suspend) {
258           S.Kills |= B.Consumes;
259         }
260         if (S.Suspend) {
261           // If block S is a suspend block, it should kill all of the blocks it
262           // consumes.
263           S.Kills |= S.Consumes;
264         } else if (S.End) {
265           // If block S is an end block, it should not propagate kills as the
266           // blocks following coro.end() are reached during initial invocation
267           // of the coroutine while all the data are still available on the
268           // stack or in the registers.
269           S.Kills.reset();
270         } else {
          // This is reached when block S is neither a suspend block nor a
          // coro.end block; we need to make sure that it is not in its own
          // kill set.
273           S.Kills.reset(SuccNo);
274         }
275 
276         // See if anything changed.
277         Changed |= (S.Kills != SavedKills) || (S.Consumes != SavedConsumes);
278 
279         if (S.Kills != SavedKills) {
280           LLVM_DEBUG(dbgs() << "\nblock " << I << " follower " << SI->getName()
281                             << "\n");
282           LLVM_DEBUG(dump("S.Kills", S.Kills));
283           LLVM_DEBUG(dump("SavedKills", SavedKills));
284         }
285         if (S.Consumes != SavedConsumes) {
          LLVM_DEBUG(dbgs() << "\nblock " << I << " follower " << SI->getName()
                            << "\n");
287           LLVM_DEBUG(dump("S.Consume", S.Consumes));
288           LLVM_DEBUG(dump("SavedCons", SavedConsumes));
289         }
290       }
291     }
292   } while (Changed);
293   LLVM_DEBUG(dump());
294 }
295 
296 #undef DEBUG_TYPE // "coro-suspend-crossing"
297 #define DEBUG_TYPE "coro-frame"
298 
299 namespace {
300 class FrameTypeBuilder;
301 // Mapping from the to-be-spilled value to all the users that need reload.
302 using SpillInfo = SmallMapVector<Value *, SmallVector<Instruction *, 2>, 8>;
303 struct AllocaInfo {
304   AllocaInst *Alloca;
305   DenseMap<Instruction *, llvm::Optional<APInt>> Aliases;
306   bool MayWriteBeforeCoroBegin;
307   AllocaInfo(AllocaInst *Alloca,
308              DenseMap<Instruction *, llvm::Optional<APInt>> Aliases,
309              bool MayWriteBeforeCoroBegin)
310       : Alloca(Alloca), Aliases(std::move(Aliases)),
311         MayWriteBeforeCoroBegin(MayWriteBeforeCoroBegin) {}
312 };
313 struct FrameDataInfo {
  // All the values (that are not allocas) that need to be spilled to the
  // frame.
316   SpillInfo Spills;
317   // Allocas contains all values defined as allocas that need to live in the
318   // frame.
319   SmallVector<AllocaInfo, 8> Allocas;
320 
321   SmallVector<Value *, 8> getAllDefs() const {
322     SmallVector<Value *, 8> Defs;
323     for (const auto &P : Spills)
324       Defs.push_back(P.first);
325     for (const auto &A : Allocas)
326       Defs.push_back(A.Alloca);
327     return Defs;
328   }
329 
330   uint32_t getFieldIndex(Value *V) const {
331     auto Itr = FieldIndexMap.find(V);
332     assert(Itr != FieldIndexMap.end() &&
333            "Value does not have a frame field index");
334     return Itr->second;
335   }
336 
337   void setFieldIndex(Value *V, uint32_t Index) {
338     assert((LayoutIndexUpdateStarted || FieldIndexMap.count(V) == 0) &&
339            "Cannot set the index for the same field twice.");
340     FieldIndexMap[V] = Index;
341   }
342 
343   uint64_t getAlign(Value *V) const {
344     auto Iter = FieldAlignMap.find(V);
345     assert(Iter != FieldAlignMap.end());
346     return Iter->second;
347   }
348 
349   void setAlign(Value *V, uint64_t Align) {
350     assert(FieldAlignMap.count(V) == 0);
351     FieldAlignMap.insert({V, Align});
352   }
353 
354   uint64_t getDynamicAlign(Value *V) const {
355     auto Iter = FieldDynamicAlignMap.find(V);
356     assert(Iter != FieldDynamicAlignMap.end());
357     return Iter->second;
358   }
359 
360   void setDynamicAlign(Value *V, uint64_t Align) {
361     assert(FieldDynamicAlignMap.count(V) == 0);
362     FieldDynamicAlignMap.insert({V, Align});
363   }
364 
365   uint64_t getOffset(Value *V) const {
366     auto Iter = FieldOffsetMap.find(V);
367     assert(Iter != FieldOffsetMap.end());
368     return Iter->second;
369   }
370 
371   void setOffset(Value *V, uint64_t Offset) {
372     assert(FieldOffsetMap.count(V) == 0);
373     FieldOffsetMap.insert({V, Offset});
374   }
375 
376   // Remap the index of every field in the frame, using the final layout index.
377   void updateLayoutIndex(FrameTypeBuilder &B);
378 
379 private:
380   // LayoutIndexUpdateStarted is used to avoid updating the index of any field
381   // twice by mistake.
382   bool LayoutIndexUpdateStarted = false;
  // Map from values to their slot indexes on the frame. They will first be
  // set to their original insertion field index. After the frame is built,
  // their indexes will be updated to the final layout index.
386   DenseMap<Value *, uint32_t> FieldIndexMap;
  // Map from values to their alignment on the frame. They are set after
  // the frame is built.
389   DenseMap<Value *, uint64_t> FieldAlignMap;
390   DenseMap<Value *, uint64_t> FieldDynamicAlignMap;
  // Map from values to their offset on the frame. They are set after
  // the frame is built.
393   DenseMap<Value *, uint64_t> FieldOffsetMap;
394 };
395 } // namespace
396 
397 #ifndef NDEBUG
398 static void dumpSpills(StringRef Title, const SpillInfo &Spills) {
399   dbgs() << "------------- " << Title << "--------------\n";
400   for (const auto &E : Spills) {
401     E.first->dump();
402     dbgs() << "   user: ";
403     for (auto *I : E.second)
404       I->dump();
405   }
406 }
407 
408 static void dumpAllocas(const SmallVectorImpl<AllocaInfo> &Allocas) {
409   dbgs() << "------------- Allocas --------------\n";
410   for (const auto &A : Allocas) {
411     A.Alloca->dump();
412   }
413 }
414 #endif
415 
416 namespace {
417 using FieldIDType = size_t;
// We cannot rely solely on the natural alignment of a type when building a
// coroutine frame: if the alignment specified on the Alloca instruction
// differs from the natural alignment of the allocated type, we will need to
// insert padding.
422 class FrameTypeBuilder {
423 private:
424   struct Field {
425     uint64_t Size;
426     uint64_t Offset;
427     Type *Ty;
428     FieldIDType LayoutFieldIndex;
429     Align Alignment;
430     Align TyAlignment;
431     uint64_t DynamicAlignBuffer;
432   };
433 
434   const DataLayout &DL;
435   LLVMContext &Context;
436   uint64_t StructSize = 0;
437   Align StructAlign;
438   bool IsFinished = false;
439 
440   Optional<Align> MaxFrameAlignment;
441 
442   SmallVector<Field, 8> Fields;
443   DenseMap<Value*, unsigned> FieldIndexByKey;
444 
445 public:
446   FrameTypeBuilder(LLVMContext &Context, const DataLayout &DL,
447                    Optional<Align> MaxFrameAlignment)
448       : DL(DL), Context(Context), MaxFrameAlignment(MaxFrameAlignment) {}
449 
450   /// Add a field to this structure for the storage of an `alloca`
451   /// instruction.
452   LLVM_NODISCARD FieldIDType addFieldForAlloca(AllocaInst *AI,
453                                                bool IsHeader = false) {
454     Type *Ty = AI->getAllocatedType();
455 
456     // Make an array type if this is a static array allocation.
457     if (AI->isArrayAllocation()) {
458       if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
459         Ty = ArrayType::get(Ty, CI->getValue().getZExtValue());
460       else
461         report_fatal_error("Coroutines cannot handle non static allocas yet");
462     }
463 
464     return addField(Ty, AI->getAlign(), IsHeader);
465   }
466 
  /// We want to put the allocas whose lifetime ranges do not overlap
  /// into one slot of the coroutine frame.
  /// Consider the example at: https://bugs.llvm.org/show_bug.cgi?id=45566
470   ///
471   ///     cppcoro::task<void> alternative_paths(bool cond) {
472   ///         if (cond) {
473   ///             big_structure a;
474   ///             process(a);
475   ///             co_await something();
476   ///         } else {
477   ///             big_structure b;
478   ///             process2(b);
479   ///             co_await something();
480   ///         }
481   ///     }
482   ///
  /// We want to put variable a and variable b in the same slot to
  /// reduce the size of the coroutine frame.
  ///
  /// This function uses the StackLifetime algorithm to partition the
  /// AllocaInsts in FrameData into non-overlapping sets, so that allocas in
  /// the same set can share a slot in the coroutine frame. It then adds a
  /// field for each set, using the type of the largest alloca in the set as
  /// the field type.
  ///
  /// Side effects: because we sort the allocas, their order in the frame may
  /// differ from their order in the source code.
494   void addFieldForAllocas(const Function &F, FrameDataInfo &FrameData,
495                           coro::Shape &Shape);
496 
497   /// Add a field to this structure.
498   LLVM_NODISCARD FieldIDType addField(Type *Ty, MaybeAlign FieldAlignment,
499                                       bool IsHeader = false,
500                                       bool IsSpillOfValue = false) {
501     assert(!IsFinished && "adding fields to a finished builder");
502     assert(Ty && "must provide a type for a field");
503 
504     // The field size is always the alloc size of the type.
505     uint64_t FieldSize = DL.getTypeAllocSize(Ty);
506 
    // For an alloca with size 0, we don't need to add a field; it can just
    // point to any index in the frame. Use index 0.
509     if (FieldSize == 0) {
510       return 0;
511     }
512 
513     // The field alignment might not be the type alignment, but we need
514     // to remember the type alignment anyway to build the type.
515     // If we are spilling values we don't need to worry about ABI alignment
516     // concerns.
517     auto ABIAlign = DL.getABITypeAlign(Ty);
518     Align TyAlignment =
519         (IsSpillOfValue && MaxFrameAlignment)
520             ? (*MaxFrameAlignment < ABIAlign ? *MaxFrameAlignment : ABIAlign)
521             : ABIAlign;
522     if (!FieldAlignment) {
523       FieldAlignment = TyAlignment;
524     }
525 
    // The field alignment could be larger than the maximum frame alignment; in
    // that case we request additional storage to be able to dynamically align
    // the pointer.
529     uint64_t DynamicAlignBuffer = 0;
530     if (MaxFrameAlignment &&
531         (FieldAlignment.valueOrOne() > *MaxFrameAlignment)) {
532       DynamicAlignBuffer =
533           offsetToAlignment((*MaxFrameAlignment).value(), *FieldAlignment);
534       FieldAlignment = *MaxFrameAlignment;
535       FieldSize = FieldSize + DynamicAlignBuffer;
536     }
537 
538     // Lay out header fields immediately.
539     uint64_t Offset;
540     if (IsHeader) {
541       Offset = alignTo(StructSize, FieldAlignment);
542       StructSize = Offset + FieldSize;
543 
544     // Everything else has a flexible offset.
545     } else {
546       Offset = OptimizedStructLayoutField::FlexibleOffset;
547     }
548 
549     Fields.push_back({FieldSize, Offset, Ty, 0, *FieldAlignment, TyAlignment,
550                       DynamicAlignBuffer});
551     return Fields.size() - 1;
552   }
553 
554   /// Finish the layout and set the body on the given type.
555   void finish(StructType *Ty);
556 
557   uint64_t getStructSize() const {
558     assert(IsFinished && "not yet finished!");
559     return StructSize;
560   }
561 
562   Align getStructAlign() const {
563     assert(IsFinished && "not yet finished!");
564     return StructAlign;
565   }
566 
567   FieldIDType getLayoutFieldIndex(FieldIDType Id) const {
568     assert(IsFinished && "not yet finished!");
569     return Fields[Id].LayoutFieldIndex;
570   }
571 
572   Field getLayoutField(FieldIDType Id) const {
573     assert(IsFinished && "not yet finished!");
574     return Fields[Id];
575   }
576 };
577 } // namespace
578 
579 void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) {
580   auto Updater = [&](Value *I) {
581     auto Field = B.getLayoutField(getFieldIndex(I));
582     setFieldIndex(I, Field.LayoutFieldIndex);
583     setAlign(I, Field.Alignment.value());
584     uint64_t dynamicAlign =
585         Field.DynamicAlignBuffer
586             ? Field.DynamicAlignBuffer + Field.Alignment.value()
587             : 0;
588     setDynamicAlign(I, dynamicAlign);
589     setOffset(I, Field.Offset);
590   };
591   LayoutIndexUpdateStarted = true;
592   for (auto &S : Spills)
593     Updater(S.first);
594   for (const auto &A : Allocas)
595     Updater(A.Alloca);
596   LayoutIndexUpdateStarted = false;
597 }
598 
599 void FrameTypeBuilder::addFieldForAllocas(const Function &F,
600                                           FrameDataInfo &FrameData,
601                                           coro::Shape &Shape) {
602   using AllocaSetType = SmallVector<AllocaInst *, 4>;
603   SmallVector<AllocaSetType, 4> NonOverlapedAllocas;
604 
  // We need to add fields for the allocas at the end of this function.
606   auto AddFieldForAllocasAtExit = make_scope_exit([&]() {
607     for (auto AllocaList : NonOverlapedAllocas) {
608       auto *LargestAI = *AllocaList.begin();
609       FieldIDType Id = addFieldForAlloca(LargestAI);
610       for (auto *Alloca : AllocaList)
611         FrameData.setFieldIndex(Alloca, Id);
612     }
613   });
614 
615   if (!Shape.OptimizeFrame) {
616     for (const auto &A : FrameData.Allocas) {
617       AllocaInst *Alloca = A.Alloca;
618       NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
619     }
620     return;
621   }
622 
  // Because there are paths from the lifetime.start to coro.end
  // for each alloca, the live ranges of all allocas overlap
  // in the blocks that contain coro.end and their successor blocks.
  // So we choose to skip these blocks when we calculate the live range
  // for each alloca. This should be reasonable since there shouldn't be uses
  // in these blocks and the coroutine frame shouldn't be used outside the
  // coroutine body.
  //
  // Note that the user of coro.suspend may not be a SwitchInst. However, this
  // case seems too complex to handle. And it is harmless to skip these
  // patterns since it just prevents putting the allocas in the same
  // slot.
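  //
  // An illustrative sketch of the switch that suspend lowering produces
  // (block names are hypothetical):
  //
  //   %sp = call i8 @llvm.coro.suspend(token %save, i1 false)
  //   switch i8 %sp, label %coro.ret [ i8 0, label %resume
  //                                    i8 1, label %cleanup ]
  //
  // Temporarily redirecting the default destination (%coro.ret, which
  // typically leads to the block containing coro.end) to the resume successor
  // removes those paths from the StackLifetime computation below; the original
  // destination is restored afterwards.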
635   DenseMap<SwitchInst *, BasicBlock *> DefaultSuspendDest;
636   for (auto CoroSuspendInst : Shape.CoroSuspends) {
637     for (auto U : CoroSuspendInst->users()) {
638       if (auto *ConstSWI = dyn_cast<SwitchInst>(U)) {
639         auto *SWI = const_cast<SwitchInst *>(ConstSWI);
640         DefaultSuspendDest[SWI] = SWI->getDefaultDest();
641         SWI->setDefaultDest(SWI->getSuccessor(1));
642       }
643     }
644   }
645 
646   auto ExtractAllocas = [&]() {
647     AllocaSetType Allocas;
648     Allocas.reserve(FrameData.Allocas.size());
649     for (const auto &A : FrameData.Allocas)
650       Allocas.push_back(A.Alloca);
651     return Allocas;
652   };
653   StackLifetime StackLifetimeAnalyzer(F, ExtractAllocas(),
654                                       StackLifetime::LivenessType::May);
655   StackLifetimeAnalyzer.run();
  auto DoAllocasInterfere = [&](const AllocaInst *AI1, const AllocaInst *AI2) {
657     return StackLifetimeAnalyzer.getLiveRange(AI1).overlaps(
658         StackLifetimeAnalyzer.getLiveRange(AI2));
659   };
660   auto GetAllocaSize = [&](const AllocaInfo &A) {
661     Optional<TypeSize> RetSize = A.Alloca->getAllocationSizeInBits(DL);
662     assert(RetSize && "Variable Length Arrays (VLA) are not supported.\n");
663     assert(!RetSize->isScalable() && "Scalable vectors are not yet supported");
664     return RetSize->getFixedSize();
665   };
  // Put larger allocas at the front, so that they have higher priority to be
  // merged, which can potentially save more space. This also keeps each
  // AllocaSet ordered, so we can easily get the largest alloca in an
  // AllocaSet.
670   sort(FrameData.Allocas, [&](const auto &Iter1, const auto &Iter2) {
671     return GetAllocaSize(Iter1) > GetAllocaSize(Iter2);
672   });
673   for (const auto &A : FrameData.Allocas) {
674     AllocaInst *Alloca = A.Alloca;
675     bool Merged = false;
    // Try to find an existing NonOverlappedAllocaSet whose members do not
    // interfere with this Alloca. If one is found, insert the alloca into that
    // NonOverlappedAllocaSet.
679     for (auto &AllocaSet : NonOverlapedAllocas) {
      assert(!AllocaSet.empty() && "AllocaSet shouldn't be empty.\n");
      bool NoInterference = none_of(AllocaSet, [&](auto Iter) {
        return DoAllocasInterfere(Alloca, Iter);
      });
      // If the alignment of A is a multiple of the alignment of B, the address
      // of A also satisfies the alignment requirement of B.
      //
      // There may be other, more fine-grained strategies to handle alignment
      // information during the merging process, but they seem hard to handle
      // and benefit little.
690       bool Alignable = [&]() -> bool {
691         auto *LargestAlloca = *AllocaSet.begin();
692         return LargestAlloca->getAlign().value() % Alloca->getAlign().value() ==
693                0;
694       }();
      bool CouldMerge = NoInterference && Alignable;
696       if (!CouldMerge)
697         continue;
698       AllocaSet.push_back(Alloca);
699       Merged = true;
700       break;
701     }
702     if (!Merged) {
703       NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
704     }
705   }
  // Restore the default destination for each switch instruction that we
  // modified above.
708   for (auto SwitchAndDefaultDest : DefaultSuspendDest) {
709     SwitchInst *SWI = SwitchAndDefaultDest.first;
710     BasicBlock *DestBB = SwitchAndDefaultDest.second;
711     SWI->setDefaultDest(DestBB);
712   }
  // This debug output tells us which allocas are merged into one slot.
714   LLVM_DEBUG(for (auto &AllocaSet
715                   : NonOverlapedAllocas) {
716     if (AllocaSet.size() > 1) {
717       dbgs() << "In Function:" << F.getName() << "\n";
718       dbgs() << "Find Union Set "
719              << "\n";
720       dbgs() << "\tAllocas are \n";
721       for (auto Alloca : AllocaSet)
722         dbgs() << "\t\t" << *Alloca << "\n";
723     }
724   });
725 }
726 
727 void FrameTypeBuilder::finish(StructType *Ty) {
728   assert(!IsFinished && "already finished!");
729 
730   // Prepare the optimal-layout field array.
731   // The Id in the layout field is a pointer to our Field for it.
732   SmallVector<OptimizedStructLayoutField, 8> LayoutFields;
733   LayoutFields.reserve(Fields.size());
734   for (auto &Field : Fields) {
735     LayoutFields.emplace_back(&Field, Field.Size, Field.Alignment,
736                               Field.Offset);
737   }
738 
739   // Perform layout.
740   auto SizeAndAlign = performOptimizedStructLayout(LayoutFields);
741   StructSize = SizeAndAlign.first;
742   StructAlign = SizeAndAlign.second;
743 
744   auto getField = [](const OptimizedStructLayoutField &LayoutField) -> Field & {
745     return *static_cast<Field *>(const_cast<void*>(LayoutField.Id));
746   };
747 
748   // We need to produce a packed struct type if there's a field whose
749   // assigned offset isn't a multiple of its natural type alignment.
750   bool Packed = [&] {
751     for (auto &LayoutField : LayoutFields) {
752       auto &F = getField(LayoutField);
753       if (!isAligned(F.TyAlignment, LayoutField.Offset))
754         return true;
755     }
756     return false;
757   }();
758 
759   // Build the struct body.
760   SmallVector<Type*, 16> FieldTypes;
761   FieldTypes.reserve(LayoutFields.size() * 3 / 2);
762   uint64_t LastOffset = 0;
763   for (auto &LayoutField : LayoutFields) {
764     auto &F = getField(LayoutField);
765 
766     auto Offset = LayoutField.Offset;
767 
768     // Add a padding field if there's a padding gap and we're either
769     // building a packed struct or the padding gap is more than we'd
770     // get from aligning to the field type's natural alignment.
771     assert(Offset >= LastOffset);
772     if (Offset != LastOffset) {
773       if (Packed || alignTo(LastOffset, F.TyAlignment) != Offset)
774         FieldTypes.push_back(ArrayType::get(Type::getInt8Ty(Context),
775                                             Offset - LastOffset));
776     }
777 
778     F.Offset = Offset;
779     F.LayoutFieldIndex = FieldTypes.size();
780 
781     FieldTypes.push_back(F.Ty);
782     if (F.DynamicAlignBuffer) {
783       FieldTypes.push_back(
784           ArrayType::get(Type::getInt8Ty(Context), F.DynamicAlignBuffer));
785     }
786     LastOffset = Offset + F.Size;
787   }
788 
789   Ty->setBody(FieldTypes, Packed);
790 
791 #ifndef NDEBUG
792   // Check that the IR layout matches the offsets we expect.
793   auto Layout = DL.getStructLayout(Ty);
794   for (auto &F : Fields) {
795     assert(Ty->getElementType(F.LayoutFieldIndex) == F.Ty);
796     assert(Layout->getElementOffset(F.LayoutFieldIndex) == F.Offset);
797   }
798 #endif
799 
800   IsFinished = true;
801 }
802 
803 static void cacheDIVar(FrameDataInfo &FrameData,
804                        DenseMap<Value *, DILocalVariable *> &DIVarCache) {
805   for (auto *V : FrameData.getAllDefs()) {
806     if (DIVarCache.find(V) != DIVarCache.end())
807       continue;
808 
809     auto DDIs = FindDbgDeclareUses(V);
810     auto *I = llvm::find_if(DDIs, [](DbgDeclareInst *DDI) {
811       return DDI->getExpression()->getNumElements() == 0;
812     });
813     if (I != DDIs.end())
814       DIVarCache.insert({V, (*I)->getVariable()});
815   }
816 }
817 
/// Create a name for the given Type. It uses MDString to store the newly
/// created string to avoid a memory leak.
820 static StringRef solveTypeName(Type *Ty) {
821   if (Ty->isIntegerTy()) {
    // The longest common name is likely '__int_128', which has 9 characters.
823     SmallString<16> Buffer;
824     raw_svector_ostream OS(Buffer);
825     OS << "__int_" << cast<IntegerType>(Ty)->getBitWidth();
826     auto *MDName = MDString::get(Ty->getContext(), OS.str());
827     return MDName->getString();
828   }
829 
830   if (Ty->isFloatingPointTy()) {
831     if (Ty->isFloatTy())
832       return "__float_";
833     if (Ty->isDoubleTy())
834       return "__double_";
835     return "__floating_type_";
836   }
837 
838   if (auto *PtrTy = dyn_cast<PointerType>(Ty)) {
839     if (PtrTy->isOpaque())
840       return "PointerType";
841     Type *PointeeTy = PtrTy->getNonOpaquePointerElementType();
842     auto Name = solveTypeName(PointeeTy);
843     if (Name == "UnknownType")
844       return "PointerType";
845     SmallString<16> Buffer;
846     Twine(Name + "_Ptr").toStringRef(Buffer);
847     auto *MDName = MDString::get(Ty->getContext(), Buffer.str());
848     return MDName->getString();
849   }
850 
851   if (Ty->isStructTy()) {
852     if (!cast<StructType>(Ty)->hasName())
853       return "__LiteralStructType_";
854 
855     auto Name = Ty->getStructName();
856 
857     SmallString<16> Buffer(Name);
858     for (auto &Iter : Buffer)
859       if (Iter == '.' || Iter == ':')
860         Iter = '_';
861     auto *MDName = MDString::get(Ty->getContext(), Buffer.str());
862     return MDName->getString();
863   }
864 
865   return "UnknownType";
866 }
867 
868 static DIType *solveDIType(DIBuilder &Builder, Type *Ty,
869                            const DataLayout &Layout, DIScope *Scope,
870                            unsigned LineNum,
871                            DenseMap<Type *, DIType *> &DITypeCache) {
872   if (DIType *DT = DITypeCache.lookup(Ty))
873     return DT;
874 
875   StringRef Name = solveTypeName(Ty);
876 
877   DIType *RetType = nullptr;
878 
879   if (Ty->isIntegerTy()) {
880     auto BitWidth = cast<IntegerType>(Ty)->getBitWidth();
881     RetType = Builder.createBasicType(Name, BitWidth, dwarf::DW_ATE_signed,
882                                       llvm::DINode::FlagArtificial);
883   } else if (Ty->isFloatingPointTy()) {
884     RetType = Builder.createBasicType(Name, Layout.getTypeSizeInBits(Ty),
885                                       dwarf::DW_ATE_float,
886                                       llvm::DINode::FlagArtificial);
887   } else if (Ty->isPointerTy()) {
    // Construct a BasicType instead of a PointerType to avoid an infinite
    // recursion problem.
890     // For example, we would be in trouble if we traverse recursively:
891     //
892     //  struct Node {
893     //      Node* ptr;
894     //  };
895     RetType = Builder.createBasicType(Name, Layout.getTypeSizeInBits(Ty),
896                                       dwarf::DW_ATE_address,
897                                       llvm::DINode::FlagArtificial);
898   } else if (Ty->isStructTy()) {
899     auto *DIStruct = Builder.createStructType(
900         Scope, Name, Scope->getFile(), LineNum, Layout.getTypeSizeInBits(Ty),
901         Layout.getPrefTypeAlignment(Ty), llvm::DINode::FlagArtificial, nullptr,
902         llvm::DINodeArray());
903 
904     auto *StructTy = cast<StructType>(Ty);
905     SmallVector<Metadata *, 16> Elements;
906     for (unsigned I = 0; I < StructTy->getNumElements(); I++) {
907       DIType *DITy = solveDIType(Builder, StructTy->getElementType(I), Layout,
908                                  Scope, LineNum, DITypeCache);
909       assert(DITy);
910       Elements.push_back(Builder.createMemberType(
911           Scope, DITy->getName(), Scope->getFile(), LineNum,
912           DITy->getSizeInBits(), DITy->getAlignInBits(),
913           Layout.getStructLayout(StructTy)->getElementOffsetInBits(I),
914           llvm::DINode::FlagArtificial, DITy));
915     }
916 
917     Builder.replaceArrays(DIStruct, Builder.getOrCreateArray(Elements));
918 
919     RetType = DIStruct;
920   } else {
921     LLVM_DEBUG(dbgs() << "Unresolved Type: " << *Ty << "\n";);
922     SmallString<32> Buffer;
923     raw_svector_ostream OS(Buffer);
924     OS << Name.str() << "_" << Layout.getTypeSizeInBits(Ty);
925     RetType = Builder.createBasicType(OS.str(), Layout.getTypeSizeInBits(Ty),
926                                       dwarf::DW_ATE_address,
927                                       llvm::DINode::FlagArtificial);
928   }
929 
930   DITypeCache.insert({Ty, RetType});
931   return RetType;
932 }
933 
934 /// Build artificial debug info for C++ coroutine frames to allow users to
935 /// inspect the contents of the frame directly
936 ///
/// Create debug information for the coroutine frame with debug name
/// "__coro_frame". The debug information for the fields of the coroutine
/// frame is constructed in the following way:
/// 1. For each value in the frame, we search for a dbg.declare use to find the
///    corresponding debug variable for the value. If we can find the debug
///    variable, we get full and accurate debug information.
/// 2. If we can't get debug information in step 1, we can only try to build
///    the DIType from the Type. We do this in solveDIType. We only handle
///    integer, floating-point, pointer, and struct types for now.
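///
/// As an illustrative sketch of step 1, a frame value that originates from a
/// source variable usually has a dbg.declare use such as:
///
///   call void @llvm.dbg.declare(metadata ptr %x.alloca,
///                               metadata !DILocalVariable(name: "x", ...),
///                               metadata !DIExpression())
///
/// from which we can reuse the existing DILocalVariable's name and type for
/// the matching frame field instead of synthesizing one in solveDIType.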
946 static void buildFrameDebugInfo(Function &F, coro::Shape &Shape,
947                                 FrameDataInfo &FrameData) {
948   DISubprogram *DIS = F.getSubprogram();
  // If there is no DISubprogram for F, it implies the function was not
  // compiled with debug info, so we don't need to generate debug info for the
  // frame either.
952   if (!DIS || !DIS->getUnit() ||
953       !dwarf::isCPlusPlus(
954           (dwarf::SourceLanguage)DIS->getUnit()->getSourceLanguage()))
955     return;
956 
957   assert(Shape.ABI == coro::ABI::Switch &&
958          "We could only build debug infomation for C++ coroutine now.\n");
959 
960   DIBuilder DBuilder(*F.getParent(), /*AllowUnresolved*/ false);
961 
962   AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
963   assert(PromiseAlloca &&
964          "Coroutine with switch ABI should own Promise alloca");
965 
966   TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(PromiseAlloca);
967   if (DIs.empty())
968     return;
969 
970   DbgDeclareInst *PromiseDDI = DIs.front();
971   DILocalVariable *PromiseDIVariable = PromiseDDI->getVariable();
972   DILocalScope *PromiseDIScope = PromiseDIVariable->getScope();
973   DIFile *DFile = PromiseDIScope->getFile();
974   DILocation *DILoc = PromiseDDI->getDebugLoc().get();
975   unsigned LineNum = PromiseDIVariable->getLine();
976 
977   DICompositeType *FrameDITy = DBuilder.createStructType(
978       DIS, "__coro_frame_ty", DFile, LineNum, Shape.FrameSize * 8,
979       Shape.FrameAlign.value() * 8, llvm::DINode::FlagArtificial, nullptr,
980       llvm::DINodeArray());
981   StructType *FrameTy = Shape.FrameTy;
982   SmallVector<Metadata *, 16> Elements;
983   DataLayout Layout = F.getParent()->getDataLayout();
984 
985   DenseMap<Value *, DILocalVariable *> DIVarCache;
986   cacheDIVar(FrameData, DIVarCache);
987 
988   unsigned ResumeIndex = coro::Shape::SwitchFieldIndex::Resume;
989   unsigned DestroyIndex = coro::Shape::SwitchFieldIndex::Destroy;
990   unsigned IndexIndex = Shape.SwitchLowering.IndexField;
991 
992   DenseMap<unsigned, StringRef> NameCache;
993   NameCache.insert({ResumeIndex, "__resume_fn"});
994   NameCache.insert({DestroyIndex, "__destroy_fn"});
995   NameCache.insert({IndexIndex, "__coro_index"});
996 
997   Type *ResumeFnTy = FrameTy->getElementType(ResumeIndex),
998        *DestroyFnTy = FrameTy->getElementType(DestroyIndex),
999        *IndexTy = FrameTy->getElementType(IndexIndex);
1000 
1001   DenseMap<unsigned, DIType *> TyCache;
1002   TyCache.insert({ResumeIndex,
1003                   DBuilder.createBasicType("__resume_fn",
1004                                            Layout.getTypeSizeInBits(ResumeFnTy),
1005                                            dwarf::DW_ATE_address)});
1006   TyCache.insert(
1007       {DestroyIndex, DBuilder.createBasicType(
1008                          "__destroy_fn", Layout.getTypeSizeInBits(DestroyFnTy),
1009                          dwarf::DW_ATE_address)});
1010 
1011   /// FIXME: If we fill the field `SizeInBits` with the actual size of
1012   /// __coro_index in bits, then __coro_index wouldn't show in the debugger.
1013   TyCache.insert({IndexIndex, DBuilder.createBasicType(
1014                                   "__coro_index",
1015                                   (Layout.getTypeSizeInBits(IndexTy) < 8)
1016                                       ? 8
1017                                       : Layout.getTypeSizeInBits(IndexTy),
1018                                   dwarf::DW_ATE_unsigned_char)});
1019 
1020   for (auto *V : FrameData.getAllDefs()) {
1021     if (DIVarCache.find(V) == DIVarCache.end())
1022       continue;
1023 
1024     auto Index = FrameData.getFieldIndex(V);
1025 
1026     NameCache.insert({Index, DIVarCache[V]->getName()});
1027     TyCache.insert({Index, DIVarCache[V]->getType()});
1028   }
1029 
  // Cache from index to the (Align, Offset) pair.
1031   DenseMap<unsigned, std::pair<unsigned, unsigned>> OffsetCache;
1032   // The Align and Offset of Resume function and Destroy function are fixed.
1033   OffsetCache.insert({ResumeIndex, {8, 0}});
1034   OffsetCache.insert({DestroyIndex, {8, 8}});
1035   OffsetCache.insert(
1036       {IndexIndex,
1037        {Shape.SwitchLowering.IndexAlign, Shape.SwitchLowering.IndexOffset}});
1038 
1039   for (auto *V : FrameData.getAllDefs()) {
1040     auto Index = FrameData.getFieldIndex(V);
1041 
1042     OffsetCache.insert(
1043         {Index, {FrameData.getAlign(V), FrameData.getOffset(V)}});
1044   }
1045 
1046   DenseMap<Type *, DIType *> DITypeCache;
  // This counter is used to avoid duplicate type names, e.g., there could be
  // many i32 and i64 types in one coroutine, so we use names like i32_0 and
  // i32_1 to distinguish them, since it makes no sense for field names to
  // conflict with each other.
1051   unsigned UnknownTypeNum = 0;
1052   for (unsigned Index = 0; Index < FrameTy->getNumElements(); Index++) {
1053     if (OffsetCache.find(Index) == OffsetCache.end())
1054       continue;
1055 
1056     std::string Name;
1057     uint64_t SizeInBits;
1058     uint32_t AlignInBits;
1059     uint64_t OffsetInBits;
1060     DIType *DITy = nullptr;
1061 
1062     Type *Ty = FrameTy->getElementType(Index);
1063     assert(Ty->isSized() && "We can't handle type which is not sized.\n");
1064     SizeInBits = Layout.getTypeSizeInBits(Ty).getFixedSize();
1065     AlignInBits = OffsetCache[Index].first * 8;
1066     OffsetInBits = OffsetCache[Index].second * 8;
1067 
1068     if (NameCache.find(Index) != NameCache.end()) {
1069       Name = NameCache[Index].str();
1070       DITy = TyCache[Index];
1071     } else {
1072       DITy = solveDIType(DBuilder, Ty, Layout, FrameDITy, LineNum, DITypeCache);
1073       assert(DITy && "SolveDIType shouldn't return nullptr.\n");
1074       Name = DITy->getName().str();
1075       Name += "_" + std::to_string(UnknownTypeNum);
1076       UnknownTypeNum++;
1077     }
1078 
1079     Elements.push_back(DBuilder.createMemberType(
1080         FrameDITy, Name, DFile, LineNum, SizeInBits, AlignInBits, OffsetInBits,
1081         llvm::DINode::FlagArtificial, DITy));
1082   }
1083 
1084   DBuilder.replaceArrays(FrameDITy, DBuilder.getOrCreateArray(Elements));
1085 
1086   auto *FrameDIVar = DBuilder.createAutoVariable(PromiseDIScope, "__coro_frame",
1087                                                  DFile, LineNum, FrameDITy,
1088                                                  true, DINode::FlagArtificial);
1089   assert(FrameDIVar->isValidLocationForIntrinsic(PromiseDDI->getDebugLoc()));
1090 
  // The subprogram has a RetainedNodes field which records the debug
  // variables it contains. So we need to add __coro_frame to its
  // RetainedNodes.
  //
  // If we don't add __coro_frame to the RetainedNodes, the user may get
  // `no symbol __coro_frame in context` rather than the more precise message
  // that `__coro_frame` was optimized out.
1098   if (auto *SubProgram = dyn_cast<DISubprogram>(PromiseDIScope)) {
1099     auto RetainedNodes = SubProgram->getRetainedNodes();
1100     SmallVector<Metadata *, 32> RetainedNodesVec(RetainedNodes.begin(),
1101                                                  RetainedNodes.end());
1102     RetainedNodesVec.push_back(FrameDIVar);
1103     SubProgram->replaceOperandWith(
1104         7, (MDTuple::get(F.getContext(), RetainedNodesVec)));
1105   }
1106 
1107   DBuilder.insertDeclare(Shape.FramePtr, FrameDIVar,
1108                          DBuilder.createExpression(), DILoc,
1109                          Shape.getInsertPtAfterFramePtr());
1110 }
1111 
1112 // Build a struct that will keep state for an active coroutine.
1113 //   struct f.frame {
1114 //     ResumeFnTy ResumeFnAddr;
1115 //     ResumeFnTy DestroyFnAddr;
1116 //     int ResumeIndex;
1117 //     ... promise (if present) ...
1118 //     ... spills ...
1119 //   };
1120 static StructType *buildFrameType(Function &F, coro::Shape &Shape,
1121                                   FrameDataInfo &FrameData) {
1122   LLVMContext &C = F.getContext();
1123   const DataLayout &DL = F.getParent()->getDataLayout();
1124   StructType *FrameTy = [&] {
1125     SmallString<32> Name(F.getName());
1126     Name.append(".Frame");
1127     return StructType::create(C, Name);
1128   }();
1129 
1130   // We will use this value to cap the alignment of spilled values.
1131   Optional<Align> MaxFrameAlignment;
1132   if (Shape.ABI == coro::ABI::Async)
1133     MaxFrameAlignment = Shape.AsyncLowering.getContextAlignment();
1134   FrameTypeBuilder B(C, DL, MaxFrameAlignment);
1135 
1136   AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
1137   Optional<FieldIDType> SwitchIndexFieldId;
1138 
1139   if (Shape.ABI == coro::ABI::Switch) {
1140     auto *FramePtrTy = FrameTy->getPointerTo();
1141     auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy,
1142                                    /*IsVarArg=*/false);
1143     auto *FnPtrTy = FnTy->getPointerTo();
1144 
1145     // Add header fields for the resume and destroy functions.
1146     // We can rely on these being perfectly packed.
1147     (void)B.addField(FnPtrTy, None, /*header*/ true);
1148     (void)B.addField(FnPtrTy, None, /*header*/ true);
1149 
1150     // PromiseAlloca field needs to be explicitly added here because it's
1151     // a header field with a fixed offset based on its alignment. Hence it
1152     // needs special handling and cannot be added to FrameData.Allocas.
1153     if (PromiseAlloca)
1154       FrameData.setFieldIndex(
1155           PromiseAlloca, B.addFieldForAlloca(PromiseAlloca, /*header*/ true));
1156 
1157     // Add a field to store the suspend index.  This doesn't need to
1158     // be in the header.
1159     unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size()));
1160     Type *IndexType = Type::getIntNTy(C, IndexBits);
1161 
1162     SwitchIndexFieldId = B.addField(IndexType, None);
1163   } else {
1164     assert(PromiseAlloca == nullptr && "lowering doesn't support promises");
1165   }
1166 
  // Because multiple allocas may share the same field slot,
  // we add fields for the allocas here.
1169   B.addFieldForAllocas(F, FrameData, Shape);
1170   // Add PromiseAlloca to Allocas list so that
1171   // 1. updateLayoutIndex could update its index after
1172   // `performOptimizedStructLayout`
1173   // 2. it is processed in insertSpills.
1174   if (Shape.ABI == coro::ABI::Switch && PromiseAlloca)
    // We assume that the promise alloca won't be modified before
    // CoroBegin and that no alias will be created before CoroBegin.
1177     FrameData.Allocas.emplace_back(
1178         PromiseAlloca, DenseMap<Instruction *, llvm::Optional<APInt>>{}, false);
1179   // Create an entry for every spilled value.
1180   for (auto &S : FrameData.Spills) {
1181     Type *FieldType = S.first->getType();
1182     // For byval arguments, we need to store the pointed value in the frame,
1183     // instead of the pointer itself.
1184     if (const Argument *A = dyn_cast<Argument>(S.first))
1185       if (A->hasByValAttr())
1186         FieldType = A->getParamByValType();
1187     FieldIDType Id =
1188         B.addField(FieldType, None, false /*header*/, true /*IsSpillOfValue*/);
1189     FrameData.setFieldIndex(S.first, Id);
1190   }
1191 
1192   B.finish(FrameTy);
1193   FrameData.updateLayoutIndex(B);
1194   Shape.FrameAlign = B.getStructAlign();
1195   Shape.FrameSize = B.getStructSize();
1196 
1197   switch (Shape.ABI) {
1198   case coro::ABI::Switch: {
1199     // In the switch ABI, remember the switch-index field.
1200     auto IndexField = B.getLayoutField(*SwitchIndexFieldId);
1201     Shape.SwitchLowering.IndexField = IndexField.LayoutFieldIndex;
1202     Shape.SwitchLowering.IndexAlign = IndexField.Alignment.value();
1203     Shape.SwitchLowering.IndexOffset = IndexField.Offset;
1204 
1205     // Also round the frame size up to a multiple of its alignment, as is
1206     // generally expected in C/C++.
1207     Shape.FrameSize = alignTo(Shape.FrameSize, Shape.FrameAlign);
1208     break;
1209   }
1210 
1211   // In the retcon ABI, remember whether the frame is inline in the storage.
1212   case coro::ABI::Retcon:
1213   case coro::ABI::RetconOnce: {
1214     auto Id = Shape.getRetconCoroId();
1215     Shape.RetconLowering.IsFrameInlineInStorage
1216       = (B.getStructSize() <= Id->getStorageSize() &&
1217          B.getStructAlign() <= Id->getStorageAlignment());
1218     break;
1219   }
1220   case coro::ABI::Async: {
1221     Shape.AsyncLowering.FrameOffset =
1222         alignTo(Shape.AsyncLowering.ContextHeaderSize, Shape.FrameAlign);
1223     // Also make the final context size a multiple of the context alignment to
1224     // make allocation easier for allocators.
1225     Shape.AsyncLowering.ContextSize =
1226         alignTo(Shape.AsyncLowering.FrameOffset + Shape.FrameSize,
1227                 Shape.AsyncLowering.getContextAlignment());
1228     if (Shape.AsyncLowering.getContextAlignment() < Shape.FrameAlign) {
1229       report_fatal_error(
1230           "The alignment requirment of frame variables cannot be higher than "
1231           "the alignment of the async function context");
1232     }
1233     break;
1234   }
1235   }
1236 
1237   return FrameTy;
1238 }
1239 
// We use a pointer use visitor to track how an alloca is being used.
// The goal is to be able to answer the following three questions:
// 1. Should this alloca be allocated on the frame instead?
// 2. Could the content of the alloca be modified prior to CoroBegin, which
//    would require copying the data from the alloca to the frame after
//    CoroBegin?
// 3. Is there any alias created for this alloca prior to CoroBegin, but used
//    after CoroBegin? In that case, we will need to recreate the alias after
//    CoroBegin based off the frame.
//
// To answer question 1, we track two things:
//   a. The list of all BasicBlocks that use this alloca or any of its aliases.
//      In the end, we check if any two of these blocks cross suspension
//      points. If so, this alloca must be put on the frame.
//   b. Whether the alloca or any alias of the alloca escapes at some point,
//      either by storing the address somewhere, or by passing the address to a
//      function call that might capture it. If it ever escapes, this alloca
//      must be put on the frame conservatively.
// To answer question 2, we track the variable MayWriteBeforeCoroBegin.
// Whenever a potential write happens, either through a store instruction, a
// function call or any of the memory intrinsics, we check whether the
// instruction is prior to CoroBegin.
// To answer question 3, we track the offsets of all aliases created for the
// alloca prior to CoroBegin but used after CoroBegin. llvm::Optional is used
// to be able to represent the case when the offset is unknown (e.g. when you
// have a PHINode that takes in different offset values). We cannot handle
// unknown offsets and will assert. This is the potential issue left out. An
// ideal solution would likely require a significant redesign.
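//
// An illustrative example of case 3, an alias created before CoroBegin and
// used after it (names are hypothetical):
//
//   %a   = alloca [64 x i8]
//   %p   = getelementptr inbounds [64 x i8], ptr %a, i64 0, i64 8
//   %hdl = call ptr @llvm.coro.begin(token %id, ptr %mem)
//   ...
//   call void @use(ptr %p)
//
// Here %p is recorded with offset 8 so that it can be recreated from the
// corresponding frame field after CoroBegin.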
1265 namespace {
1266 struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
1267   using Base = PtrUseVisitor<AllocaUseVisitor>;
1268   AllocaUseVisitor(const DataLayout &DL, const DominatorTree &DT,
1269                    const CoroBeginInst &CB, const SuspendCrossingInfo &Checker,
1270                    bool ShouldUseLifetimeStartInfo)
1271       : PtrUseVisitor(DL), DT(DT), CoroBegin(CB), Checker(Checker),
1272         ShouldUseLifetimeStartInfo(ShouldUseLifetimeStartInfo) {}
1273 
1274   void visit(Instruction &I) {
1275     Users.insert(&I);
1276     Base::visit(I);
1277     // If the pointer is escaped prior to CoroBegin, we have to assume it would
1278     // be written into before CoroBegin as well.
1279     if (PI.isEscaped() && !DT.dominates(&CoroBegin, PI.getEscapingInst())) {
1280       MayWriteBeforeCoroBegin = true;
1281     }
1282   }
1283   // We need to provide this overload as PtrUseVisitor uses a pointer based
1284   // visiting function.
1285   void visit(Instruction *I) { return visit(*I); }
1286 
1287   void visitPHINode(PHINode &I) {
1288     enqueueUsers(I);
1289     handleAlias(I);
1290   }
1291 
1292   void visitSelectInst(SelectInst &I) {
1293     enqueueUsers(I);
1294     handleAlias(I);
1295   }
1296 
1297   void visitStoreInst(StoreInst &SI) {
    // Regardless of whether the alias of the alloca is the value operand or
    // the pointer operand, we need to assume the alloca has been written to.
1300     handleMayWrite(SI);
1301 
1302     if (SI.getValueOperand() != U->get())
1303       return;
1304 
1305     // We are storing the pointer into a memory location, potentially escaping.
1306     // As an optimization, we try to detect simple cases where it doesn't
1307     // actually escape, for example:
1308     //   %ptr = alloca ..
1309     //   %addr = alloca ..
1310     //   store %ptr, %addr
1311     //   %x = load %addr
1312     //   ..
    // If %addr is only used by loading from it, we can simply treat %x as
    // another alias of %ptr, and not consider %ptr as escaped.
1315     auto IsSimpleStoreThenLoad = [&]() {
1316       auto *AI = dyn_cast<AllocaInst>(SI.getPointerOperand());
1317       // If the memory location we are storing to is not an alloca, it
1318       // could be an alias of some other memory locations, which is difficult
1319       // to analyze.
1320       if (!AI)
1321         return false;
1322       // StoreAliases contains aliases of the memory location stored into.
1323       SmallVector<Instruction *, 4> StoreAliases = {AI};
1324       while (!StoreAliases.empty()) {
1325         Instruction *I = StoreAliases.pop_back_val();
1326         for (User *U : I->users()) {
1327           // If we are loading from the memory location, we are creating an
1328           // alias of the original pointer.
1329           if (auto *LI = dyn_cast<LoadInst>(U)) {
1330             enqueueUsers(*LI);
1331             handleAlias(*LI);
1332             continue;
1333           }
          // If we are overwriting the memory location, the pointer certainly
          // won't escape.
1336           if (auto *S = dyn_cast<StoreInst>(U))
1337             if (S->getPointerOperand() == I)
1338               continue;
1339           if (auto *II = dyn_cast<IntrinsicInst>(U))
1340             if (II->isLifetimeStartOrEnd())
1341               continue;
          // BitCastInst creates aliases of the memory location being stored
          // into.
1344           if (auto *BI = dyn_cast<BitCastInst>(U)) {
1345             StoreAliases.push_back(BI);
1346             continue;
1347           }
1348           return false;
1349         }
1350       }
1351 
1352       return true;
1353     };
1354 
1355     if (!IsSimpleStoreThenLoad())
1356       PI.setEscaped(&SI);
1357   }
1358 
1359   // All mem intrinsics modify the data.
1360   void visitMemIntrinsic(MemIntrinsic &MI) { handleMayWrite(MI); }
1361 
1362   void visitBitCastInst(BitCastInst &BC) {
1363     Base::visitBitCastInst(BC);
1364     handleAlias(BC);
1365   }
1366 
1367   void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
1368     Base::visitAddrSpaceCastInst(ASC);
1369     handleAlias(ASC);
1370   }
1371 
1372   void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
1373     // The base visitor will adjust Offset accordingly.
1374     Base::visitGetElementPtrInst(GEPI);
1375     handleAlias(GEPI);
1376   }
1377 
1378   void visitIntrinsicInst(IntrinsicInst &II) {
    // If the lifetime marker refers to a subrange of the original alloca,
    // ignore it to avoid misleading the analysis.
1382     if (II.getIntrinsicID() != Intrinsic::lifetime_start || !IsOffsetKnown ||
1383         !Offset.isZero())
1384       return Base::visitIntrinsicInst(II);
1385     LifetimeStarts.insert(&II);
1386   }
1387 
1388   void visitCallBase(CallBase &CB) {
1389     for (unsigned Op = 0, OpCount = CB.arg_size(); Op < OpCount; ++Op)
1390       if (U->get() == CB.getArgOperand(Op) && !CB.doesNotCapture(Op))
1391         PI.setEscaped(&CB);
1392     handleMayWrite(CB);
1393   }
1394 
1395   bool getShouldLiveOnFrame() const {
1396     if (!ShouldLiveOnFrame)
1397       ShouldLiveOnFrame = computeShouldLiveOnFrame();
1398     return ShouldLiveOnFrame.getValue();
1399   }
1400 
1401   bool getMayWriteBeforeCoroBegin() const { return MayWriteBeforeCoroBegin; }
1402 
1403   DenseMap<Instruction *, llvm::Optional<APInt>> getAliasesCopy() const {
1404     assert(getShouldLiveOnFrame() && "This method should only be called if the "
1405                                      "alloca needs to live on the frame.");
1406     for (const auto &P : AliasOffetMap)
1407       if (!P.second)
1408         report_fatal_error("Unable to handle an alias with unknown offset "
1409                            "created before CoroBegin.");
1410     return AliasOffetMap;
1411   }
1412 
1413 private:
1414   const DominatorTree &DT;
1415   const CoroBeginInst &CoroBegin;
1416   const SuspendCrossingInfo &Checker;
  // All aliases of the original AllocaInst that are created before CoroBegin
  // and used after CoroBegin. Each entry maps the aliasing instruction to its
  // offset into the original alloca. They need to be recreated after CoroBegin
  // off the frame.
1420   DenseMap<Instruction *, llvm::Optional<APInt>> AliasOffetMap{};
1421   SmallPtrSet<Instruction *, 4> Users{};
1422   SmallPtrSet<IntrinsicInst *, 2> LifetimeStarts{};
1423   bool MayWriteBeforeCoroBegin{false};
1424   bool ShouldUseLifetimeStartInfo{true};
1425 
1426   mutable llvm::Optional<bool> ShouldLiveOnFrame{};
1427 
1428   bool computeShouldLiveOnFrame() const {
1429     // If lifetime information is available, we check it first since it's
1430     // more precise. We look at every pair of lifetime.start intrinsic and
1431     // every basic block that uses the pointer to see if they cross suspension
1432     // points. The uses cover both direct uses as well as indirect uses.
1433     if (ShouldUseLifetimeStartInfo && !LifetimeStarts.empty()) {
1434       for (auto *I : Users)
1435         for (auto *S : LifetimeStarts)
1436           if (Checker.isDefinitionAcrossSuspend(*S, I))
1437             return true;
1438       return false;
1439     }
1440     // FIXME: Ideally the isEscaped check should come at the beginning.
1441     // However there are a few loose ends that need to be fixed first before
1442     // we can do that. We need to make sure we are not over-conservative, so
1443     // that the data accessed in-between await_suspend and symmetric transfer
1444     // is always put on the stack, and also data accessed after coro.end is
1445     // always put on the stack (esp the return object). To fix that, we need
1446     // to:
1447     //  1) Potentially treat sret as nocapture in calls
    //  2) Specially handle the return object and put it on the stack
1449     //  3) Utilize lifetime.end intrinsic
1450     if (PI.isEscaped())
1451       return true;
1452 
1453     for (auto *U1 : Users)
1454       for (auto *U2 : Users)
1455         if (Checker.isDefinitionAcrossSuspend(*U1, U2))
1456           return true;
1457 
1458     return false;
1459   }
1460 
1461   void handleMayWrite(const Instruction &I) {
1462     if (!DT.dominates(&CoroBegin, &I))
1463       MayWriteBeforeCoroBegin = true;
1464   }
1465 
1466   bool usedAfterCoroBegin(Instruction &I) {
1467     for (auto &U : I.uses())
1468       if (DT.dominates(&CoroBegin, U))
1469         return true;
1470     return false;
1471   }
1472 
1473   void handleAlias(Instruction &I) {
    // We track all aliases created prior to CoroBegin but used after it.
    // These aliases may need to be recreated after CoroBegin if the alloca
    // needs to live on the frame.
1477     if (DT.dominates(&CoroBegin, &I) || !usedAfterCoroBegin(I))
1478       return;
1479 
1480     if (!IsOffsetKnown) {
1481       AliasOffetMap[&I].reset();
1482     } else {
1483       auto Itr = AliasOffetMap.find(&I);
1484       if (Itr == AliasOffetMap.end()) {
1485         AliasOffetMap[&I] = Offset;
1486       } else if (Itr->second.hasValue() && Itr->second.getValue() != Offset) {
1487         // If we have seen two different possible values for this alias, we set
1488         // it to empty.
1489         AliasOffetMap[&I].reset();
1490       }
1491     }
1492   }
1493 };
1494 } // namespace
1495 
// We need to make room to insert a spill after the initial PHIs, but before
// the catchswitch instruction. Placing the spill earlier would violate the
// requirement that a catchswitch, like all other EH pads, must be the first
// non-PHI instruction in a block.
1499 //
1500 // Split away catchswitch into a separate block and insert in its place:
1501 //
1502 //   cleanuppad <InsertPt> cleanupret.
1503 //
// The cleanupret instruction will act as an insert point for the spill.
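//
// A sketch of the transformation (block and value names are illustrative):
//
//   dispatch:
//     %cs = catchswitch within %parent [...] unwind ...
//
// becomes
//
//   dispatch:
//     %pad = cleanuppad within %parent []
//     ; spills are inserted here, before the cleanupret
//     cleanupret from %pad unwind label %dispatch.split
//   dispatch.split:
//     %cs = catchswitch within %parent [...] unwind ...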
1505 static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) {
1506   BasicBlock *CurrentBlock = CatchSwitch->getParent();
1507   BasicBlock *NewBlock = CurrentBlock->splitBasicBlock(CatchSwitch);
1508   CurrentBlock->getTerminator()->eraseFromParent();
1509 
1510   auto *CleanupPad =
1511       CleanupPadInst::Create(CatchSwitch->getParentPad(), {}, "", CurrentBlock);
1512   auto *CleanupRet =
1513       CleanupReturnInst::Create(CleanupPad, NewBlock, CurrentBlock);
1514   return CleanupRet;
1515 }
1516 
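// Create the frame pointer by casting the handle returned by coro.begin to a
// pointer to the frame struct, immediately after coro.begin.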
1517 static void createFramePtr(coro::Shape &Shape) {
1518   auto *CB = Shape.CoroBegin;
1519   IRBuilder<> Builder(CB->getNextNode());
1520   StructType *FrameTy = Shape.FrameTy;
1521   PointerType *FramePtrTy = FrameTy->getPointerTo();
1522   Shape.FramePtr =
1523       cast<Instruction>(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr"));
1524 }
1525 
1526 // Replace all alloca and SSA values that are accessed across suspend points
1527 // with GetElementPointer from coroutine frame + loads and stores. Create an
1528 // AllocaSpillBB that will become the new entry block for the resume parts of
1529 // the coroutine:
1530 //
1531 //    %hdl = coro.begin(...)
1532 //    whatever
1533 //
1534 // becomes:
1535 //
1536 //    %hdl = coro.begin(...)
1537 //    %FramePtr = bitcast i8* hdl to %f.frame*
1538 //    br label %AllocaSpillBB
1539 //
1540 //  AllocaSpillBB:
1541 //    ; geps corresponding to allocas that were moved to coroutine frame
1542 //    br label PostSpill
1543 //
1544 //  PostSpill:
1545 //    whatever
1546 //
1547 //
1548 static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
1549   auto *CB = Shape.CoroBegin;
1550   LLVMContext &C = CB->getContext();
1551   IRBuilder<> Builder(C);
1552   StructType *FrameTy = Shape.FrameTy;
1553   Value *FramePtr = Shape.FramePtr;
1554   DominatorTree DT(*CB->getFunction());
1555   SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> DbgPtrAllocaCache;
1556 
1557   // Create a GEP with the given index into the coroutine frame for the original
1558   // value Orig. Appends an extra 0 index for array-allocas, preserving the
1559   // original type.
1560   auto GetFramePointer = [&](Value *Orig) -> Value * {
1561     FieldIDType Index = FrameData.getFieldIndex(Orig);
1562     SmallVector<Value *, 3> Indices = {
1563         ConstantInt::get(Type::getInt32Ty(C), 0),
1564         ConstantInt::get(Type::getInt32Ty(C), Index),
1565     };
1566 
1567     if (auto *AI = dyn_cast<AllocaInst>(Orig)) {
1568       if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) {
1569         auto Count = CI->getValue().getZExtValue();
1570         if (Count > 1) {
1571           Indices.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
1572         }
1573       } else {
1574         report_fatal_error("Coroutines cannot handle non static allocas yet");
1575       }
1576     }
1577 
1578     auto GEP = cast<GetElementPtrInst>(
1579         Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices));
1580     if (auto *AI = dyn_cast<AllocaInst>(Orig)) {
1581       if (FrameData.getDynamicAlign(Orig) != 0) {
1582         assert(FrameData.getDynamicAlign(Orig) == AI->getAlignment());
1583         auto *M = AI->getModule();
1584         auto *IntPtrTy = M->getDataLayout().getIntPtrType(AI->getType());
1585         auto *PtrValue = Builder.CreatePtrToInt(GEP, IntPtrTy);
1586         auto *AlignMask = ConstantInt::get(IntPtrTy, AI->getAlignment() - 1);
1587         PtrValue = Builder.CreateAdd(PtrValue, AlignMask);
1588         PtrValue = Builder.CreateAnd(PtrValue, Builder.CreateNot(AlignMask));
1589         return Builder.CreateIntToPtr(PtrValue, AI->getType());
1590       }
      // If the type of the GEP is not equal to the type of the AllocaInst, it
      // implies that this AllocaInst shares its frame slot with another
      // AllocaInst. So we cast the GEP to the type of the AllocaInst here to
      // reuse the frame storage.
      //
      // Note: If we change the strategy for dealing with alignment, we need to
      // refine this casting.
1598       if (GEP->getResultElementType() != Orig->getType())
1599         return Builder.CreateBitCast(GEP, Orig->getType(),
1600                                      Orig->getName() + Twine(".cast"));
1601     }
1602     return GEP;
1603   };
1604 
1605   for (auto const &E : FrameData.Spills) {
1606     Value *Def = E.first;
1607     auto SpillAlignment = Align(FrameData.getAlign(Def));
1608     // Create a store instruction storing the value into the
1609     // coroutine frame.
1610     Instruction *InsertPt = nullptr;
1611     Type *ByValTy = nullptr;
1612     if (auto *Arg = dyn_cast<Argument>(Def)) {
1613       // For arguments, we will place the store instruction right after
1614       // the coroutine frame pointer instruction, i.e. bitcast of
1615       // coro.begin from i8* to %f.frame*.
1616       InsertPt = Shape.getInsertPtAfterFramePtr();
1617 
1618       // If we're spilling an Argument, make sure we clear 'nocapture'
1619       // from the coroutine function.
1620       Arg->getParent()->removeParamAttr(Arg->getArgNo(), Attribute::NoCapture);
1621 
1622       if (Arg->hasByValAttr())
1623         ByValTy = Arg->getParamByValType();
1624     } else if (auto *CSI = dyn_cast<AnyCoroSuspendInst>(Def)) {
1625       // Don't spill immediately after a suspend; splitting assumes
1626       // that the suspend will be followed by a branch.
1627       InsertPt = CSI->getParent()->getSingleSuccessor()->getFirstNonPHI();
1628     } else {
1629       auto *I = cast<Instruction>(Def);
1630       if (!DT.dominates(CB, I)) {
        // If it is not dominated by CoroBegin, then the spill should be
        // inserted immediately after the frame pointer is computed.
1633         InsertPt = Shape.getInsertPtAfterFramePtr();
1634       } else if (auto *II = dyn_cast<InvokeInst>(I)) {
1635         // If we are spilling the result of the invoke instruction, split
1636         // the normal edge and insert the spill in the new block.
1637         auto *NewBB = SplitEdge(II->getParent(), II->getNormalDest());
1638         InsertPt = NewBB->getTerminator();
1639       } else if (isa<PHINode>(I)) {
        // Skip past the PHI nodes and EH pad instructions.
1641         BasicBlock *DefBlock = I->getParent();
1642         if (auto *CSI = dyn_cast<CatchSwitchInst>(DefBlock->getTerminator()))
1643           InsertPt = splitBeforeCatchSwitch(CSI);
1644         else
1645           InsertPt = &*DefBlock->getFirstInsertionPt();
1646       } else {
1647         assert(!I->isTerminator() && "unexpected terminator");
1648         // For all other values, the spill is placed immediately after
1649         // the definition.
1650         InsertPt = I->getNextNode();
1651       }
1652     }
1653 
1654     auto Index = FrameData.getFieldIndex(Def);
1655     Builder.SetInsertPoint(InsertPt);
1656     auto *G = Builder.CreateConstInBoundsGEP2_32(
1657         FrameTy, FramePtr, 0, Index, Def->getName() + Twine(".spill.addr"));
1658     if (ByValTy) {
1659       // For byval arguments, we need to store the pointed value in the frame,
1660       // instead of the pointer itself.
1661       auto *Value = Builder.CreateLoad(ByValTy, Def);
1662       Builder.CreateAlignedStore(Value, G, SpillAlignment);
1663     } else {
1664       Builder.CreateAlignedStore(Def, G, SpillAlignment);
1665     }
1666 
1667     BasicBlock *CurrentBlock = nullptr;
1668     Value *CurrentReload = nullptr;
1669     for (auto *U : E.second) {
      // If we have not seen the use block yet, create a load instruction to
      // reload the spilled value from the coroutine frame, using the frame GEP
      // as the address (for byval arguments the GEP itself is the reload).
1673       if (CurrentBlock != U->getParent()) {
1674         CurrentBlock = U->getParent();
1675         Builder.SetInsertPoint(&*CurrentBlock->getFirstInsertionPt());
1676 
1677         auto *GEP = GetFramePointer(E.first);
1678         GEP->setName(E.first->getName() + Twine(".reload.addr"));
1679         if (ByValTy)
1680           CurrentReload = GEP;
1681         else
1682           CurrentReload = Builder.CreateAlignedLoad(
1683               FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP,
1684               SpillAlignment, E.first->getName() + Twine(".reload"));
1685 
1686         TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(Def);
1687         for (DbgDeclareInst *DDI : DIs) {
1688           bool AllowUnresolved = false;
1689           // This dbg.declare is preserved for all coro-split function
1690           // fragments. It will be unreachable in the main function, and
1691           // processed by coro::salvageDebugInfo() by CoroCloner.
1692           DIBuilder(*CurrentBlock->getParent()->getParent(), AllowUnresolved)
1693               .insertDeclare(CurrentReload, DDI->getVariable(),
1694                              DDI->getExpression(), DDI->getDebugLoc(),
1695                              &*Builder.GetInsertPoint());
1696           // This dbg.declare is for the main function entry point.  It
1697           // will be deleted in all coro-split functions.
1698           coro::salvageDebugInfo(DbgPtrAllocaCache, DDI, Shape.OptimizeFrame);
1699         }
1700       }
1701 
1702       // Salvage debug info on any dbg.addr that we see. We do not insert them
1703       // into each block where we have a use though.
1704       if (auto *DI = dyn_cast<DbgAddrIntrinsic>(U)) {
1705         coro::salvageDebugInfo(DbgPtrAllocaCache, DI, Shape.OptimizeFrame);
1706       }
1707 
      // If we have a single-edge PHINode, remove it and replace it with a
      // reload from the coroutine frame. (We already took care of multi-edge
      // PHINodes by rewriting them in the rewritePHIs function.)
1711       if (auto *PN = dyn_cast<PHINode>(U)) {
1712         assert(PN->getNumIncomingValues() == 1 &&
1713                "unexpected number of incoming "
1714                "values in the PHINode");
1715         PN->replaceAllUsesWith(CurrentReload);
1716         PN->eraseFromParent();
1717         continue;
1718       }
1719 
1720       // Replace all uses of CurrentValue in the current instruction with
1721       // reload.
1722       U->replaceUsesOfWith(Def, CurrentReload);
1723     }
1724   }
1725 
1726   BasicBlock *FramePtrBB = Shape.getInsertPtAfterFramePtr()->getParent();
1727 
1728   auto SpillBlock = FramePtrBB->splitBasicBlock(
1729       Shape.getInsertPtAfterFramePtr(), "AllocaSpillBB");
1730   SpillBlock->splitBasicBlock(&SpillBlock->front(), "PostSpill");
1731   Shape.AllocaSpillBlock = SpillBlock;
1732 
1733   // retcon and retcon.once lowering assumes all uses have been sunk.
1734   if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
1735       Shape.ABI == coro::ABI::Async) {
    // If we found any allocas, replace all of their remaining uses with GEPs.
1737     Builder.SetInsertPoint(&SpillBlock->front());
1738     for (const auto &P : FrameData.Allocas) {
1739       AllocaInst *Alloca = P.Alloca;
1740       auto *G = GetFramePointer(Alloca);
1741 
      // We are not using ReplaceInstWithInst(P.first, cast<Instruction>(G))
      // here, as we are changing the location of the instruction.
1744       G->takeName(Alloca);
1745       Alloca->replaceAllUsesWith(G);
1746       Alloca->eraseFromParent();
1747     }
1748     return;
1749   }
1750 
  // If we found any allocas, replace all of their remaining uses with GEP
  // instructions. To preserve debuggability, we replace the uses of allocas
  // for dbg.declares and dbg.values with the reload from the frame.
  // Note: We cannot replace the allocas with GEP instructions indiscriminately,
  // as some of the uses may not be dominated by CoroBegin.
1756   Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front());
1757   SmallVector<Instruction *, 4> UsersToUpdate;
1758   for (const auto &A : FrameData.Allocas) {
1759     AllocaInst *Alloca = A.Alloca;
1760     UsersToUpdate.clear();
1761     for (User *U : Alloca->users()) {
1762       auto *I = cast<Instruction>(U);
1763       if (DT.dominates(CB, I))
1764         UsersToUpdate.push_back(I);
1765     }
1766     if (UsersToUpdate.empty())
1767       continue;
1768     auto *G = GetFramePointer(Alloca);
1769     G->setName(Alloca->getName() + Twine(".reload.addr"));
1770 
1771     SmallVector<DbgVariableIntrinsic *, 4> DIs;
1772     findDbgUsers(DIs, Alloca);
1773     for (auto *DVI : DIs)
1774       DVI->replaceUsesOfWith(Alloca, G);
1775 
1776     for (Instruction *I : UsersToUpdate)
1777       I->replaceUsesOfWith(Alloca, G);
1778   }
1779   Builder.SetInsertPoint(Shape.getInsertPtAfterFramePtr());
1780   for (const auto &A : FrameData.Allocas) {
1781     AllocaInst *Alloca = A.Alloca;
1782     if (A.MayWriteBeforeCoroBegin) {
      // MayWriteBeforeCoroBegin means the alloca may have been modified before
      // CoroBegin, so copy its current contents into the frame slot.
1784       if (Alloca->isArrayAllocation())
1785         report_fatal_error(
1786             "Coroutines cannot handle copying of array allocas yet");
1787 
1788       auto *G = GetFramePointer(Alloca);
1789       auto *Value = Builder.CreateLoad(Alloca->getAllocatedType(), Alloca);
1790       Builder.CreateStore(Value, G);
1791     }
    // For each alias to Alloca created before CoroBegin but used after
    // CoroBegin, we recreate it after CoroBegin by applying the offset
    // to the pointer in the frame.
1795     for (const auto &Alias : A.Aliases) {
1796       auto *FramePtr = GetFramePointer(Alloca);
1797       auto *FramePtrRaw =
1798           Builder.CreateBitCast(FramePtr, Type::getInt8PtrTy(C));
1799       auto &Value = Alias.second.getValue();
1800       auto ITy = IntegerType::get(C, Value.getBitWidth());
1801       auto *AliasPtr = Builder.CreateGEP(Type::getInt8Ty(C), FramePtrRaw,
1802                                          ConstantInt::get(ITy, Value));
1803       auto *AliasPtrTyped =
1804           Builder.CreateBitCast(AliasPtr, Alias.first->getType());
1805       Alias.first->replaceUsesWithIf(
1806           AliasPtrTyped, [&](Use &U) { return DT.dominates(CB, U); });
1807     }
1808   }
1809 }
1810 
// Moves the values in the PHIs in SuccBB that correspond to PredBB into a new
// PHI in InsertedBB.
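//
// For example (illustrative), for the edge %pred -> %succ split by the
// inserted block %succ.from.pred:
//
//   succ:
//     %v = phi i32 [ %x, %succ.from.pred ], ...
//
// becomes
//
//   succ.from.pred:
//     %v.succ = phi i32 [ %x, %pred ]
//     br label %succ
//   succ:
//     %v = phi i32 [ %v.succ, %succ.from.pred ], ...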
1813 static void movePHIValuesToInsertedBlock(BasicBlock *SuccBB,
1814                                          BasicBlock *InsertedBB,
1815                                          BasicBlock *PredBB,
1816                                          PHINode *UntilPHI = nullptr) {
1817   auto *PN = cast<PHINode>(&SuccBB->front());
1818   do {
1819     int Index = PN->getBasicBlockIndex(InsertedBB);
1820     Value *V = PN->getIncomingValue(Index);
1821     PHINode *InputV = PHINode::Create(
1822         V->getType(), 1, V->getName() + Twine(".") + SuccBB->getName(),
1823         &InsertedBB->front());
1824     InputV->addIncoming(V, PredBB);
1825     PN->setIncomingValue(Index, InputV);
1826     PN = dyn_cast<PHINode>(PN->getNextNode());
1827   } while (PN != UntilPHI);
1828 }
1829 
1830 // Rewrites the PHI Nodes in a cleanuppad.
1831 static void rewritePHIsForCleanupPad(BasicBlock *CleanupPadBB,
1832                                      CleanupPadInst *CleanupPad) {
1833   // For every incoming edge to a CleanupPad we will create a new block holding
1834   // all incoming values in single-value PHI nodes. We will then create another
  // block to act as a dispatcher (as all unwind edges for related EH blocks
1836   // must be the same).
1837   //
1838   // cleanuppad:
1839   //    %2 = phi i32[%0, %catchswitch], [%1, %catch.1]
1840   //    %3 = cleanuppad within none []
1841   //
1842   // It will create:
1843   //
1844   // cleanuppad.corodispatch
1845   //    %2 = phi i8[0, %catchswitch], [1, %catch.1]
1846   //    %3 = cleanuppad within none []
  //    switch i8 %2, label %unreachable
1848   //            [i8 0, label %cleanuppad.from.catchswitch
1849   //             i8 1, label %cleanuppad.from.catch.1]
1850   // cleanuppad.from.catchswitch:
1851   //    %4 = phi i32 [%0, %catchswitch]
1852   //    br %label cleanuppad
1853   // cleanuppad.from.catch.1:
1854   //    %6 = phi i32 [%1, %catch.1]
1855   //    br %label cleanuppad
1856   // cleanuppad:
1857   //    %8 = phi i32 [%4, %cleanuppad.from.catchswitch],
1858   //                 [%6, %cleanuppad.from.catch.1]
1859 
  // Unreachable BB, in case the dispatcher switches on an invalid value.
1861   auto *UnreachBB = BasicBlock::Create(
1862       CleanupPadBB->getContext(), "unreachable", CleanupPadBB->getParent());
1863   IRBuilder<> Builder(UnreachBB);
1864   Builder.CreateUnreachable();
1865 
1866   // Create a new cleanuppad which will be the dispatcher.
1867   auto *NewCleanupPadBB =
1868       BasicBlock::Create(CleanupPadBB->getContext(),
1869                          CleanupPadBB->getName() + Twine(".corodispatch"),
1870                          CleanupPadBB->getParent(), CleanupPadBB);
1871   Builder.SetInsertPoint(NewCleanupPadBB);
1872   auto *SwitchType = Builder.getInt8Ty();
1873   auto *SetDispatchValuePN =
1874       Builder.CreatePHI(SwitchType, pred_size(CleanupPadBB));
1875   CleanupPad->removeFromParent();
1876   CleanupPad->insertAfter(SetDispatchValuePN);
1877   auto *SwitchOnDispatch = Builder.CreateSwitch(SetDispatchValuePN, UnreachBB,
1878                                                 pred_size(CleanupPadBB));
1879 
1880   int SwitchIndex = 0;
1881   SmallVector<BasicBlock *, 8> Preds(predecessors(CleanupPadBB));
1882   for (BasicBlock *Pred : Preds) {
1883     // Create a new cleanuppad and move the PHI values to there.
1884     auto *CaseBB = BasicBlock::Create(CleanupPadBB->getContext(),
1885                                       CleanupPadBB->getName() +
1886                                           Twine(".from.") + Pred->getName(),
1887                                       CleanupPadBB->getParent(), CleanupPadBB);
1888     updatePhiNodes(CleanupPadBB, Pred, CaseBB);
1889     CaseBB->setName(CleanupPadBB->getName() + Twine(".from.") +
1890                     Pred->getName());
1891     Builder.SetInsertPoint(CaseBB);
1892     Builder.CreateBr(CleanupPadBB);
1893     movePHIValuesToInsertedBlock(CleanupPadBB, CaseBB, NewCleanupPadBB);
1894 
1895     // Update this Pred to the new unwind point.
1896     setUnwindEdgeTo(Pred->getTerminator(), NewCleanupPadBB);
1897 
1898     // Setup the switch in the dispatcher.
1899     auto *SwitchConstant = ConstantInt::get(SwitchType, SwitchIndex);
1900     SetDispatchValuePN->addIncoming(SwitchConstant, Pred);
1901     SwitchOnDispatch->addCase(SwitchConstant, CaseBB);
1902     SwitchIndex++;
1903   }
1904 }
1905 
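// Replace the uses of any PHI node that has a single incoming value with that
// value; later code assumes such trivial PHIs are not live across a suspend
// point.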
1906 static void cleanupSinglePredPHIs(Function &F) {
1907   SmallVector<PHINode *, 32> Worklist;
1908   for (auto &BB : F) {
1909     for (auto &Phi : BB.phis()) {
1910       if (Phi.getNumIncomingValues() == 1) {
1911         Worklist.push_back(&Phi);
1912       } else
1913         break;
1914     }
1915   }
1916   while (!Worklist.empty()) {
1917     auto *Phi = Worklist.pop_back_val();
1918     auto *OriginalValue = Phi->getIncomingValue(0);
1919     Phi->replaceAllUsesWith(OriginalValue);
1920   }
1921 }
1922 
1923 static void rewritePHIs(BasicBlock &BB) {
  // For every incoming edge we will create a block holding all
  // incoming values in single-value PHI nodes.
1926   //
1927   // loop:
1928   //    %n.val = phi i32[%n, %entry], [%inc, %loop]
1929   //
1930   // It will create:
1931   //
1932   // loop.from.entry:
1933   //    %n.loop.pre = phi i32 [%n, %entry]
1934   //    br %label loop
1935   // loop.from.loop:
1936   //    %inc.loop.pre = phi i32 [%inc, %loop]
1937   //    br %label loop
1938   //
1939   // After this rewrite, further analysis will ignore any phi nodes with more
1940   // than one incoming edge.
1941 
1942   // TODO: Simplify PHINodes in the basic block to remove duplicate
1943   // predecessors.
1944 
1945   // Special case for CleanupPad: all EH blocks must have the same unwind edge
1946   // so we need to create an additional "dispatcher" block.
1947   if (auto *CleanupPad =
1948           dyn_cast_or_null<CleanupPadInst>(BB.getFirstNonPHI())) {
1949     SmallVector<BasicBlock *, 8> Preds(predecessors(&BB));
1950     for (BasicBlock *Pred : Preds) {
1951       if (CatchSwitchInst *CS =
1952               dyn_cast<CatchSwitchInst>(Pred->getTerminator())) {
        // CleanupPad with a CatchSwitch predecessor: therefore this is an
        // unwind destination that needs to be handled specially.
1955         assert(CS->getUnwindDest() == &BB);
1956         (void)CS;
1957         rewritePHIsForCleanupPad(&BB, CleanupPad);
1958         return;
1959       }
1960     }
1961   }
1962 
1963   LandingPadInst *LandingPad = nullptr;
1964   PHINode *ReplPHI = nullptr;
1965   if ((LandingPad = dyn_cast_or_null<LandingPadInst>(BB.getFirstNonPHI()))) {
1966     // ehAwareSplitEdge will clone the LandingPad in all the edge blocks.
1967     // We replace the original landing pad with a PHINode that will collect the
1968     // results from all of them.
1969     ReplPHI = PHINode::Create(LandingPad->getType(), 1, "", LandingPad);
1970     ReplPHI->takeName(LandingPad);
1971     LandingPad->replaceAllUsesWith(ReplPHI);
1972     // We will erase the original landing pad at the end of this function after
1973     // ehAwareSplitEdge cloned it in the transition blocks.
1974   }
1975 
1976   SmallVector<BasicBlock *, 8> Preds(predecessors(&BB));
1977   for (BasicBlock *Pred : Preds) {
1978     auto *IncomingBB = ehAwareSplitEdge(Pred, &BB, LandingPad, ReplPHI);
1979     IncomingBB->setName(BB.getName() + Twine(".from.") + Pred->getName());
1980 
1981     // Stop the moving of values at ReplPHI, as this is either null or the PHI
1982     // that replaced the landing pad.
1983     movePHIValuesToInsertedBlock(&BB, IncomingBB, Pred, ReplPHI);
1984   }
1985 
1986   if (LandingPad) {
    // Calls to ehAwareSplitEdge cloned the original landing pad.
    // We no longer need it.
1989     LandingPad->eraseFromParent();
1990   }
1991 }
1992 
1993 static void rewritePHIs(Function &F) {
1994   SmallVector<BasicBlock *, 8> WorkList;
1995 
1996   for (BasicBlock &BB : F)
1997     if (auto *PN = dyn_cast<PHINode>(&BB.front()))
1998       if (PN->getNumIncomingValues() > 1)
1999         WorkList.push_back(&BB);
2000 
2001   for (BasicBlock *BB : WorkList)
2002     rewritePHIs(*BB);
2003 }
2004 
// Check for instructions that we can recreate on resume as opposed to spilling
// the result into the coroutine frame.
2007 static bool materializable(Instruction &V) {
2008   return isa<CastInst>(&V) || isa<GetElementPtrInst>(&V) ||
2009          isa<BinaryOperator>(&V) || isa<CmpInst>(&V) || isa<SelectInst>(&V);
2010 }
2011 
2012 // Check for structural coroutine intrinsics that should not be spilled into
2013 // the coroutine frame.
2014 static bool isCoroutineStructureIntrinsic(Instruction &I) {
2015   return isa<CoroIdInst>(&I) || isa<CoroSaveInst>(&I) ||
2016          isa<CoroSuspendInst>(&I);
2017 }
2018 
// For every use of a value that is across a suspend point, recreate that value
// after the suspend point.
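//
// For example (illustrative):
//
//   %gep = getelementptr inbounds %t, %t* %base, i32 0, i32 1
//   ; ... suspend ...
//   use of %gep
//
// becomes
//
//   ; ... suspend ...
//   %gep.resume = getelementptr inbounds %t, %t* %base, i32 0, i32 1
//   use of %gep.resume
//
// so that only %base (if anything) needs a frame slot instead of %gep.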
2021 static void rewriteMaterializableInstructions(IRBuilder<> &IRB,
2022                                               const SpillInfo &Spills) {
2023   for (const auto &E : Spills) {
2024     Value *Def = E.first;
2025     BasicBlock *CurrentBlock = nullptr;
2026     Instruction *CurrentMaterialization = nullptr;
2027     for (Instruction *U : E.second) {
2028       // If we have not seen this block, materialize the value.
2029       if (CurrentBlock != U->getParent()) {
2030 
2031         bool IsInCoroSuspendBlock = isa<AnyCoroSuspendInst>(U);
2032         CurrentBlock = U->getParent();
2033         auto *InsertBlock = IsInCoroSuspendBlock
2034                                 ? CurrentBlock->getSinglePredecessor()
2035                                 : CurrentBlock;
2036         CurrentMaterialization = cast<Instruction>(Def)->clone();
2037         CurrentMaterialization->setName(Def->getName());
2038         CurrentMaterialization->insertBefore(
2039             IsInCoroSuspendBlock ? InsertBlock->getTerminator()
2040                                  : &*InsertBlock->getFirstInsertionPt());
2041       }
2042       if (auto *PN = dyn_cast<PHINode>(U)) {
2043         assert(PN->getNumIncomingValues() == 1 &&
2044                "unexpected number of incoming "
2045                "values in the PHINode");
2046         PN->replaceAllUsesWith(CurrentMaterialization);
2047         PN->eraseFromParent();
2048         continue;
2049       }
2050       // Replace all uses of Def in the current instruction with the
2051       // CurrentMaterialization for the block.
2052       U->replaceUsesOfWith(Def, CurrentMaterialization);
2053     }
2054   }
2055 }
2056 
// Splits the block at a particular instruction unless it is the first
// instruction in a block that has a single predecessor.
2059 static BasicBlock *splitBlockIfNotFirst(Instruction *I, const Twine &Name) {
2060   auto *BB = I->getParent();
2061   if (&BB->front() == I) {
2062     if (BB->getSinglePredecessor()) {
2063       BB->setName(Name);
2064       return BB;
2065     }
2066   }
2067   return BB->splitBasicBlock(I, Name);
2068 }
2069 
// Split above and below a particular instruction so that it
// ends up alone in its own block.
2072 static void splitAround(Instruction *I, const Twine &Name) {
2073   splitBlockIfNotFirst(I, Name);
2074   splitBlockIfNotFirst(I->getNextNode(), "After" + Name);
2075 }
2076 
2077 static bool isSuspendBlock(BasicBlock *BB) {
2078   return isa<AnyCoroSuspendInst>(BB->front());
2079 }
2080 
2081 typedef SmallPtrSet<BasicBlock*, 8> VisitedBlocksSet;
2082 
2083 /// Does control flow starting at the given block ever reach a suspend
2084 /// instruction before reaching a block in VisitedOrFreeBBs?
2085 static bool isSuspendReachableFrom(BasicBlock *From,
2086                                    VisitedBlocksSet &VisitedOrFreeBBs) {
2087   // Eagerly try to add this block to the visited set.  If it's already
2088   // there, stop recursing; this path doesn't reach a suspend before
2089   // either looping or reaching a freeing block.
2090   if (!VisitedOrFreeBBs.insert(From).second)
2091     return false;
2092 
2093   // We assume that we'll already have split suspends into their own blocks.
2094   if (isSuspendBlock(From))
2095     return true;
2096 
2097   // Recurse on the successors.
2098   for (auto Succ : successors(From)) {
2099     if (isSuspendReachableFrom(Succ, VisitedOrFreeBBs))
2100       return true;
2101   }
2102 
2103   return false;
2104 }
2105 
2106 /// Is the given alloca "local", i.e. bounded in lifetime to not cross a
2107 /// suspend point?
2108 static bool isLocalAlloca(CoroAllocaAllocInst *AI) {
  // Seed the visited set with all the basic blocks containing a free
  // so that the reachability search stops at them.
2111   VisitedBlocksSet VisitedOrFreeBBs;
2112   for (auto User : AI->users()) {
2113     if (auto FI = dyn_cast<CoroAllocaFreeInst>(User))
2114       VisitedOrFreeBBs.insert(FI->getParent());
2115   }
2116 
2117   return !isSuspendReachableFrom(AI->getParent(), VisitedOrFreeBBs);
2118 }
2119 
2120 /// After we split the coroutine, will the given basic block be along
2121 /// an obvious exit path for the resumption function?
2122 static bool willLeaveFunctionImmediatelyAfter(BasicBlock *BB,
2123                                               unsigned depth = 3) {
2124   // If we've bottomed out our depth count, stop searching and assume
2125   // that the path might loop back.
2126   if (depth == 0) return false;
2127 
2128   // If this is a suspend block, we're about to exit the resumption function.
2129   if (isSuspendBlock(BB)) return true;
2130 
2131   // Recurse into the successors.
2132   for (auto Succ : successors(BB)) {
2133     if (!willLeaveFunctionImmediatelyAfter(Succ, depth - 1))
2134       return false;
2135   }
2136 
2137   // If none of the successors leads back in a loop, we're on an exit/abort.
2138   return true;
2139 }
2140 
2141 static bool localAllocaNeedsStackSave(CoroAllocaAllocInst *AI) {
2142   // Look for a free that isn't sufficiently obviously followed by
2143   // either a suspend or a termination, i.e. something that will leave
2144   // the coro resumption frame.
2145   for (auto U : AI->users()) {
2146     auto FI = dyn_cast<CoroAllocaFreeInst>(U);
2147     if (!FI) continue;
2148 
2149     if (!willLeaveFunctionImmediatelyAfter(FI->getParent()))
2150       return true;
2151   }
2152 
2153   // If we never found one, we don't need a stack save.
2154   return false;
2155 }
2156 
2157 /// Turn each of the given local allocas into a normal (dynamic) alloca
2158 /// instruction.
2159 static void lowerLocalAllocas(ArrayRef<CoroAllocaAllocInst*> LocalAllocas,
2160                               SmallVectorImpl<Instruction*> &DeadInsts) {
2161   for (auto AI : LocalAllocas) {
2162     auto M = AI->getModule();
2163     IRBuilder<> Builder(AI);
2164 
2165     // Save the stack depth.  Try to avoid doing this if the stackrestore
2166     // is going to immediately precede a return or something.
2167     Value *StackSave = nullptr;
2168     if (localAllocaNeedsStackSave(AI))
2169       StackSave = Builder.CreateCall(
2170                             Intrinsic::getDeclaration(M, Intrinsic::stacksave));
2171 
2172     // Allocate memory.
2173     auto Alloca = Builder.CreateAlloca(Builder.getInt8Ty(), AI->getSize());
2174     Alloca->setAlignment(Align(AI->getAlignment()));
2175 
2176     for (auto U : AI->users()) {
2177       // Replace gets with the allocation.
2178       if (isa<CoroAllocaGetInst>(U)) {
2179         U->replaceAllUsesWith(Alloca);
2180 
2181       // Replace frees with stackrestores.  This is safe because
2182       // alloca.alloc is required to obey a stack discipline, although we
2183       // don't enforce that structurally.
2184       } else {
2185         auto FI = cast<CoroAllocaFreeInst>(U);
2186         if (StackSave) {
2187           Builder.SetInsertPoint(FI);
2188           Builder.CreateCall(
2189                     Intrinsic::getDeclaration(M, Intrinsic::stackrestore),
2190                              StackSave);
2191         }
2192       }
2193       DeadInsts.push_back(cast<Instruction>(U));
2194     }
2195 
2196     DeadInsts.push_back(AI);
2197   }
2198 }
2199 
2200 /// Turn the given coro.alloca.alloc call into a dynamic allocation.
2201 /// This happens during the all-instructions iteration, so it must not
2202 /// delete the call.
2203 static Instruction *lowerNonLocalAlloca(CoroAllocaAllocInst *AI,
2204                                         coro::Shape &Shape,
2205                                    SmallVectorImpl<Instruction*> &DeadInsts) {
2206   IRBuilder<> Builder(AI);
2207   auto Alloc = Shape.emitAlloc(Builder, AI->getSize(), nullptr);
2208 
2209   for (User *U : AI->users()) {
2210     if (isa<CoroAllocaGetInst>(U)) {
2211       U->replaceAllUsesWith(Alloc);
2212     } else {
2213       auto FI = cast<CoroAllocaFreeInst>(U);
2214       Builder.SetInsertPoint(FI);
2215       Shape.emitDealloc(Builder, Alloc, nullptr);
2216     }
2217     DeadInsts.push_back(cast<Instruction>(U));
2218   }
2219 
2220   // Push this on last so that it gets deleted after all the others.
2221   DeadInsts.push_back(AI);
2222 
2223   // Return the new allocation value so that we can check for needed spills.
2224   return cast<Instruction>(Alloc);
2225 }
2226 
2227 /// Get the current swifterror value.
2228 static Value *emitGetSwiftErrorValue(IRBuilder<> &Builder, Type *ValueTy,
2229                                      coro::Shape &Shape) {
2230   // Make a fake function pointer as a sort of intrinsic.
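  // Illustratively, the emitted IR is roughly "%v = call ValueTy null()"; the
  // call is recorded in Shape.SwiftErrorOps so it can be rewritten when the
  // coroutine is split.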
2231   auto FnTy = FunctionType::get(ValueTy, {}, false);
2232   auto Fn = ConstantPointerNull::get(FnTy->getPointerTo());
2233 
2234   auto Call = Builder.CreateCall(FnTy, Fn, {});
2235   Shape.SwiftErrorOps.push_back(Call);
2236 
2237   return Call;
2238 }
2239 
2240 /// Set the given value as the current swifterror value.
2241 ///
2242 /// Returns a slot that can be used as a swifterror slot.
2243 static Value *emitSetSwiftErrorValue(IRBuilder<> &Builder, Value *V,
2244                                      coro::Shape &Shape) {
2245   // Make a fake function pointer as a sort of intrinsic.
2246   auto FnTy = FunctionType::get(V->getType()->getPointerTo(),
2247                                 {V->getType()}, false);
2248   auto Fn = ConstantPointerNull::get(FnTy->getPointerTo());
2249 
2250   auto Call = Builder.CreateCall(FnTy, Fn, { V });
2251   Shape.SwiftErrorOps.push_back(Call);
2252 
2253   return Call;
2254 }
2255 
2256 /// Set the swifterror value from the given alloca before a call,
/// then put it back in the alloca afterwards.
2258 ///
2259 /// Returns an address that will stand in for the swifterror slot
2260 /// until splitting.
2261 static Value *emitSetAndGetSwiftErrorValueAround(Instruction *Call,
2262                                                  AllocaInst *Alloca,
2263                                                  coro::Shape &Shape) {
2264   auto ValueTy = Alloca->getAllocatedType();
2265   IRBuilder<> Builder(Call);
2266 
2267   // Load the current value from the alloca and set it as the
2268   // swifterror value.
2269   auto ValueBeforeCall = Builder.CreateLoad(ValueTy, Alloca);
2270   auto Addr = emitSetSwiftErrorValue(Builder, ValueBeforeCall, Shape);
2271 
2272   // Move to after the call.  Since swifterror only has a guaranteed
2273   // value on normal exits, we can ignore implicit and explicit unwind
2274   // edges.
2275   if (isa<CallInst>(Call)) {
2276     Builder.SetInsertPoint(Call->getNextNode());
2277   } else {
2278     auto Invoke = cast<InvokeInst>(Call);
2279     Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstNonPHIOrDbg());
2280   }
2281 
2282   // Get the current swifterror value and store it to the alloca.
2283   auto ValueAfterCall = emitGetSwiftErrorValue(Builder, ValueTy, Shape);
2284   Builder.CreateStore(ValueAfterCall, Alloca);
2285 
2286   return Addr;
2287 }
2288 
2289 /// Eliminate a formerly-swifterror alloca by inserting the get/set
2290 /// intrinsics and attempting to MemToReg the alloca away.
2291 static void eliminateSwiftErrorAlloca(Function &F, AllocaInst *Alloca,
2292                                       coro::Shape &Shape) {
2293   for (Use &Use : llvm::make_early_inc_range(Alloca->uses())) {
2294     // swifterror values can only be used in very specific ways.
2295     // We take advantage of that here.
2296     auto User = Use.getUser();
2297     if (isa<LoadInst>(User) || isa<StoreInst>(User))
2298       continue;
2299 
2300     assert(isa<CallInst>(User) || isa<InvokeInst>(User));
2301     auto Call = cast<Instruction>(User);
2302 
2303     auto Addr = emitSetAndGetSwiftErrorValueAround(Call, Alloca, Shape);
2304 
2305     // Use the returned slot address as the call argument.
2306     Use.set(Addr);
2307   }
2308 
2309   // All the uses should be loads and stores now.
2310   assert(isAllocaPromotable(Alloca));
2311 }
2312 
2313 /// "Eliminate" a swifterror argument by reducing it to the alloca case
2314 /// and then loading and storing in the prologue and epilog.
2315 ///
2316 /// The argument keeps the swifterror flag.
2317 static void eliminateSwiftErrorArgument(Function &F, Argument &Arg,
2318                                         coro::Shape &Shape,
2319                              SmallVectorImpl<AllocaInst*> &AllocasToPromote) {
2320   IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());
2321 
2322   auto ArgTy = cast<PointerType>(Arg.getType());
  // swifterror arguments are required to have pointer-to-pointer type, so the
  // value type is the argument's pointee type; with opaque pointers, fall back
  // to an unqualified pointer type.
2325   auto ValueTy = ArgTy->isOpaque() ? PointerType::getUnqual(F.getContext())
2326                                    : ArgTy->getNonOpaquePointerElementType();
2327 
2328   // Reduce to the alloca case:
2329 
2330   // Create an alloca and replace all uses of the arg with it.
2331   auto Alloca = Builder.CreateAlloca(ValueTy, ArgTy->getAddressSpace());
2332   Arg.replaceAllUsesWith(Alloca);
2333 
2334   // Set an initial value in the alloca.  swifterror is always null on entry.
2335   auto InitialValue = Constant::getNullValue(ValueTy);
2336   Builder.CreateStore(InitialValue, Alloca);
2337 
2338   // Find all the suspends in the function and save and restore around them.
2339   for (auto Suspend : Shape.CoroSuspends) {
2340     (void) emitSetAndGetSwiftErrorValueAround(Suspend, Alloca, Shape);
2341   }
2342 
2343   // Find all the coro.ends in the function and restore the error value.
2344   for (auto End : Shape.CoroEnds) {
2345     Builder.SetInsertPoint(End);
2346     auto FinalValue = Builder.CreateLoad(ValueTy, Alloca);
2347     (void) emitSetSwiftErrorValue(Builder, FinalValue, Shape);
2348   }
2349 
2350   // Now we can use the alloca logic.
2351   AllocasToPromote.push_back(Alloca);
2352   eliminateSwiftErrorAlloca(F, Alloca, Shape);
2353 }
2354 
2355 /// Eliminate all problematic uses of swifterror arguments and allocas
2356 /// from the function.  We'll fix them up later when splitting the function.
2357 static void eliminateSwiftError(Function &F, coro::Shape &Shape) {
2358   SmallVector<AllocaInst*, 4> AllocasToPromote;
2359 
2360   // Look for a swifterror argument.
2361   for (auto &Arg : F.args()) {
2362     if (!Arg.hasSwiftErrorAttr()) continue;
2363 
2364     eliminateSwiftErrorArgument(F, Arg, Shape, AllocasToPromote);
2365     break;
2366   }
2367 
2368   // Look for swifterror allocas.
2369   for (auto &Inst : F.getEntryBlock()) {
2370     auto Alloca = dyn_cast<AllocaInst>(&Inst);
2371     if (!Alloca || !Alloca->isSwiftError()) continue;
2372 
2373     // Clear the swifterror flag.
2374     Alloca->setSwiftError(false);
2375 
2376     AllocasToPromote.push_back(Alloca);
2377     eliminateSwiftErrorAlloca(F, Alloca, Shape);
2378   }
2379 
2380   // If we have any allocas to promote, compute a dominator tree and
2381   // promote them en masse.
2382   if (!AllocasToPromote.empty()) {
2383     DominatorTree DT(F);
2384     PromoteMemToReg(AllocasToPromote, DT);
2385   }
2386 }
2387 
2388 /// retcon and retcon.once conventions assume that all spill uses can be sunk
2389 /// after the coro.begin intrinsic.
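/// This works by collecting every instruction that (transitively) uses a frame
/// value but is not dominated by coro.begin, sorting that set by dominance,
/// and moving the instructions to just after coro.begin.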
2390 static void sinkSpillUsesAfterCoroBegin(Function &F,
2391                                         const FrameDataInfo &FrameData,
2392                                         CoroBeginInst *CoroBegin) {
2393   DominatorTree Dom(F);
2394 
2395   SmallSetVector<Instruction *, 32> ToMove;
2396   SmallVector<Instruction *, 32> Worklist;
2397 
2398   // Collect all users that precede coro.begin.
2399   for (auto *Def : FrameData.getAllDefs()) {
2400     for (User *U : Def->users()) {
2401       auto Inst = cast<Instruction>(U);
2402       if (Inst->getParent() != CoroBegin->getParent() ||
2403           Dom.dominates(CoroBegin, Inst))
2404         continue;
2405       if (ToMove.insert(Inst))
2406         Worklist.push_back(Inst);
2407     }
2408   }
2409   // Recursively collect users before coro.begin.
2410   while (!Worklist.empty()) {
2411     auto *Def = Worklist.pop_back_val();
2412     for (User *U : Def->users()) {
2413       auto Inst = cast<Instruction>(U);
2414       if (Dom.dominates(CoroBegin, Inst))
2415         continue;
2416       if (ToMove.insert(Inst))
2417         Worklist.push_back(Inst);
2418     }
2419   }
2420 
2421   // Sort by dominance.
2422   SmallVector<Instruction *, 64> InsertionList(ToMove.begin(), ToMove.end());
2423   llvm::sort(InsertionList, [&Dom](Instruction *A, Instruction *B) -> bool {
    // If A dominates B, it should precede (<) B.
2425     return Dom.dominates(A, B);
2426   });
2427 
2428   Instruction *InsertPt = CoroBegin->getNextNode();
2429   for (Instruction *Inst : InsertionList)
2430     Inst->moveBefore(InsertPt);
2431 }
2432 
/// For each local variable whose users are all inside a single suspended
/// region, sink its lifetime.start marker to the block right after the suspend
/// block. Doing so minimizes the lifetime of each variable, hence minimizing
/// the amount of data we end up putting on the frame.
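///
/// For example (illustrative): given
///
///   entry:
///     %x = alloca i8
///     call void @llvm.lifetime.start.p0i8(i64 1, i8* %x)
///     ; ... suspend ...
///   after.suspend:
///     ; only uses of %x
///
/// the lifetime.start marker is sunk into %after.suspend, so %x is no longer
/// live across the suspend and need not be placed on the frame.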
2437 static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape,
2438                                      SuspendCrossingInfo &Checker) {
2439   DominatorTree DT(F);
2440 
2441   // Collect all possible basic blocks which may dominate all uses of allocas.
2442   SmallPtrSet<BasicBlock *, 4> DomSet;
2443   DomSet.insert(&F.getEntryBlock());
2444   for (auto *CSI : Shape.CoroSuspends) {
2445     BasicBlock *SuspendBlock = CSI->getParent();
2446     assert(isSuspendBlock(SuspendBlock) && SuspendBlock->getSingleSuccessor() &&
2447            "should have split coro.suspend into its own block");
2448     DomSet.insert(SuspendBlock->getSingleSuccessor());
2449   }
2450 
2451   for (Instruction &I : instructions(F)) {
2452     AllocaInst* AI = dyn_cast<AllocaInst>(&I);
2453     if (!AI)
2454       continue;
2455 
2456     for (BasicBlock *DomBB : DomSet) {
2457       bool Valid = true;
2458       SmallVector<Instruction *, 1> Lifetimes;
2459 
2460       auto isLifetimeStart = [](Instruction* I) {
2461         if (auto* II = dyn_cast<IntrinsicInst>(I))
2462           return II->getIntrinsicID() == Intrinsic::lifetime_start;
2463         return false;
2464       };
2465 
2466       auto collectLifetimeStart = [&](Instruction *U, AllocaInst *AI) {
2467         if (isLifetimeStart(U)) {
2468           Lifetimes.push_back(U);
2469           return true;
2470         }
2471         if (!U->hasOneUse() || U->stripPointerCasts() != AI)
2472           return false;
2473         if (isLifetimeStart(U->user_back())) {
2474           Lifetimes.push_back(U->user_back());
2475           return true;
2476         }
2477         return false;
2478       };
2479 
2480       for (User *U : AI->users()) {
2481         Instruction *UI = cast<Instruction>(U);
2482         // For all users except lifetime.start markers, if they are all
2483         // dominated by one of the basic blocks and do not cross
2484         // suspend points as well, then there is no need to spill the
2485         // instruction.
2486         if (!DT.dominates(DomBB, UI->getParent()) ||
2487             Checker.isDefinitionAcrossSuspend(DomBB, UI)) {
2488           // Skip lifetime.start, GEP and bitcast used by lifetime.start
2489           // markers.
2490           if (collectLifetimeStart(UI, AI))
2491             continue;
2492           Valid = false;
2493           break;
2494         }
2495       }
      // Sink lifetime.start markers into the dominating block when they are
      // the only users found outside the region.
2498       if (Valid && Lifetimes.size() != 0) {
        // This may be AI itself, when the type of AI is already i8*.
2500         auto *NewBitCast = [&](AllocaInst *AI) -> Value* {
2501           if (isa<AllocaInst>(Lifetimes[0]->getOperand(1)))
2502             return AI;
2503           auto *Int8PtrTy = Type::getInt8PtrTy(F.getContext());
2504           return CastInst::Create(Instruction::BitCast, AI, Int8PtrTy, "",
2505                                   DomBB->getTerminator());
2506         }(AI);
2507 
2508         auto *NewLifetime = Lifetimes[0]->clone();
2509         NewLifetime->replaceUsesOfWith(NewLifetime->getOperand(1), NewBitCast);
2510         NewLifetime->insertBefore(DomBB->getTerminator());
2511 
        // All the lifetime.start markers outside the region are no longer
        // necessary.
2513         for (Instruction *S : Lifetimes)
2514           S->eraseFromParent();
2515 
2516         break;
2517       }
2518     }
2519   }
2520 }
2521 
2522 static void collectFrameAllocas(Function &F, coro::Shape &Shape,
2523                                 const SuspendCrossingInfo &Checker,
2524                                 SmallVectorImpl<AllocaInfo> &Allocas) {
2525   for (Instruction &I : instructions(F)) {
2526     auto *AI = dyn_cast<AllocaInst>(&I);
2527     if (!AI)
2528       continue;
2529     // The PromiseAlloca will be specially handled since it needs to be in a
2530     // fixed position in the frame.
2531     if (AI == Shape.SwitchLowering.PromiseAlloca) {
2532       continue;
2533     }
2534     DominatorTree DT(F);
    // The code that uses the lifetime.start intrinsic does not work for
    // functions containing loops with no exit. Disable it on ABIs that we know
    // generate such code.
2538     bool ShouldUseLifetimeStartInfo =
2539         (Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon &&
2540          Shape.ABI != coro::ABI::RetconOnce);
2541     AllocaUseVisitor Visitor{F.getParent()->getDataLayout(), DT,
2542                              *Shape.CoroBegin, Checker,
2543                              ShouldUseLifetimeStartInfo};
2544     Visitor.visitPtr(*AI);
2545     if (!Visitor.getShouldLiveOnFrame())
2546       continue;
2547     Allocas.emplace_back(AI, Visitor.getAliasesCopy(),
2548                          Visitor.getMayWriteBeforeCoroBegin());
2549   }
2550 }
2551 
2552 void coro::salvageDebugInfo(
2553     SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> &DbgPtrAllocaCache,
2554     DbgVariableIntrinsic *DVI, bool OptimizeFrame) {
2555   Function *F = DVI->getFunction();
2556   IRBuilder<> Builder(F->getContext());
2557   auto InsertPt = F->getEntryBlock().getFirstInsertionPt();
2558   while (isa<IntrinsicInst>(InsertPt))
2559     ++InsertPt;
2560   Builder.SetInsertPoint(&F->getEntryBlock(), InsertPt);
2561   DIExpression *Expr = DVI->getExpression();
2562   // Follow the pointer arithmetic all the way to the incoming
2563   // function argument and convert into a DIExpression.
2564   bool SkipOutermostLoad = !isa<DbgValueInst>(DVI);
2565   Value *Storage = DVI->getVariableLocationOp(0);
2566   Value *OriginalStorage = Storage;
2567 
2568   while (auto *Inst = dyn_cast_or_null<Instruction>(Storage)) {
2569     if (auto *LdInst = dyn_cast<LoadInst>(Inst)) {
2570       Storage = LdInst->getOperand(0);
2571       // FIXME: This is a heuristic that works around the fact that
2572       // LLVM IR debug intrinsics cannot yet distinguish between
2573       // memory and value locations: Because a dbg.declare(alloca) is
2574       // implicitly a memory location no DW_OP_deref operation for the
2575       // last direct load from an alloca is necessary.  This condition
2576       // effectively drops the *last* DW_OP_deref in the expression.
2577       if (!SkipOutermostLoad)
2578         Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
2579     } else if (auto *StInst = dyn_cast<StoreInst>(Inst)) {
2580       Storage = StInst->getOperand(0);
2581     } else {
2582       SmallVector<uint64_t, 16> Ops;
2583       SmallVector<Value *, 0> AdditionalValues;
2584       Value *Op = llvm::salvageDebugInfoImpl(
2585           *Inst, Expr ? Expr->getNumLocationOperands() : 0, Ops,
2586           AdditionalValues);
2587       if (!Op || !AdditionalValues.empty()) {
2588         // If salvaging failed or salvaging produced more than one location
2589         // operand, give up.
2590         break;
2591       }
2592       Storage = Op;
2593       Expr = DIExpression::appendOpsToArg(Expr, Ops, 0, /*StackValue*/ false);
2594     }
2595     SkipOutermostLoad = false;
2596   }
2597   if (!Storage)
2598     return;
2599 
2600   // Store a pointer to the coroutine frame object in an alloca so it
2601   // is available throughout the function when producing unoptimized
2602   // code. Extending the lifetime this way is correct because the
2603   // variable has been declared by a dbg.declare intrinsic.
2604   //
  // Avoid creating the alloca when optimizing, since it would be eliminated by
  // optimization passes and the corresponding dbg.declares would become
  // invalid.
2607   if (!OptimizeFrame)
2608     if (auto *Arg = dyn_cast<llvm::Argument>(Storage)) {
2609       auto &Cached = DbgPtrAllocaCache[Storage];
2610       if (!Cached) {
2611         Cached = Builder.CreateAlloca(Storage->getType(), 0, nullptr,
2612                                       Arg->getName() + ".debug");
2613         Builder.CreateStore(Storage, Cached);
2614       }
2615       Storage = Cached;
2616       // FIXME: LLVM lacks nuanced semantics to differentiate between
2617       // memory and direct locations at the IR level. The backend will
2618       // turn a dbg.declare(alloca, ..., DIExpression()) into a memory
2619       // location. Thus, if there are deref and offset operations in the
2620       // expression, we need to add a DW_OP_deref at the *start* of the
2621       // expression to first load the contents of the alloca before
2622       // adjusting it with the expression.
2623       Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
2624     }
2625 
2626   DVI->replaceVariableLocationOp(OriginalStorage, Storage);
2627   DVI->setExpression(Expr);
2628   // We only hoist dbg.declare today since it doesn't make sense to hoist
2629   // dbg.value or dbg.addr since they do not have the same function wide
2630   // guarantees that dbg.declare does.
2631   if (!isa<DbgValueInst>(DVI) && !isa<DbgAddrIntrinsic>(DVI)) {
2632     if (auto *II = dyn_cast<InvokeInst>(Storage))
2633       DVI->moveBefore(II->getNormalDest()->getFirstNonPHI());
2634     else if (auto *CBI = dyn_cast<CallBrInst>(Storage))
2635       DVI->moveBefore(CBI->getDefaultDest()->getFirstNonPHI());
2636     else if (auto *InsertPt = dyn_cast<Instruction>(Storage)) {
2637       assert(!InsertPt->isTerminator() &&
             "Unexpected terminator that could return a storage.");
2639       DVI->moveAfter(InsertPt);
2640     } else if (isa<Argument>(Storage))
2641       DVI->moveAfter(F->getEntryBlock().getFirstNonPHI());
2642   }
2643 }
2644 
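// Entry point for frame building: eliminate problematic swifterror uses, split
// coro.save/coro.suspend/coro.end into their own blocks, rewrite PHIs, compute
// suspend-crossing information, and collect the spills and allocas that must
// live on the coroutine frame.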
2645 void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
2646   // Don't eliminate swifterror in async functions that won't be split.
2647   if (Shape.ABI != coro::ABI::Async || !Shape.CoroSuspends.empty())
2648     eliminateSwiftError(F, Shape);
2649 
2650   if (Shape.ABI == coro::ABI::Switch &&
2651       Shape.SwitchLowering.PromiseAlloca) {
2652     Shape.getSwitchCoroId()->clearPromise();
2653   }
2654 
2655   // Make sure that all coro.save, coro.suspend and the fallthrough coro.end
2656   // intrinsics are in their own blocks to simplify the logic of building up
2657   // SuspendCrossing data.
2658   for (auto *CSI : Shape.CoroSuspends) {
2659     if (auto *Save = CSI->getCoroSave())
2660       splitAround(Save, "CoroSave");
2661     splitAround(CSI, "CoroSuspend");
2662   }
2663 
2664   // Put CoroEnds into their own blocks.
2665   for (AnyCoroEndInst *CE : Shape.CoroEnds) {
2666     splitAround(CE, "CoroEnd");
2667 
2668     // Emit the musttail call function in a new block before the CoroEnd.
    // We do this here so that the right suspend-crossing info is computed for
    // the uses of the musttail call. (Arguments to the coro.end instruction
    // would otherwise be ignored.)
2672     if (auto *AsyncEnd = dyn_cast<CoroAsyncEndInst>(CE)) {
2673       auto *MustTailCallFn = AsyncEnd->getMustTailCallFunction();
2674       if (!MustTailCallFn)
2675         continue;
2676       IRBuilder<> Builder(AsyncEnd);
2677       SmallVector<Value *, 8> Args(AsyncEnd->args());
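      // The leading operands of coro.end.async are the coroutine handle,
      // the unwind flag, and the must-tail callee itself; only the remaining
      // operands are arguments for the call, hence the drop_front(3) below.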
      auto Arguments = ArrayRef<Value *>(Args).drop_front(3);
      auto *Call = createMustTailCall(AsyncEnd->getDebugLoc(), MustTailCallFn,
                                      Arguments, Builder);
      splitAround(Call, "MustTailCall.Before.CoroEnd");
    }
  }

  // Later code makes structural assumptions about single-predecessor PHIs,
  // e.g. that they are not live across a suspend point.
  cleanupSinglePredPHIs(F);

  // Transform multi-edge PHI nodes so that any value feeding into a PHI
  // never has its definition separated from the PHI by a suspend point.
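  // A rough sketch of the rewrite (IR names are hypothetical): a join like
  //   merge:
  //     %v = phi i32 [ %a, %left ], [ %b, %right ]
  // has each incoming edge split into its own block carrying a
  // single-operand phi, e.g.
  //   left.merge:
  //     %a.spill = phi i32 [ %a, %left ]
  //     br label %merge
  // so that a later reload of %a after a suspend has a block to land in.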
  rewritePHIs(F);

  // Build suspend crossing info.
  SuspendCrossingInfo Checker(F, Shape);

  IRBuilder<> Builder(F.getContext());
  FrameDataInfo FrameData;
  SmallVector<CoroAllocaAllocInst*, 4> LocalAllocas;
  SmallVector<Instruction*, 4> DeadInstructions;

  {
    SpillInfo Spills;
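    // A minimal sketch of what "materializable" buys us (IR is hypothetical):
    //   %idx = add i64 %n, 1
    //   ...suspend...
    //   use of %idx
    // The add is cheap to recompute from values already available after the
    // resume, so instead of giving %idx a frame slot it is cloned right
    // before its post-suspend uses.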
    for (int Repeat = 0; Repeat < 4; ++Repeat) {
      // See if there are materializable instructions across suspend points.
      for (Instruction &I : instructions(F))
        if (materializable(I)) {
          for (User *U : I.users())
            if (Checker.isDefinitionAcrossSuspend(I, U))
              Spills[&I].push_back(cast<Instruction>(U));
        }

      if (Spills.empty())
        break;

      // Rewrite materializable instructions to be materialized at the use
      // point.
      LLVM_DEBUG(dumpSpills("Materializations", Spills));
      rewriteMaterializableInstructions(Builder, Spills);
      Spills.clear();
    }
  }

  if (Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon &&
      Shape.ABI != coro::ABI::RetconOnce)
    sinkLifetimeStartMarkers(F, Shape, Checker);

  if (Shape.ABI != coro::ABI::Async || !Shape.CoroSuspends.empty())
    collectFrameAllocas(F, Shape, Checker, FrameData.Allocas);
  LLVM_DEBUG(dumpAllocas(FrameData.Allocas));

  // Collect the spills for arguments and other non-materializable values.
  for (Argument &A : F.args())
    for (User *U : A.users())
      if (Checker.isDefinitionAcrossSuspend(A, U))
        FrameData.Spills[&A].push_back(cast<Instruction>(U));

  for (Instruction &I : instructions(F)) {
    // Values returned from coroutine structure intrinsics should not be part
    // of the Coroutine Frame.
    if (isCoroutineStructureIntrinsic(I) || &I == Shape.CoroBegin)
      continue;

    // The coroutine promise is always included in the coroutine frame; no
    // need to check for suspend crossing.
    if (Shape.ABI == coro::ABI::Switch &&
        Shape.SwitchLowering.PromiseAlloca == &I)
      continue;

    // Handle alloca.alloc specially here.
    if (auto AI = dyn_cast<CoroAllocaAllocInst>(&I)) {
      // Check whether the alloca's lifetime is bounded by suspend points.
      if (isLocalAlloca(AI)) {
        LocalAllocas.push_back(AI);
        continue;
      }

      // If not, do a quick rewrite of the alloca and then add spills of
      // the rewritten value.  The rewrite doesn't invalidate anything in
      // Spills because the other alloca intrinsics have no other operands
      // besides AI, and it doesn't invalidate the iteration because we delay
      // erasing AI.
      auto Alloc = lowerNonLocalAlloca(AI, Shape, DeadInstructions);

      for (User *U : Alloc->users()) {
        if (Checker.isDefinitionAcrossSuspend(*Alloc, U))
          FrameData.Spills[Alloc].push_back(cast<Instruction>(U));
      }
      continue;
    }

    // Ignore alloca.get; we process this as part of coro.alloca.alloc.
    if (isa<CoroAllocaGetInst>(I))
      continue;

    if (isa<AllocaInst>(I))
      continue;

    for (User *U : I.users())
      if (Checker.isDefinitionAcrossSuspend(I, U)) {
        // We cannot spill a token.
        if (I.getType()->isTokenTy())
          report_fatal_error(
              "token definition is separated from the use by a suspend point");
        FrameData.Spills[&I].push_back(cast<Instruction>(U));
      }
  }

  // We don't want the layout of the coroutine frame to be affected by debug
  // information, so we only salvage DbgValueInsts whose values are already
  // part of the frame. The dbg.values for allocas are handled specially.
  for (auto &Iter : FrameData.Spills) {
    auto *V = Iter.first;
    SmallVector<DbgValueInst *, 16> DVIs;
    findDbgValues(DVIs, V);
    for (DbgValueInst *DVI : DVIs)
      if (Checker.isDefinitionAcrossSuspend(*V, DVI))
        FrameData.Spills[V].push_back(DVI);
  }

  LLVM_DEBUG(dumpSpills("Spills", FrameData.Spills));
  if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
      Shape.ABI == coro::ABI::Async)
    sinkSpillUsesAfterCoroBegin(F, FrameData, Shape.CoroBegin);
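  // A rough sketch of the frame this produces for the switch ABI (field
  // order past the header is chosen by the layout algorithm, and the names
  // below are only illustrative):
  //   struct f.Frame {
  //     void (*ResumeFn)(f.Frame *);
  //     void (*DestroyFn)(f.Frame *);
  //     promise_type Promise;        // if the coroutine has one
  //     ...spilled values and non-local allocas...
  //     iN SuspendIndex;             // which suspend point to resume at
  //   };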
  Shape.FrameTy = buildFrameType(F, Shape, FrameData);
  createFramePtr(Shape);
  // For now, this works for C++ programs only.
  buildFrameDebugInfo(F, Shape, FrameData);
  insertSpills(FrameData, Shape);
  lowerLocalAllocas(LocalAllocas, DeadInstructions);

  for (auto I : DeadInstructions)
    I->eraseFromParent();
}