1 //===- CoroFrame.cpp - Builds and manipulates coroutine frame -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
// This file contains classes used to discover if for a particular value
// there is a path from its definition to a use that crosses a suspend point.
10 //
11 // Using the information discovered we form a Coroutine Frame structure to
12 // contain those values. All uses of those values are replaced with appropriate
13 // GEP + load from the coroutine frame. At the point of the definition we spill
14 // the value into the coroutine frame.
15 //
16 // TODO: pack values tightly using liveness info.
17 //===----------------------------------------------------------------------===//
18 
19 #include "CoroInternal.h"
20 #include "llvm/ADT/BitVector.h"
21 #include "llvm/ADT/SmallString.h"
22 #include "llvm/Analysis/PtrUseVisitor.h"
23 #include "llvm/Analysis/StackLifetime.h"
24 #include "llvm/Config/llvm-config.h"
25 #include "llvm/IR/CFG.h"
26 #include "llvm/IR/DIBuilder.h"
27 #include "llvm/IR/Dominators.h"
28 #include "llvm/IR/IRBuilder.h"
29 #include "llvm/IR/InstIterator.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/MathExtras.h"
33 #include "llvm/Support/OptimizedStructLayout.h"
34 #include "llvm/Support/circular_raw_ostream.h"
35 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
36 #include "llvm/Transforms/Utils/Local.h"
37 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
38 #include <algorithm>
39 
40 using namespace llvm;
41 
42 // The "coro-suspend-crossing" flag is very noisy. There is another debug type,
43 // "coro-frame", which results in leaner debug spew.
44 #define DEBUG_TYPE "coro-suspend-crossing"
45 
46 static cl::opt<bool> EnableReuseStorageInFrame(
47     "reuse-storage-in-coroutine-frame", cl::Hidden,
48     cl::desc(
49         "Enable the optimization which would reuse the storage in the coroutine \
50          frame for allocas whose liferanges are not overlapped, for testing purposes"),
51     llvm::cl::init(false));
52 
53 enum { SmallVectorThreshold = 32 };
54 
55 // Provides two way mapping between the blocks and numbers.
56 namespace {
57 class BlockToIndexMapping {
58   SmallVector<BasicBlock *, SmallVectorThreshold> V;
59 
60 public:
61   size_t size() const { return V.size(); }
62 
63   BlockToIndexMapping(Function &F) {
64     for (BasicBlock &BB : F)
65       V.push_back(&BB);
66     llvm::sort(V);
67   }
68 
69   size_t blockToIndex(BasicBlock *BB) const {
70     auto *I = llvm::lower_bound(V, BB);
71     assert(I != V.end() && *I == BB && "BasicBlockNumberng: Unknown block");
72     return I - V.begin();
73   }
74 
75   BasicBlock *indexToBlock(unsigned Index) const { return V[Index]; }
76 };
77 } // end anonymous namespace
78 
79 // The SuspendCrossingInfo maintains data that allows to answer a question
80 // whether given two BasicBlocks A and B there is a path from A to B that
81 // passes through a suspend point.
82 //
83 // For every basic block 'i' it maintains a BlockData that consists of:
84 //   Consumes:  a bit vector which contains a set of indices of blocks that can
85 //              reach block 'i'
//   Kills: a bit vector which contains a set of indices of blocks that can
//          reach block 'i', but at least one path crosses a suspend point
88 //   Suspend: a boolean indicating whether block 'i' contains a suspend point.
89 //   End: a boolean indicating whether block 'i' contains a coro.end intrinsic.
90 //
namespace {
struct SuspendCrossingInfo {
  BlockToIndexMapping Mapping;

  // Per-block dataflow facts; indices into the bit vectors are the dense
  // block numbers from Mapping.
  struct BlockData {
    BitVector Consumes;
    BitVector Kills;
    bool Suspend = false;
    bool End = false;
  };
  SmallVector<BlockData, SmallVectorThreshold> Block;

  // Recover the BasicBlock for BD (via its position in the Block array) and
  // return that block's successors.
  iterator_range<succ_iterator> successors(BlockData const &BD) const {
    BasicBlock *BB = Mapping.indexToBlock(&BD - &Block[0]);
    return llvm::successors(BB);
  }

  BlockData &getBlockData(BasicBlock *BB) {
    return Block[Mapping.blockToIndex(BB)];
  }

  void dump() const;
  void dump(StringRef Label, BitVector const &BV) const;

  SuspendCrossingInfo(Function &F, coro::Shape &Shape);

  /// Returns true if some path from DefBB to UseBB crosses a suspend point
  /// (i.e. DefBB's index is in UseBB's Kills set).
  bool hasPathCrossingSuspendPoint(BasicBlock *DefBB, BasicBlock *UseBB) const {
    size_t const DefIndex = Mapping.blockToIndex(DefBB);
    size_t const UseIndex = Mapping.blockToIndex(UseBB);

    bool const Result = Block[UseIndex].Kills[DefIndex];
    LLVM_DEBUG(dbgs() << UseBB->getName() << " => " << DefBB->getName()
                      << " answer is " << Result << "\n");
    return Result;
  }

  /// Returns true if a value defined in DefBB reaches the use U across a
  /// suspend point, meaning it must live in the coroutine frame.
  bool isDefinitionAcrossSuspend(BasicBlock *DefBB, User *U) const {
    auto *I = cast<Instruction>(U);

    // We rewrote PHINodes, so that only the ones with exactly one incoming
    // value need to be analyzed.
    if (auto *PN = dyn_cast<PHINode>(I))
      if (PN->getNumIncomingValues() > 1)
        return false;

    BasicBlock *UseBB = I->getParent();

    // As a special case, treat uses by an llvm.coro.suspend.retcon
    // as if they were uses in the suspend's single predecessor: the
    // uses conceptually occur before the suspend.
    if (isa<CoroSuspendRetconInst>(I)) {
      UseBB = UseBB->getSinglePredecessor();
      assert(UseBB && "should have split coro.suspend into its own block");
    }

    return hasPathCrossingSuspendPoint(DefBB, UseBB);
  }

  // Arguments are conceptually defined in the entry block.
  bool isDefinitionAcrossSuspend(Argument &A, User *U) const {
    return isDefinitionAcrossSuspend(&A.getParent()->getEntryBlock(), U);
  }

  bool isDefinitionAcrossSuspend(Instruction &I, User *U) const {
    auto *DefBB = I.getParent();

    // As a special case, treat values produced by an llvm.coro.suspend.*
    // as if they were defined in the single successor: the uses
    // conceptually occur after the suspend.
    if (isa<AnyCoroSuspendInst>(I)) {
      DefBB = DefBB->getSingleSuccessor();
      assert(DefBB && "should have split coro.suspend into its own block");
    }

    return isDefinitionAcrossSuspend(DefBB, U);
  }
};
} // end anonymous namespace
168 
169 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
170 LLVM_DUMP_METHOD void SuspendCrossingInfo::dump(StringRef Label,
171                                                 BitVector const &BV) const {
172   dbgs() << Label << ":";
173   for (size_t I = 0, N = BV.size(); I < N; ++I)
174     if (BV[I])
175       dbgs() << " " << Mapping.indexToBlock(I)->getName();
176   dbgs() << "\n";
177 }
178 
179 LLVM_DUMP_METHOD void SuspendCrossingInfo::dump() const {
180   for (size_t I = 0, N = Block.size(); I < N; ++I) {
181     BasicBlock *const B = Mapping.indexToBlock(I);
182     dbgs() << B->getName() << ":\n";
183     dump("   Consumes", Block[I].Consumes);
184     dump("      Kills", Block[I].Kills);
185   }
186   dbgs() << "\n";
187 }
188 #endif
189 
190 SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
191     : Mapping(F) {
192   const size_t N = Mapping.size();
193   Block.resize(N);
194 
195   // Initialize every block so that it consumes itself
196   for (size_t I = 0; I < N; ++I) {
197     auto &B = Block[I];
198     B.Consumes.resize(N);
199     B.Kills.resize(N);
200     B.Consumes.set(I);
201   }
202 
203   // Mark all CoroEnd Blocks. We do not propagate Kills beyond coro.ends as
204   // the code beyond coro.end is reachable during initial invocation of the
205   // coroutine.
206   for (auto *CE : Shape.CoroEnds)
207     getBlockData(CE->getParent()).End = true;
208 
209   // Mark all suspend blocks and indicate that they kill everything they
210   // consume. Note, that crossing coro.save also requires a spill, as any code
211   // between coro.save and coro.suspend may resume the coroutine and all of the
212   // state needs to be saved by that time.
213   auto markSuspendBlock = [&](IntrinsicInst *BarrierInst) {
214     BasicBlock *SuspendBlock = BarrierInst->getParent();
215     auto &B = getBlockData(SuspendBlock);
216     B.Suspend = true;
217     B.Kills |= B.Consumes;
218   };
219   for (auto *CSI : Shape.CoroSuspends) {
220     markSuspendBlock(CSI);
221     if (auto *Save = CSI->getCoroSave())
222       markSuspendBlock(Save);
223   }
224 
225   // Iterate propagating consumes and kills until they stop changing.
226   int Iteration = 0;
227   (void)Iteration;
228 
229   bool Changed;
230   do {
231     LLVM_DEBUG(dbgs() << "iteration " << ++Iteration);
232     LLVM_DEBUG(dbgs() << "==============\n");
233 
234     Changed = false;
235     for (size_t I = 0; I < N; ++I) {
236       auto &B = Block[I];
237       for (BasicBlock *SI : successors(B)) {
238 
239         auto SuccNo = Mapping.blockToIndex(SI);
240 
241         // Saved Consumes and Kills bitsets so that it is easy to see
242         // if anything changed after propagation.
243         auto &S = Block[SuccNo];
244         auto SavedConsumes = S.Consumes;
245         auto SavedKills = S.Kills;
246 
247         // Propagate Kills and Consumes from block B into its successor S.
248         S.Consumes |= B.Consumes;
249         S.Kills |= B.Kills;
250 
251         // If block B is a suspend block, it should propagate kills into the
252         // its successor for every block B consumes.
253         if (B.Suspend) {
254           S.Kills |= B.Consumes;
255         }
256         if (S.Suspend) {
257           // If block S is a suspend block, it should kill all of the blocks it
258           // consumes.
259           S.Kills |= S.Consumes;
260         } else if (S.End) {
261           // If block S is an end block, it should not propagate kills as the
262           // blocks following coro.end() are reached during initial invocation
263           // of the coroutine while all the data are still available on the
264           // stack or in the registers.
265           S.Kills.reset();
266         } else {
267           // This is reached when S block it not Suspend nor coro.end and it
268           // need to make sure that it is not in the kill set.
269           S.Kills.reset(SuccNo);
270         }
271 
272         // See if anything changed.
273         Changed |= (S.Kills != SavedKills) || (S.Consumes != SavedConsumes);
274 
275         if (S.Kills != SavedKills) {
276           LLVM_DEBUG(dbgs() << "\nblock " << I << " follower " << SI->getName()
277                             << "\n");
278           LLVM_DEBUG(dump("S.Kills", S.Kills));
279           LLVM_DEBUG(dump("SavedKills", SavedKills));
280         }
281         if (S.Consumes != SavedConsumes) {
282           LLVM_DEBUG(dbgs() << "\nblock " << I << " follower " << SI << "\n");
283           LLVM_DEBUG(dump("S.Consume", S.Consumes));
284           LLVM_DEBUG(dump("SavedCons", SavedConsumes));
285         }
286       }
287     }
288   } while (Changed);
289   LLVM_DEBUG(dump());
290 }
291 
#undef DEBUG_TYPE // "coro-suspend-crossing"
#define DEBUG_TYPE "coro-frame"

// We build up the list of spills for every case where a use is separated
// from the definition by a suspend point.

// Sentinel value meaning "no frame field has been assigned to this spill yet".
static const unsigned InvalidFieldIndex = ~0U;
299 
namespace {
// A (definition, user) pair recording that the value Def must be spilled to
// the coroutine frame because the use U is separated from Def by a suspend
// point.
class Spill {
  Value *Def = nullptr;
  Instruction *User = nullptr;
  unsigned FieldNo = InvalidFieldIndex;

public:
  Spill(Value *Def, llvm::User *U) : Def(Def), User(cast<Instruction>(U)) {}

  Value *def() const { return Def; }
  Instruction *user() const { return User; }
  BasicBlock *userBlock() const { return User->getParent(); }

  // Note that field index is stored in the first SpillEntry for a particular
  // definition. Subsequent mentions of a definition do not have fieldNo
  // assigned. This works out fine as the users of Spills capture the info about
  // the definition the first time they encounter it. Consider refactoring
  // SpillInfo into two arrays to normalize the spill representation.
  unsigned fieldIndex() const {
    assert(FieldNo != InvalidFieldIndex && "Accessing unassigned field");
    return FieldNo;
  }
  void setFieldIndex(unsigned FieldNumber) {
    assert(FieldNo == InvalidFieldIndex && "Reassigning field number");
    FieldNo = FieldNumber;
  }
};
} // namespace
328 
329 // Note that there may be more than one record with the same value of Def in
330 // the SpillInfo vector.
331 using SpillInfo = SmallVector<Spill, 8>;
332 
333 #ifndef NDEBUG
334 static void dump(StringRef Title, SpillInfo const &Spills) {
335   dbgs() << "------------- " << Title << "--------------\n";
336   Value *CurrentValue = nullptr;
337   for (auto const &E : Spills) {
338     if (CurrentValue != E.def()) {
339       CurrentValue = E.def();
340       CurrentValue->dump();
341     }
342     dbgs() << "   user: ";
343     E.user()->dump();
344   }
345 }
346 #endif
347 
namespace {
// We cannot rely solely on natural alignment of a type when building a
// coroutine frame and if the alignment specified on the Alloca instruction
// differs from the natural alignment of the alloca type we will need to insert
// padding.
class FrameTypeBuilder {
public:
  using ForSpillType = SmallVector<Spill *, 8>;

private:
  // One pending field of the frame struct being laid out.
  struct Field {
    uint64_t Size;         // alloc size of Ty in bytes
    uint64_t Offset;       // assigned offset, or FlexibleOffset until finish()
    ForSpillType ForSpill; // spill records to update with the field index
    Type *Ty;
    unsigned FieldIndex;   // index into the final struct body (set by finish())
    Align Alignment;       // requested alignment for the field
    Align TyAlignment;     // natural ABI alignment of Ty
  };

  const DataLayout &DL;
  LLVMContext &Context;
  uint64_t StructSize = 0;
  Align StructAlign;
  bool IsFinished = false;

  SmallVector<Field, 8> Fields;
  // NOTE(review): this member appears unused anywhere in this file — likely
  // dead; confirm before removing (it is private, so no external users).
  DenseMap<Value*, unsigned> FieldIndexByKey;

public:
  FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL)
      : DL(DL), Context(Context) {}

  // Opaque handle to a field added by addField*; resolve to a struct element
  // index with getFieldIndex() after finish().
  class FieldId {
    size_t Value;
    explicit FieldId(size_t Value) : Value(Value) {}

    friend class FrameTypeBuilder;
  };

  /// Add a field to this structure for the storage of an `alloca`
  /// instruction.
  FieldId addFieldForAlloca(AllocaInst *AI, ForSpillType ForSpill = {},
                            bool IsHeader = false) {
    Type *Ty = AI->getAllocatedType();

    // Make an array type if this is a static array allocation.
    if (AI->isArrayAllocation()) {
      if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
        Ty = ArrayType::get(Ty, CI->getValue().getZExtValue());
      else
        report_fatal_error("Coroutines cannot handle non static allocas yet");
    }

    return addField(Ty, AI->getAlign(), ForSpill, IsHeader);
  }

  /// We want to put the allocas whose lifetime-ranges are not overlapped
  /// into one slot of coroutine frame.
  /// Consider the example at:https://bugs.llvm.org/show_bug.cgi?id=45566
  ///
  ///     cppcoro::task<void> alternative_paths(bool cond) {
  ///         if (cond) {
  ///             big_structure a;
  ///             process(a);
  ///             co_await something();
  ///         } else {
  ///             big_structure b;
  ///             process2(b);
  ///             co_await something();
  ///         }
  ///     }
  ///
  /// We want to put variable a and variable b in the same slot to
  /// reduce the size of coroutine frame.
  ///
  /// This function use StackLifetime algorithm to partition the AllocaInsts in
  /// Spills to non-overlapped sets in order to put Alloca in the same
  /// non-overlapped set into the same slot in the Coroutine Frame. Then add
  /// field for the allocas in the same non-overlapped set by using the largest
  /// type as the field type.
  ///
  /// Side Effects: Because We sort the allocas, the order of allocas in the
  /// frame may be different with the order in the source code.
  void addFieldForAllocas(const Function &F, SpillInfo &Spills,
                          coro::Shape &Shape);

  /// Add a field to this structure.
  FieldId addField(Type *Ty, MaybeAlign FieldAlignment,
                   ForSpillType ForSpill = {}, bool IsHeader = false) {
    assert(!IsFinished && "adding fields to a finished builder");
    assert(Ty && "must provide a type for a field");

    // The field size is always the alloc size of the type.
    uint64_t FieldSize = DL.getTypeAllocSize(Ty);

    // The field alignment might not be the type alignment, but we need
    // to remember the type alignment anyway to build the type.
    Align TyAlignment = DL.getABITypeAlign(Ty);
    if (!FieldAlignment) FieldAlignment = TyAlignment;

    // Lay out header fields immediately.
    uint64_t Offset;
    if (IsHeader) {
      Offset = alignTo(StructSize, FieldAlignment);
      StructSize = Offset + FieldSize;

    // Everything else has a flexible offset.
    } else {
      Offset = OptimizedStructLayoutField::FlexibleOffset;
    }

    Fields.push_back({FieldSize, Offset, ForSpill, Ty, 0,
                      *FieldAlignment, TyAlignment});
    return FieldId(Fields.size() - 1);
  }

  /// Finish the layout and set the body on the given type.
  void finish(StructType *Ty);

  uint64_t getStructSize() const {
    assert(IsFinished && "not yet finished!");
    return StructSize;
  }

  Align getStructAlign() const {
    assert(IsFinished && "not yet finished!");
    return StructAlign;
  }

  unsigned getFieldIndex(FieldId Id) const {
    assert(IsFinished && "not yet finished!");
    return Fields[Id.Value].FieldIndex;
  }
};
} // namespace
484 
485 void FrameTypeBuilder::addFieldForAllocas(const Function &F, SpillInfo &Spills,
486                                           coro::Shape &Shape) {
487   DenseMap<AllocaInst *, unsigned int> AllocaIndex;
488   SmallVector<AllocaInst *, 8> Allocas;
489   DenseMap<AllocaInst *, Spill *> SpillOfAllocas;
490   using AllocaSetType = SmallVector<AllocaInst *, 4>;
491   SmallVector<AllocaSetType, 4> NonOverlapedAllocas;
492 
493   // We need to add field for allocas at the end of this function. However, this
494   // function has multiple exits, so we use this helper to avoid redundant code.
495   struct RTTIHelper {
496     std::function<void()> func;
497     RTTIHelper(std::function<void()> &&func) : func(func) {}
498     ~RTTIHelper() { func(); }
499   } Helper([&]() {
500     for (auto AllocaSet : NonOverlapedAllocas) {
501       ForSpillType ForSpills;
502       for (auto Alloca : AllocaSet)
503         ForSpills.push_back(SpillOfAllocas[Alloca]);
504       auto *LargestAI = *AllocaSet.begin();
505       addFieldForAlloca(LargestAI, ForSpills);
506     }
507   });
508 
509   for (auto &Spill : Spills)
510     if (AllocaInst *AI = dyn_cast<AllocaInst>(Spill.def()))
511       if (find(Allocas, AI) == Allocas.end()) {
512         SpillOfAllocas[AI] = &Spill;
513         Allocas.emplace_back(AI);
514       }
515 
516   if (!Shape.ReuseFrameSlot && !EnableReuseStorageInFrame) {
517     for (auto Alloca : Allocas) {
518       AllocaIndex[Alloca] = NonOverlapedAllocas.size();
519       NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
520     }
521     return;
522   }
523 
524   // Because there are pathes from the lifetime.start to coro.end
525   // for each alloca, the liferanges for every alloca is overlaped
526   // in the blocks who contain coro.end and the successor blocks.
527   // So we choose to skip there blocks when we calculates the liferange
528   // for each alloca. It should be reasonable since there shouldn't be uses
529   // in these blocks and the coroutine frame shouldn't be used outside the
530   // coroutine body.
531   //
532   // Note that the user of coro.suspend may not be SwitchInst. However, this
533   // case seems too complex to handle. And it is harmless to skip these
534   // patterns since it just prevend putting the allocas to live in the same
535   // slot.
536   DenseMap<SwitchInst *, BasicBlock *> DefaultSuspendDest;
537   for (auto CoroSuspendInst : Shape.CoroSuspends) {
538     for (auto U : CoroSuspendInst->users()) {
539       if (auto *ConstSWI = dyn_cast<SwitchInst>(U)) {
540         auto *SWI = const_cast<SwitchInst *>(ConstSWI);
541         DefaultSuspendDest[SWI] = SWI->getDefaultDest();
542         SWI->setDefaultDest(SWI->getSuccessor(1));
543       }
544     }
545   }
546 
547   StackLifetime StackLifetimeAnalyzer(F, Allocas,
548                                       StackLifetime::LivenessType::May);
549   StackLifetimeAnalyzer.run();
550   auto IsAllocaInferenre = [&](const AllocaInst *AI1, const AllocaInst *AI2) {
551     return StackLifetimeAnalyzer.getLiveRange(AI1).overlaps(
552         StackLifetimeAnalyzer.getLiveRange(AI2));
553   };
554   auto GetAllocaSize = [&](const AllocaInst *AI) {
555     Optional<uint64_t> RetSize = AI->getAllocationSizeInBits(DL);
556     assert(RetSize && "We can't handle scalable type now.\n");
557     return RetSize.getValue();
558   };
559   // Put larger allocas in the front. So the larger allocas have higher
560   // priority to merge, which can save more space potentially. Also each
561   // AllocaSet would be ordered. So we can get the largest Alloca in one
562   // AllocaSet easily.
563   sort(Allocas, [&](auto Iter1, auto Iter2) {
564     return GetAllocaSize(Iter1) > GetAllocaSize(Iter2);
565   });
566   for (auto Alloca : Allocas) {
567     bool Merged = false;
568     // Try to find if the Alloca is not inferenced with any existing
569     // NonOverlappedAllocaSet. If it is true, insert the alloca to that
570     // NonOverlappedAllocaSet.
571     for (auto &AllocaSet : NonOverlapedAllocas) {
572       assert(!AllocaSet.empty() && "Processing Alloca Set is not empty.\n");
573       bool CouldMerge = none_of(AllocaSet, [&](auto Iter) {
574         return IsAllocaInferenre(Alloca, Iter);
575       });
576       if (!CouldMerge)
577         continue;
578       AllocaIndex[Alloca] = AllocaIndex[*AllocaSet.begin()];
579       AllocaSet.push_back(Alloca);
580       Merged = true;
581       break;
582     }
583     if (!Merged) {
584       AllocaIndex[Alloca] = NonOverlapedAllocas.size();
585       NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
586     }
587   }
588   // Recover the default target destination for each Switch statement
589   // reserved.
590   for (auto SwitchAndDefaultDest : DefaultSuspendDest) {
591     SwitchInst *SWI = SwitchAndDefaultDest.first;
592     BasicBlock *DestBB = SwitchAndDefaultDest.second;
593     SWI->setDefaultDest(DestBB);
594   }
595   // This Debug Info could tell us which allocas are merged into one slot.
596   LLVM_DEBUG(for (auto &AllocaSet
597                   : NonOverlapedAllocas) {
598     if (AllocaSet.size() > 1) {
599       dbgs() << "In Function:" << F.getName() << "\n";
600       dbgs() << "Find Union Set "
601              << "\n";
602       dbgs() << "\tAllocas are \n";
603       for (auto Alloca : AllocaSet)
604         dbgs() << "\t\t" << *Alloca << "\n";
605     }
606   });
607 }
608 
// Run the optimized struct layout over all pending fields, insert padding
// where needed, set the struct body, and publish final offsets/indices back
// into the Fields (and their Spill records).
void FrameTypeBuilder::finish(StructType *Ty) {
  assert(!IsFinished && "already finished!");

  // Prepare the optimal-layout field array.
  // The Id in the layout field is a pointer to our Field for it.
  SmallVector<OptimizedStructLayoutField, 8> LayoutFields;
  LayoutFields.reserve(Fields.size());
  for (auto &Field : Fields) {
    LayoutFields.emplace_back(&Field, Field.Size, Field.Alignment,
                              Field.Offset);
  }

  // Perform layout.
  auto SizeAndAlign = performOptimizedStructLayout(LayoutFields);
  StructSize = SizeAndAlign.first;
  StructAlign = SizeAndAlign.second;

  // Recover our Field from the opaque Id stored in a layout field.
  auto getField = [](const OptimizedStructLayoutField &LayoutField) -> Field & {
    return *static_cast<Field *>(const_cast<void*>(LayoutField.Id));
  };

  // We need to produce a packed struct type if there's a field whose
  // assigned offset isn't a multiple of its natural type alignment.
  bool Packed = [&] {
    for (auto &LayoutField : LayoutFields) {
      auto &F = getField(LayoutField);
      if (!isAligned(F.TyAlignment, LayoutField.Offset))
        return true;
    }
    return false;
  }();

  // Build the struct body.
  SmallVector<Type*, 16> FieldTypes;
  FieldTypes.reserve(LayoutFields.size() * 3 / 2);
  uint64_t LastOffset = 0;
  for (auto &LayoutField : LayoutFields) {
    auto &F = getField(LayoutField);

    auto Offset = LayoutField.Offset;

    // Add a padding field if there's a padding gap and we're either
    // building a packed struct or the padding gap is more than we'd
    // get from aligning to the field type's natural alignment.
    assert(Offset >= LastOffset);
    if (Offset != LastOffset) {
      if (Packed || alignTo(LastOffset, F.TyAlignment) != Offset)
        FieldTypes.push_back(ArrayType::get(Type::getInt8Ty(Context),
                                            Offset - LastOffset));
    }

    // Record the layout information into both the Field and the
    // original Spill, if there is one.
    F.Offset = Offset;
    F.FieldIndex = FieldTypes.size();
    for (auto Spill : F.ForSpill) {
      Spill->setFieldIndex(F.FieldIndex);
    }

    FieldTypes.push_back(F.Ty);
    LastOffset = Offset + F.Size;
  }

  Ty->setBody(FieldTypes, Packed);

#ifndef NDEBUG
  // Check that the IR layout matches the offsets we expect.
  auto Layout = DL.getStructLayout(Ty);
  for (auto &F : Fields) {
    assert(Ty->getElementType(F.FieldIndex) == F.Ty);
    assert(Layout->getElementOffset(F.FieldIndex) == F.Offset);
  }
#endif

  IsFinished = true;
}
685 
// Build a struct that will keep state for an active coroutine.
//   struct f.frame {
//     ResumeFnTy ResumeFnAddr;
//     ResumeFnTy DestroyFnAddr;
//     int ResumeIndex;
//     ... promise (if present) ...
//     ... spills ...
//   };
// Returns the (named, opaque-until-finished) frame StructType and records
// frame size/alignment and ABI-specific field indices into Shape.
static StructType *buildFrameType(Function &F, coro::Shape &Shape,
                                  SpillInfo &Spills) {
  LLVMContext &C = F.getContext();
  const DataLayout &DL = F.getParent()->getDataLayout();
  // Create a named, initially-opaque struct "<function>.Frame"; the body is
  // set by B.finish() below.
  StructType *FrameTy = [&] {
    SmallString<32> Name(F.getName());
    Name.append(".Frame");
    return StructType::create(C, Name);
  }();

  FrameTypeBuilder B(C, DL);

  AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
  Optional<FrameTypeBuilder::FieldId> PromiseFieldId;
  Optional<FrameTypeBuilder::FieldId> SwitchIndexFieldId;

  if (Shape.ABI == coro::ABI::Switch) {
    auto *FramePtrTy = FrameTy->getPointerTo();
    auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy,
                                   /*IsVarArg=*/false);
    auto *FnPtrTy = FnTy->getPointerTo();

    // Add header fields for the resume and destroy functions.
    // We can rely on these being perfectly packed.
    B.addField(FnPtrTy, None, {}, /*header*/ true);
    B.addField(FnPtrTy, None, {}, /*header*/ true);

    // Add a header field for the promise if there is one.
    if (PromiseAlloca) {
      PromiseFieldId = B.addFieldForAlloca(PromiseAlloca, {}, /*header*/ true);
    }

    // Add a field to store the suspend index.  This doesn't need to
    // be in the header.
    unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size()));
    Type *IndexType = Type::getIntNTy(C, IndexBits);

    SwitchIndexFieldId = B.addField(IndexType, None);
  } else {
    assert(PromiseAlloca == nullptr && "lowering doesn't support promises");
  }

  // Because multiple allocas may own the same field slot,
  // we add allocas to field here.
  B.addFieldForAllocas(F, Spills, Shape);
  Value *CurrentDef = nullptr;
  // Create an entry for every spilled value which is not an AllocaInst.
  for (auto &S : Spills) {
    // We can have multiple entries in Spills for a single value, but
    // they should form a contiguous run.  Ignore all but the first.
    if (CurrentDef == S.def())
      continue;

    CurrentDef = S.def();

    assert(CurrentDef != PromiseAlloca &&
           "recorded spill use of promise alloca?");

    if (!isa<AllocaInst>(CurrentDef)) {
      Type *Ty = CurrentDef->getType();
      B.addField(Ty, None, {&S});
    }
  }

  B.finish(FrameTy);
  Shape.FrameAlign = B.getStructAlign();
  Shape.FrameSize = B.getStructSize();

  switch (Shape.ABI) {
  // In the switch ABI, remember the field indices for the promise and
  // switch-index fields.
  case coro::ABI::Switch:
    Shape.SwitchLowering.IndexField =
      B.getFieldIndex(*SwitchIndexFieldId);
    Shape.SwitchLowering.PromiseField =
      (PromiseAlloca ? B.getFieldIndex(*PromiseFieldId) : 0);

    // Also round the frame size up to a multiple of its alignment, as is
    // generally expected in C/C++.
    Shape.FrameSize = alignTo(Shape.FrameSize, Shape.FrameAlign);
    break;

  // In the retcon ABI, remember whether the frame is inline in the storage.
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce: {
    auto Id = Shape.getRetconCoroId();
    Shape.RetconLowering.IsFrameInlineInStorage
      = (B.getStructSize() <= Id->getStorageSize() &&
         B.getStructAlign() <= Id->getStorageAlignment());
    break;
  }
  }

  return FrameTy;
}
789 
790 // We use a pointer use visitor to discover if there are any writes into an
791 // alloca that dominates CoroBegin. If that is the case, insertSpills will copy
792 // the value from the alloca into the coroutine frame spill slot corresponding
// to that alloca. We also collect any aliases pointing to the alloca created
// before CoroBegin but used after CoroBegin. These aliases will be recreated
// after CoroBegin from the frame address so that later references point
// to the frame instead of the stack.
797 // Note: We are repurposing PtrUseVisitor's isEscaped() to mean whether the
798 // pointer is potentially written into.
799 // TODO: If the pointer is really escaped, we are in big trouble because we
800 // will be escaping a pointer to a stack address that would no longer exist
801 // soon. However most escape analysis isn't good enough to precisely tell,
802 // so we are assuming that if a pointer is escaped that it's written into.
803 // TODO: Another potential issue is if we are creating an alias through
804 // a function call, e.g:
805 //   %a = AllocaInst ...
806 //   %b = call @computeAddress(... %a)
807 // If %b is an alias of %a and will be used after CoroBegin, this will be broken
808 // and there is nothing we can do about it.
namespace {
// Walks all uses of an alloca that are reachable before coro.begin (i.e. not
// dominated by it). insertSpills consumes the results: isEscaped() flags
// allocas whose contents may be modified before CoroBegin (requiring a copy
// into the frame slot), and getAliases() lists pre-CoroBegin aliases that
// must be recreated from the frame address after CoroBegin.
struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
  using Base = PtrUseVisitor<AllocaUseVisitor>;
  // DL drives offset computation in the base visitor; DT and CB define the
  // "not dominated by coro.begin" filter applied in visit() below.
  AllocaUseVisitor(const DataLayout &DL, const DominatorTree &DT,
                   const CoroBeginInst &CB)
      : PtrUseVisitor(DL), DT(DT), CoroBegin(CB) {}

  // We are only interested in uses that are not dominated by coro.begin.
  void visit(Instruction &I) {
    if (!DT.dominates(&CoroBegin, &I))
      Base::visit(I);
  }
  // We need to provide this overload as PtrUseVisitor uses a pointer based
  // visiting function.
  void visit(Instruction *I) { return visit(*I); }

  // We cannot handle PHI node and SelectInst because they could be selecting
  // between two addresses that point to different Allocas.
  void visitPHINode(PHINode &I) {
    assert(!usedAfterCoroBegin(I) &&
           "Unable to handle PHI node of aliases created before CoroBegin but "
           "used after CoroBegin");
  }

  void visitSelectInst(SelectInst &I) {
    assert(!usedAfterCoroBegin(I) &&
           "Unable to handle Select of aliases created before CoroBegin but "
           "used after CoroBegin");
  }

  // Loads neither modify the alloca nor produce a new alias; ignore them.
  void visitLoadInst(LoadInst &) {}

  // If the use is an operand, the pointer escaped and anything can write into
  // that memory. If the use is the pointer, we are definitely writing into the
  // alloca and therefore we need to copy.
  void visitStoreInst(StoreInst &SI) { PI.setEscaped(&SI); }

  // All mem intrinsics modify the data.
  void visitMemIntrinsic(MemIntrinsic &MI) { PI.setEscaped(&MI); }

  // Casts and GEPs create aliases of the alloca; record each one that is
  // still referenced after CoroBegin.
  void visitBitCastInst(BitCastInst &BC) {
    Base::visitBitCastInst(BC);
    handleAlias(BC);
  }

  void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
    Base::visitAddrSpaceCastInst(ASC);
    handleAlias(ASC);
  }

  void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
    // The base visitor will adjust Offset accordingly.
    Base::visitGetElementPtrInst(GEPI);
    handleAlias(GEPI);
  }

  const SmallVector<std::pair<Instruction *, APInt>, 1> &getAliases() const {
    return Aliases;
  }

private:
  const DominatorTree &DT;
  const CoroBeginInst &CoroBegin;
  // All aliases of the original AllocaInst that are used after CoroBegin.
  // Each entry contains the instruction and the offset in the original Alloca.
  SmallVector<std::pair<Instruction *, APInt>, 1> Aliases{};

  // Returns true if any use of I is dominated by (i.e. executes after)
  // CoroBegin.
  bool usedAfterCoroBegin(Instruction &I) {
    for (auto &U : I.uses())
      if (DT.dominates(&CoroBegin, U))
        return true;
    return false;
  }

  // Records I as an alias of the alloca at the base visitor's current Offset,
  // but only if the alias is actually referenced after CoroBegin.
  void handleAlias(Instruction &I) {
    if (!usedAfterCoroBegin(I))
      return;

    assert(IsOffsetKnown && "Can only handle alias with known offset created "
                            "before CoroBegin and used after");
    Aliases.emplace_back(&I, Offset);
  }
};
} // namespace
893 
894 // We need to make room to insert a spill after initial PHIs, but before
895 // catchswitch instruction. Placing it before violates the requirement that
896 // catchswitch, like all other EHPads must be the first nonPHI in a block.
897 //
898 // Split away catchswitch into a separate block and insert in its place:
899 //
900 //   cleanuppad <InsertPt> cleanupret.
901 //
902 // cleanupret instruction will act as an insert point for the spill.
903 static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) {
904   BasicBlock *CurrentBlock = CatchSwitch->getParent();
905   BasicBlock *NewBlock = CurrentBlock->splitBasicBlock(CatchSwitch);
906   CurrentBlock->getTerminator()->eraseFromParent();
907 
908   auto *CleanupPad =
909       CleanupPadInst::Create(CatchSwitch->getParentPad(), {}, "", CurrentBlock);
910   auto *CleanupRet =
911       CleanupReturnInst::Create(CleanupPad, NewBlock, CurrentBlock);
912   return CleanupRet;
913 }
914 
// Replace all alloca and SSA values that are accessed across suspend points
// with GetElementPointer from coroutine frame + loads and stores. Create an
// AllocaSpillBB that will become the new entry block for the resume parts of
// the coroutine:
//
//    %hdl = coro.begin(...)
//    whatever
//
// becomes:
//
//    %hdl = coro.begin(...)
//    %FramePtr = bitcast i8* hdl to %f.frame*
//    br label %AllocaSpillBB
//
//  AllocaSpillBB:
//    ; geps corresponding to allocas that were moved to coroutine frame
//    br label PostSpill
//
//  PostSpill:
//    whatever
//
// Returns the frame pointer instruction (the bitcast of coro.begin).
static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) {
  auto *CB = Shape.CoroBegin;
  LLVMContext &C = CB->getContext();
  IRBuilder<> Builder(CB->getNextNode());
  StructType *FrameTy = Shape.FrameTy;
  PointerType *FramePtrTy = FrameTy->getPointerTo();
  auto *FramePtr =
      cast<Instruction>(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr"));
  DominatorTree DT(*CB->getFunction());

  // Per-entry cursor state. Spills is sorted by defining value, so all
  // entries for one value are adjacent; we create one store per definition
  // and one reload per user block, lazily.
  Value *CurrentValue = nullptr;
  BasicBlock *CurrentBlock = nullptr;
  Value *CurrentReload = nullptr;

  // Proper field number will be read from field definition.
  unsigned Index = InvalidFieldIndex;

  // We need to keep track of any allocas that need "spilling"
  // since they will live in the coroutine frame now, all access to them
  // need to be changed, not just the access across suspend points
  // we remember allocas and their indices to be handled once we processed
  // all the spills.
  SmallVector<std::pair<AllocaInst *, unsigned>, 4> Allocas;

  // Promise alloca (if present) doesn't show in the spills and has a
  // special field number.
  if (auto *PromiseAlloca = Shape.getPromiseAlloca()) {
    assert(Shape.ABI == coro::ABI::Switch);
    Allocas.emplace_back(PromiseAlloca, Shape.getPromiseField());
  }

  // Create a GEP with the given index into the coroutine frame for the original
  // value Orig. Appends an extra 0 index for array-allocas, preserving the
  // original type.
  auto GetFramePointer = [&](uint32_t Index, Value *Orig) -> Value * {
    SmallVector<Value *, 3> Indices = {
        ConstantInt::get(Type::getInt32Ty(C), 0),
        ConstantInt::get(Type::getInt32Ty(C), Index),
    };

    if (auto *AI = dyn_cast<AllocaInst>(Orig)) {
      if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) {
        auto Count = CI->getValue().getZExtValue();
        if (Count > 1) {
          Indices.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
        }
      } else {
        report_fatal_error("Coroutines cannot handle non static allocas yet");
      }
    }

    auto GEP = cast<GetElementPtrInst>(
        Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices));
    if (isa<AllocaInst>(Orig)) {
      // If the type of GEP is not equal to the type of AllocaInst, it implies
      // that the AllocaInst may be reused in the Frame slot of other
      // AllocaInst. So we cast the GEP to the type of AllocaInst.
      if (GEP->getResultElementType() != Orig->getType())
        return Builder.CreateBitCast(GEP, Orig->getType(),
                                     Orig->getName() + Twine(".cast"));
    }
    return GEP;
  };

  // Create a load instruction to reload the spilled value from the coroutine
  // frame. Populates the Value pointer reference provided with the frame GEP.
  auto CreateReload = [&](Instruction *InsertBefore, Value *&G) {
    assert(Index != InvalidFieldIndex && "accessing unassigned field number");
    Builder.SetInsertPoint(InsertBefore);

    G = GetFramePointer(Index, CurrentValue);
    G->setName(CurrentValue->getName() + Twine(".reload.addr"));

    // For allocas the frame address itself is the "reload"; other values are
    // loaded back out of their frame slot.
    return isa<AllocaInst>(CurrentValue)
               ? G
               : Builder.CreateLoad(FrameTy->getElementType(Index), G,
                                    CurrentValue->getName() + Twine(".reload"));
  };

  Value *GEP = nullptr, *CurrentGEP = nullptr;
  for (auto const &E : Spills) {
    // If we have not seen the value, generate a spill.
    if (CurrentValue != E.def()) {
      CurrentValue = E.def();
      CurrentBlock = nullptr;
      CurrentReload = nullptr;

      Index = E.fieldIndex();

      if (auto *AI = dyn_cast<AllocaInst>(CurrentValue)) {
        // Spilled AllocaInst will be replaced with GEP from the coroutine frame
        // there is no spill required.
        Allocas.emplace_back(AI, Index);
        if (!AI->isStaticAlloca())
          report_fatal_error("Coroutines cannot handle non static allocas yet");
      } else {
        // Otherwise, create a store instruction storing the value into the
        // coroutine frame.

        Instruction *InsertPt = nullptr;
        if (auto Arg = dyn_cast<Argument>(CurrentValue)) {
          // For arguments, we will place the store instruction right after
          // the coroutine frame pointer instruction, i.e. bitcast of
          // coro.begin from i8* to %f.frame*.
          InsertPt = FramePtr->getNextNode();

          // If we're spilling an Argument, make sure we clear 'nocapture'
          // from the coroutine function.
          Arg->getParent()->removeParamAttr(Arg->getArgNo(),
                                            Attribute::NoCapture);

        } else if (auto CSI = dyn_cast<AnyCoroSuspendInst>(CurrentValue)) {
          // Don't spill immediately after a suspend; splitting assumes
          // that the suspend will be followed by a branch.
          InsertPt = CSI->getParent()->getSingleSuccessor()->getFirstNonPHI();
        } else {
          auto *I = cast<Instruction>(CurrentValue);
          if (!DT.dominates(CB, I)) {
            // If it is not dominated by CoroBegin, then spill should be
            // inserted immediately after CoroFrame is computed.
            InsertPt = FramePtr->getNextNode();
          } else if (auto *II = dyn_cast<InvokeInst>(I)) {
            // If we are spilling the result of the invoke instruction, split
            // the normal edge and insert the spill in the new block.
            auto *NewBB = SplitEdge(II->getParent(), II->getNormalDest());
            InsertPt = NewBB->getTerminator();
          } else if (isa<PHINode>(I)) {
            // Skip the PHINodes and EH pads instructions.
            BasicBlock *DefBlock = I->getParent();
            if (auto *CSI =
                    dyn_cast<CatchSwitchInst>(DefBlock->getTerminator()))
              InsertPt = splitBeforeCatchSwitch(CSI);
            else
              InsertPt = &*DefBlock->getFirstInsertionPt();
          } else {
            assert(!I->isTerminator() && "unexpected terminator");
            // For all other values, the spill is placed immediately after
            // the definition.
            InsertPt = I->getNextNode();
          }
        }

        Builder.SetInsertPoint(InsertPt);
        auto *G = Builder.CreateConstInBoundsGEP2_32(
            FrameTy, FramePtr, 0, Index,
            CurrentValue->getName() + Twine(".spill.addr"));
        Builder.CreateStore(CurrentValue, G);
      }
    }

    // If we have not seen the use block, generate a reload in it.
    if (CurrentBlock != E.userBlock()) {
      CurrentBlock = E.userBlock();
      CurrentReload = CreateReload(&*CurrentBlock->getFirstInsertionPt(), GEP);
    }

    // If we have a single edge PHINode, remove it and replace it with a reload
    // from the coroutine frame. (We already took care of multi edge PHINodes
    // by rewriting them in the rewritePHIs function).
    if (auto *PN = dyn_cast<PHINode>(E.user())) {
      assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming "
                                                "values in the PHINode");
      PN->replaceAllUsesWith(CurrentReload);
      PN->eraseFromParent();
      continue;
    }

    // If we have not seen this GEP instruction, migrate any dbg.declare from
    // the alloca to it.
    if (CurrentGEP != GEP) {
      CurrentGEP = GEP;
      TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(CurrentValue);
      if (!DIs.empty())
        DIBuilder(*CurrentBlock->getParent()->getParent(),
                  /*AllowUnresolved*/ false)
            .insertDeclare(CurrentGEP, DIs.front()->getVariable(),
                           DIs.front()->getExpression(),
                           DIs.front()->getDebugLoc(), DIs.front());
    }

    // Replace all uses of CurrentValue in the current instruction with reload.
    E.user()->replaceUsesOfWith(CurrentValue, CurrentReload);
  }

  BasicBlock *FramePtrBB = FramePtr->getParent();

  // Split the entry block after the frame pointer computation: everything
  // following it becomes AllocaSpillBB, whose tail is split off again as
  // PostSpill (matching the CFG sketched in the function comment).
  auto SpillBlock =
    FramePtrBB->splitBasicBlock(FramePtr->getNextNode(), "AllocaSpillBB");
  SpillBlock->splitBasicBlock(&SpillBlock->front(), "PostSpill");
  Shape.AllocaSpillBlock = SpillBlock;

  // retcon and retcon.once lowering assumes all uses have been sunk.
  if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce) {
    // If we found any allocas, replace all of their remaining uses with Geps.
    Builder.SetInsertPoint(&SpillBlock->front());
    for (auto &P : Allocas) {
      auto *G = GetFramePointer(P.second, P.first);

      // We are not using ReplaceInstWithInst(P.first, cast<Instruction>(G))
      // here, as we are changing location of the instruction.
      G->takeName(P.first);
      P.first->replaceAllUsesWith(G);
      P.first->eraseFromParent();
    }
    return FramePtr;
  }

  // If we found any alloca, replace all of their remaining uses with GEP
  // instructions. Because new dbg.declare have been created for these alloca,
  // we also delete the original dbg.declare and replace other uses with undef.
  // Note: We cannot replace the alloca with GEP instructions indiscriminately,
  // as some of the uses may not be dominated by CoroBegin.
  bool MightNeedToCopy = false;
  Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front());
  SmallVector<Instruction *, 4> UsersToUpdate;
  for (auto &P : Allocas) {
    AllocaInst *const A = P.first;

    for (auto *DI : FindDbgDeclareUses(A))
      DI->eraseFromParent();
    replaceDbgUsesWithUndef(A);

    UsersToUpdate.clear();
    for (User *U : A->users()) {
      auto *I = cast<Instruction>(U);
      if (DT.dominates(CB, I))
        UsersToUpdate.push_back(I);
      else
        MightNeedToCopy = true;
    }
    if (!UsersToUpdate.empty()) {
      auto *G = GetFramePointer(P.second, A);
      G->takeName(A);
      for (Instruction *I : UsersToUpdate)
        I->replaceUsesOfWith(A, G);
    }
  }
  // If we discovered such uses not dominated by CoroBegin, see if any of them
  // precede coro begin and have instructions that can modify the
  // value of the alloca and therefore would require copying the value into
  // the spill slot in the coroutine frame.
  if (MightNeedToCopy) {
    Builder.SetInsertPoint(FramePtr->getNextNode());

    for (auto &P : Allocas) {
      AllocaInst *const A = P.first;
      AllocaUseVisitor Visitor(A->getModule()->getDataLayout(), DT, *CB);
      auto PtrI = Visitor.visitPtr(*A);
      assert(!PtrI.isAborted());
      if (PtrI.isEscaped()) {
        // isEscaped really means potentially modified before CoroBegin.
        if (A->isArrayAllocation())
          report_fatal_error(
              "Coroutines cannot handle copying of array allocas yet");

        auto *G = GetFramePointer(P.second, A);
        auto *Value = Builder.CreateLoad(A->getAllocatedType(), A);
        Builder.CreateStore(Value, G);
      }
      // For each alias to Alloca created before CoroBegin but used after
      // CoroBegin, we recreate them after CoroBegin by applying the offset
      // to the pointer in the frame.
      for (const auto &Alias : Visitor.getAliases()) {
        auto *FramePtr = GetFramePointer(P.second, A);
        auto *FramePtrRaw =
            Builder.CreateBitCast(FramePtr, Type::getInt8PtrTy(C));
        auto *AliasPtr = Builder.CreateGEP(
            FramePtrRaw, ConstantInt::get(Type::getInt64Ty(C), Alias.second));
        auto *AliasPtrTyped =
            Builder.CreateBitCast(AliasPtr, Alias.first->getType());
        Alias.first->replaceUsesWithIf(
            AliasPtrTyped, [&](Use &U) { return DT.dominates(CB, U); });
      }
    }
  }
  return FramePtr;
}
1214 
1215 // Sets the unwind edge of an instruction to a particular successor.
1216 static void setUnwindEdgeTo(Instruction *TI, BasicBlock *Succ) {
1217   if (auto *II = dyn_cast<InvokeInst>(TI))
1218     II->setUnwindDest(Succ);
1219   else if (auto *CS = dyn_cast<CatchSwitchInst>(TI))
1220     CS->setUnwindDest(Succ);
1221   else if (auto *CR = dyn_cast<CleanupReturnInst>(TI))
1222     CR->setUnwindDest(Succ);
1223   else
1224     llvm_unreachable("unexpected terminator instruction");
1225 }
1226 
1227 // Replaces all uses of OldPred with the NewPred block in all PHINodes in a
1228 // block.
1229 static void updatePhiNodes(BasicBlock *DestBB, BasicBlock *OldPred,
1230                            BasicBlock *NewPred, PHINode *Until = nullptr) {
1231   unsigned BBIdx = 0;
1232   for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
1233     PHINode *PN = cast<PHINode>(I);
1234 
1235     // We manually update the LandingPadReplacement PHINode and it is the last
1236     // PHI Node. So, if we find it, we are done.
1237     if (Until == PN)
1238       break;
1239 
1240     // Reuse the previous value of BBIdx if it lines up.  In cases where we
1241     // have multiple phi nodes with *lots* of predecessors, this is a speed
1242     // win because we don't have to scan the PHI looking for TIBB.  This
1243     // happens because the BB list of PHI nodes are usually in the same
1244     // order.
1245     if (PN->getIncomingBlock(BBIdx) != OldPred)
1246       BBIdx = PN->getBasicBlockIndex(OldPred);
1247 
1248     assert(BBIdx != (unsigned)-1 && "Invalid PHI Index!");
1249     PN->setIncomingBlock(BBIdx, NewPred);
1250   }
1251 }
1252 
1253 // Uses SplitEdge unless the successor block is an EHPad, in which case do EH
1254 // specific handling.
1255 static BasicBlock *ehAwareSplitEdge(BasicBlock *BB, BasicBlock *Succ,
1256                                     LandingPadInst *OriginalPad,
1257                                     PHINode *LandingPadReplacement) {
1258   auto *PadInst = Succ->getFirstNonPHI();
1259   if (!LandingPadReplacement && !PadInst->isEHPad())
1260     return SplitEdge(BB, Succ);
1261 
1262   auto *NewBB = BasicBlock::Create(BB->getContext(), "", BB->getParent(), Succ);
1263   setUnwindEdgeTo(BB->getTerminator(), NewBB);
1264   updatePhiNodes(Succ, BB, NewBB, LandingPadReplacement);
1265 
1266   if (LandingPadReplacement) {
1267     auto *NewLP = OriginalPad->clone();
1268     auto *Terminator = BranchInst::Create(Succ, NewBB);
1269     NewLP->insertBefore(Terminator);
1270     LandingPadReplacement->addIncoming(NewLP, NewBB);
1271     return NewBB;
1272   }
1273   Value *ParentPad = nullptr;
1274   if (auto *FuncletPad = dyn_cast<FuncletPadInst>(PadInst))
1275     ParentPad = FuncletPad->getParentPad();
1276   else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(PadInst))
1277     ParentPad = CatchSwitch->getParentPad();
1278   else
1279     llvm_unreachable("handling for other EHPads not implemented yet");
1280 
1281   auto *NewCleanupPad = CleanupPadInst::Create(ParentPad, {}, "", NewBB);
1282   CleanupReturnInst::Create(NewCleanupPad, Succ, NewBB);
1283   return NewBB;
1284 }
1285 
1286 // Moves the values in the PHIs in SuccBB that correspong to PredBB into a new
1287 // PHI in InsertedBB.
1288 static void movePHIValuesToInsertedBlock(BasicBlock *SuccBB,
1289                                          BasicBlock *InsertedBB,
1290                                          BasicBlock *PredBB,
1291                                          PHINode *UntilPHI = nullptr) {
1292   auto *PN = cast<PHINode>(&SuccBB->front());
1293   do {
1294     int Index = PN->getBasicBlockIndex(InsertedBB);
1295     Value *V = PN->getIncomingValue(Index);
1296     PHINode *InputV = PHINode::Create(
1297         V->getType(), 1, V->getName() + Twine(".") + SuccBB->getName(),
1298         &InsertedBB->front());
1299     InputV->addIncoming(V, PredBB);
1300     PN->setIncomingValue(Index, InputV);
1301     PN = dyn_cast<PHINode>(PN->getNextNode());
1302   } while (PN != UntilPHI);
1303 }
1304 
// Rewrites the PHI Nodes in a cleanuppad.
static void rewritePHIsForCleanupPad(BasicBlock *CleanupPadBB,
                                     CleanupPadInst *CleanupPad) {
  // For every incoming edge to a CleanupPad we will create a new block holding
  // all incoming values in single-value PHI nodes. We will then create another
  // block to act as a dispatcher (as all unwind edges for related EH blocks
  // must be the same).
  //
  // cleanuppad:
  //    %2 = phi i32[%0, %catchswitch], [%1, %catch.1]
  //    %3 = cleanuppad within none []
  //
  // It will create:
  //
  // cleanuppad.corodispatch
  //    %2 = phi i8[0, %catchswitch], [1, %catch.1]
  //    %3 = cleanuppad within none []
  //    switch i8 % 2, label %unreachable
  //            [i8 0, label %cleanuppad.from.catchswitch
  //             i8 1, label %cleanuppad.from.catch.1]
  // cleanuppad.from.catchswitch:
  //    %4 = phi i32 [%0, %catchswitch]
  //    br %label cleanuppad
  // cleanuppad.from.catch.1:
  //    %6 = phi i32 [%1, %catch.1]
  //    br %label cleanuppad
  // cleanuppad:
  //    %8 = phi i32 [%4, %cleanuppad.from.catchswitch],
  //                 [%6, %cleanuppad.from.catch.1]

  // Unreachable BB, in case switching on an invalid value in the dispatcher.
  auto *UnreachBB = BasicBlock::Create(
      CleanupPadBB->getContext(), "unreachable", CleanupPadBB->getParent());
  IRBuilder<> Builder(UnreachBB);
  Builder.CreateUnreachable();

  // Create a new cleanuppad which will be the dispatcher.
  auto *NewCleanupPadBB =
      BasicBlock::Create(CleanupPadBB->getContext(),
                         CleanupPadBB->getName() + Twine(".corodispatch"),
                         CleanupPadBB->getParent(), CleanupPadBB);
  Builder.SetInsertPoint(NewCleanupPadBB);
  auto *SwitchType = Builder.getInt8Ty();
  auto *SetDispatchValuePN =
      Builder.CreatePHI(SwitchType, pred_size(CleanupPadBB));
  // Move the cleanuppad itself into the dispatcher block, right after the
  // dispatch PHI, so every predecessor unwinds to a single pad.
  CleanupPad->removeFromParent();
  CleanupPad->insertAfter(SetDispatchValuePN);
  auto *SwitchOnDispatch = Builder.CreateSwitch(SetDispatchValuePN, UnreachBB,
                                                pred_size(CleanupPadBB));

  int SwitchIndex = 0;
  SmallVector<BasicBlock *, 8> Preds(pred_begin(CleanupPadBB),
                                     pred_end(CleanupPadBB));
  for (BasicBlock *Pred : Preds) {
    // Create a new cleanuppad and move the PHI values to there.
    auto *CaseBB = BasicBlock::Create(CleanupPadBB->getContext(),
                                      CleanupPadBB->getName() +
                                          Twine(".from.") + Pred->getName(),
                                      CleanupPadBB->getParent(), CleanupPadBB);
    updatePhiNodes(CleanupPadBB, Pred, CaseBB);
    // NOTE(review): CaseBB was already created with this same name above;
    // this second setName looks redundant — confirm before removing.
    CaseBB->setName(CleanupPadBB->getName() + Twine(".from.") +
                    Pred->getName());
    Builder.SetInsertPoint(CaseBB);
    Builder.CreateBr(CleanupPadBB);
    movePHIValuesToInsertedBlock(CleanupPadBB, CaseBB, NewCleanupPadBB);

    // Update this Pred to the new unwind point.
    setUnwindEdgeTo(Pred->getTerminator(), NewCleanupPadBB);

    // Setup the switch in the dispatcher.
    auto *SwitchConstant = ConstantInt::get(SwitchType, SwitchIndex);
    SetDispatchValuePN->addIncoming(SwitchConstant, Pred);
    SwitchOnDispatch->addCase(SwitchConstant, CaseBB);
    SwitchIndex++;
  }
}
1381 
1382 static void rewritePHIs(BasicBlock &BB) {
1383   // For every incoming edge we will create a block holding all
1384   // incoming values in a single PHI nodes.
1385   //
1386   // loop:
1387   //    %n.val = phi i32[%n, %entry], [%inc, %loop]
1388   //
1389   // It will create:
1390   //
1391   // loop.from.entry:
1392   //    %n.loop.pre = phi i32 [%n, %entry]
1393   //    br %label loop
1394   // loop.from.loop:
1395   //    %inc.loop.pre = phi i32 [%inc, %loop]
1396   //    br %label loop
1397   //
1398   // After this rewrite, further analysis will ignore any phi nodes with more
1399   // than one incoming edge.
1400 
1401   // TODO: Simplify PHINodes in the basic block to remove duplicate
1402   // predecessors.
1403 
1404   // Special case for CleanupPad: all EH blocks must have the same unwind edge
1405   // so we need to create an additional "dispatcher" block.
1406   if (auto *CleanupPad =
1407           dyn_cast_or_null<CleanupPadInst>(BB.getFirstNonPHI())) {
1408     SmallVector<BasicBlock *, 8> Preds(pred_begin(&BB), pred_end(&BB));
1409     for (BasicBlock *Pred : Preds) {
1410       if (CatchSwitchInst *CS =
1411               dyn_cast<CatchSwitchInst>(Pred->getTerminator())) {
1412         // CleanupPad with a CatchSwitch predecessor: therefore this is an
1413         // unwind destination that needs to be handle specially.
1414         assert(CS->getUnwindDest() == &BB);
1415         rewritePHIsForCleanupPad(&BB, CleanupPad);
1416         return;
1417       }
1418     }
1419   }
1420 
1421   LandingPadInst *LandingPad = nullptr;
1422   PHINode *ReplPHI = nullptr;
1423   if ((LandingPad = dyn_cast_or_null<LandingPadInst>(BB.getFirstNonPHI()))) {
1424     // ehAwareSplitEdge will clone the LandingPad in all the edge blocks.
1425     // We replace the original landing pad with a PHINode that will collect the
1426     // results from all of them.
1427     ReplPHI = PHINode::Create(LandingPad->getType(), 1, "", LandingPad);
1428     ReplPHI->takeName(LandingPad);
1429     LandingPad->replaceAllUsesWith(ReplPHI);
1430     // We will erase the original landing pad at the end of this function after
1431     // ehAwareSplitEdge cloned it in the transition blocks.
1432   }
1433 
1434   SmallVector<BasicBlock *, 8> Preds(pred_begin(&BB), pred_end(&BB));
1435   for (BasicBlock *Pred : Preds) {
1436     auto *IncomingBB = ehAwareSplitEdge(Pred, &BB, LandingPad, ReplPHI);
1437     IncomingBB->setName(BB.getName() + Twine(".from.") + Pred->getName());
1438 
1439     // Stop the moving of values at ReplPHI, as this is either null or the PHI
1440     // that replaced the landing pad.
1441     movePHIValuesToInsertedBlock(&BB, IncomingBB, Pred, ReplPHI);
1442   }
1443 
1444   if (LandingPad) {
1445     // Calls to ehAwareSplitEdge function cloned the original lading pad.
1446     // No longer need it.
1447     LandingPad->eraseFromParent();
1448   }
1449 }
1450 
1451 static void rewritePHIs(Function &F) {
1452   SmallVector<BasicBlock *, 8> WorkList;
1453 
1454   for (BasicBlock &BB : F)
1455     if (auto *PN = dyn_cast<PHINode>(&BB.front()))
1456       if (PN->getNumIncomingValues() > 1)
1457         WorkList.push_back(&BB);
1458 
1459   for (BasicBlock *BB : WorkList)
1460     rewritePHIs(*BB);
1461 }
1462 
1463 // Check for instructions that we can recreate on resume as opposed to spill
1464 // the result into a coroutine frame.
1465 static bool materializable(Instruction &V) {
1466   return isa<CastInst>(&V) || isa<GetElementPtrInst>(&V) ||
1467          isa<BinaryOperator>(&V) || isa<CmpInst>(&V) || isa<SelectInst>(&V);
1468 }
1469 
1470 // Check for structural coroutine intrinsics that should not be spilled into
1471 // the coroutine frame.
1472 static bool isCoroutineStructureIntrinsic(Instruction &I) {
1473   return isa<CoroIdInst>(&I) || isa<CoroSaveInst>(&I) ||
1474          isa<CoroSuspendInst>(&I);
1475 }
1476 
1477 // For every use of the value that is across suspend point, recreate that value
1478 // after a suspend point.
1479 static void rewriteMaterializableInstructions(IRBuilder<> &IRB,
1480                                               SpillInfo const &Spills) {
1481   BasicBlock *CurrentBlock = nullptr;
1482   Instruction *CurrentMaterialization = nullptr;
1483   Instruction *CurrentDef = nullptr;
1484 
1485   for (auto const &E : Spills) {
1486     // If it is a new definition, update CurrentXXX variables.
1487     if (CurrentDef != E.def()) {
1488       CurrentDef = cast<Instruction>(E.def());
1489       CurrentBlock = nullptr;
1490       CurrentMaterialization = nullptr;
1491     }
1492 
1493     // If we have not seen this block, materialize the value.
1494     if (CurrentBlock != E.userBlock()) {
1495       CurrentBlock = E.userBlock();
1496       CurrentMaterialization = cast<Instruction>(CurrentDef)->clone();
1497       CurrentMaterialization->setName(CurrentDef->getName());
1498       CurrentMaterialization->insertBefore(
1499           &*CurrentBlock->getFirstInsertionPt());
1500     }
1501 
1502     if (auto *PN = dyn_cast<PHINode>(E.user())) {
1503       assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming "
1504                                                 "values in the PHINode");
1505       PN->replaceAllUsesWith(CurrentMaterialization);
1506       PN->eraseFromParent();
1507       continue;
1508     }
1509 
1510     // Replace all uses of CurrentDef in the current instruction with the
1511     // CurrentMaterialization for the block.
1512     E.user()->replaceUsesOfWith(CurrentDef, CurrentMaterialization);
1513   }
1514 }
1515 
1516 // Splits the block at a particular instruction unless it is the first
1517 // instruction in the block with a single predecessor.
1518 static BasicBlock *splitBlockIfNotFirst(Instruction *I, const Twine &Name) {
1519   auto *BB = I->getParent();
1520   if (&BB->front() == I) {
1521     if (BB->getSinglePredecessor()) {
1522       BB->setName(Name);
1523       return BB;
1524     }
1525   }
1526   return BB->splitBasicBlock(I, Name);
1527 }
1528 
1529 // Split above and below a particular instruction so that it
1530 // will be all alone by itself in a block.
1531 static void splitAround(Instruction *I, const Twine &Name) {
1532   splitBlockIfNotFirst(I, Name);
1533   splitBlockIfNotFirst(I->getNextNode(), "After" + Name);
1534 }
1535 
1536 static bool isSuspendBlock(BasicBlock *BB) {
1537   return isa<AnyCoroSuspendInst>(BB->front());
1538 }
1539 
1540 typedef SmallPtrSet<BasicBlock*, 8> VisitedBlocksSet;
1541 
1542 /// Does control flow starting at the given block ever reach a suspend
1543 /// instruction before reaching a block in VisitedOrFreeBBs?
1544 static bool isSuspendReachableFrom(BasicBlock *From,
1545                                    VisitedBlocksSet &VisitedOrFreeBBs) {
1546   // Eagerly try to add this block to the visited set.  If it's already
1547   // there, stop recursing; this path doesn't reach a suspend before
1548   // either looping or reaching a freeing block.
1549   if (!VisitedOrFreeBBs.insert(From).second)
1550     return false;
1551 
1552   // We assume that we'll already have split suspends into their own blocks.
1553   if (isSuspendBlock(From))
1554     return true;
1555 
1556   // Recurse on the successors.
1557   for (auto Succ : successors(From)) {
1558     if (isSuspendReachableFrom(Succ, VisitedOrFreeBBs))
1559       return true;
1560   }
1561 
1562   return false;
1563 }
1564 
1565 /// Is the given alloca "local", i.e. bounded in lifetime to not cross a
1566 /// suspend point?
1567 static bool isLocalAlloca(CoroAllocaAllocInst *AI) {
1568   // Seed the visited set with all the basic blocks containing a free
1569   // so that we won't pass them up.
1570   VisitedBlocksSet VisitedOrFreeBBs;
1571   for (auto User : AI->users()) {
1572     if (auto FI = dyn_cast<CoroAllocaFreeInst>(User))
1573       VisitedOrFreeBBs.insert(FI->getParent());
1574   }
1575 
1576   return !isSuspendReachableFrom(AI->getParent(), VisitedOrFreeBBs);
1577 }
1578 
1579 /// After we split the coroutine, will the given basic block be along
1580 /// an obvious exit path for the resumption function?
1581 static bool willLeaveFunctionImmediatelyAfter(BasicBlock *BB,
1582                                               unsigned depth = 3) {
1583   // If we've bottomed out our depth count, stop searching and assume
1584   // that the path might loop back.
1585   if (depth == 0) return false;
1586 
1587   // If this is a suspend block, we're about to exit the resumption function.
1588   if (isSuspendBlock(BB)) return true;
1589 
1590   // Recurse into the successors.
1591   for (auto Succ : successors(BB)) {
1592     if (!willLeaveFunctionImmediatelyAfter(Succ, depth - 1))
1593       return false;
1594   }
1595 
1596   // If none of the successors leads back in a loop, we're on an exit/abort.
1597   return true;
1598 }
1599 
1600 static bool localAllocaNeedsStackSave(CoroAllocaAllocInst *AI) {
1601   // Look for a free that isn't sufficiently obviously followed by
1602   // either a suspend or a termination, i.e. something that will leave
1603   // the coro resumption frame.
1604   for (auto U : AI->users()) {
1605     auto FI = dyn_cast<CoroAllocaFreeInst>(U);
1606     if (!FI) continue;
1607 
1608     if (!willLeaveFunctionImmediatelyAfter(FI->getParent()))
1609       return true;
1610   }
1611 
1612   // If we never found one, we don't need a stack save.
1613   return false;
1614 }
1615 
/// Turn each of the given local allocas into a normal (dynamic) alloca
/// instruction.
///
/// Every coro.alloca.get is RAUW'd with the new alloca and every
/// coro.alloca.free becomes a stackrestore (when a save was emitted).  The
/// original intrinsics are not erased here; they are queued on \p DeadInsts
/// for the caller to delete, which keeps the users() iteration below valid.
static void lowerLocalAllocas(ArrayRef<CoroAllocaAllocInst*> LocalAllocas,
                              SmallVectorImpl<Instruction*> &DeadInsts) {
  for (auto AI : LocalAllocas) {
    auto M = AI->getModule();
    // Builder starts at the coro.alloca.alloc so the save/alloca are
    // emitted exactly where the allocation used to happen.
    IRBuilder<> Builder(AI);

    // Save the stack depth.  Try to avoid doing this if the stackrestore
    // is going to immediately precede a return or something.
    Value *StackSave = nullptr;
    if (localAllocaNeedsStackSave(AI))
      StackSave = Builder.CreateCall(
                            Intrinsic::getDeclaration(M, Intrinsic::stacksave));

    // Allocate memory.
    auto Alloca = Builder.CreateAlloca(Builder.getInt8Ty(), AI->getSize());
    Alloca->setAlignment(Align(AI->getAlignment()));

    for (auto U : AI->users()) {
      // Replace gets with the allocation.
      if (isa<CoroAllocaGetInst>(U)) {
        U->replaceAllUsesWith(Alloca);

      // Replace frees with stackrestores.  This is safe because
      // alloca.alloc is required to obey a stack discipline, although we
      // don't enforce that structurally.
      } else {
        auto FI = cast<CoroAllocaFreeInst>(U);
        if (StackSave) {
          // Restore at the free point; if no save was emitted the free
          // simply disappears.
          Builder.SetInsertPoint(FI);
          Builder.CreateCall(
                    Intrinsic::getDeclaration(M, Intrinsic::stackrestore),
                             StackSave);
        }
      }
      // Defer deletion of the get/free so this users() loop stays valid.
      DeadInsts.push_back(cast<Instruction>(U));
    }

    // Delete the alloc itself last, after all of its users.
    DeadInsts.push_back(AI);
  }
}
1658 
/// Turn the given coro.alloca.alloc call into a dynamic allocation.
/// This happens during the all-instructions iteration, so it must not
/// delete the call.
///
/// Gets are RAUW'd with the emitted allocation and frees are replaced by
/// the ABI's dealloc sequence; the intrinsics themselves are only queued on
/// \p DeadInsts so the caller's iteration is not invalidated.
static Instruction *lowerNonLocalAlloca(CoroAllocaAllocInst *AI,
                                        coro::Shape &Shape,
                                   SmallVectorImpl<Instruction*> &DeadInsts) {
  // Emit the ABI-specific allocation right where the alloc intrinsic was.
  IRBuilder<> Builder(AI);
  auto Alloc = Shape.emitAlloc(Builder, AI->getSize(), nullptr);

  for (User *U : AI->users()) {
    if (isa<CoroAllocaGetInst>(U)) {
      U->replaceAllUsesWith(Alloc);
    } else {
      // The only other allowed user is coro.alloca.free; emit the matching
      // deallocation at that point.
      auto FI = cast<CoroAllocaFreeInst>(U);
      Builder.SetInsertPoint(FI);
      Shape.emitDealloc(Builder, Alloc, nullptr);
    }
    DeadInsts.push_back(cast<Instruction>(U));
  }

  // Push this on last so that it gets deleted after all the others.
  DeadInsts.push_back(AI);

  // Return the new allocation value so that we can check for needed spills.
  return cast<Instruction>(Alloc);
}
1685 
1686 /// Get the current swifterror value.
1687 static Value *emitGetSwiftErrorValue(IRBuilder<> &Builder, Type *ValueTy,
1688                                      coro::Shape &Shape) {
1689   // Make a fake function pointer as a sort of intrinsic.
1690   auto FnTy = FunctionType::get(ValueTy, {}, false);
1691   auto Fn = ConstantPointerNull::get(FnTy->getPointerTo());
1692 
1693   auto Call = Builder.CreateCall(FnTy, Fn, {});
1694   Shape.SwiftErrorOps.push_back(Call);
1695 
1696   return Call;
1697 }
1698 
1699 /// Set the given value as the current swifterror value.
1700 ///
1701 /// Returns a slot that can be used as a swifterror slot.
1702 static Value *emitSetSwiftErrorValue(IRBuilder<> &Builder, Value *V,
1703                                      coro::Shape &Shape) {
1704   // Make a fake function pointer as a sort of intrinsic.
1705   auto FnTy = FunctionType::get(V->getType()->getPointerTo(),
1706                                 {V->getType()}, false);
1707   auto Fn = ConstantPointerNull::get(FnTy->getPointerTo());
1708 
1709   auto Call = Builder.CreateCall(FnTy, Fn, { V });
1710   Shape.SwiftErrorOps.push_back(Call);
1711 
1712   return Call;
1713 }
1714 
1715 /// Set the swifterror value from the given alloca before a call,
1716 /// then put in back in the alloca afterwards.
1717 ///
1718 /// Returns an address that will stand in for the swifterror slot
1719 /// until splitting.
1720 static Value *emitSetAndGetSwiftErrorValueAround(Instruction *Call,
1721                                                  AllocaInst *Alloca,
1722                                                  coro::Shape &Shape) {
1723   auto ValueTy = Alloca->getAllocatedType();
1724   IRBuilder<> Builder(Call);
1725 
1726   // Load the current value from the alloca and set it as the
1727   // swifterror value.
1728   auto ValueBeforeCall = Builder.CreateLoad(ValueTy, Alloca);
1729   auto Addr = emitSetSwiftErrorValue(Builder, ValueBeforeCall, Shape);
1730 
1731   // Move to after the call.  Since swifterror only has a guaranteed
1732   // value on normal exits, we can ignore implicit and explicit unwind
1733   // edges.
1734   if (isa<CallInst>(Call)) {
1735     Builder.SetInsertPoint(Call->getNextNode());
1736   } else {
1737     auto Invoke = cast<InvokeInst>(Call);
1738     Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstNonPHIOrDbg());
1739   }
1740 
1741   // Get the current swifterror value and store it to the alloca.
1742   auto ValueAfterCall = emitGetSwiftErrorValue(Builder, ValueTy, Shape);
1743   Builder.CreateStore(ValueAfterCall, Alloca);
1744 
1745   return Addr;
1746 }
1747 
1748 /// Eliminate a formerly-swifterror alloca by inserting the get/set
1749 /// intrinsics and attempting to MemToReg the alloca away.
1750 static void eliminateSwiftErrorAlloca(Function &F, AllocaInst *Alloca,
1751                                       coro::Shape &Shape) {
1752   for (auto UI = Alloca->use_begin(), UE = Alloca->use_end(); UI != UE; ) {
1753     // We're likely changing the use list, so use a mutation-safe
1754     // iteration pattern.
1755     auto &Use = *UI;
1756     ++UI;
1757 
1758     // swifterror values can only be used in very specific ways.
1759     // We take advantage of that here.
1760     auto User = Use.getUser();
1761     if (isa<LoadInst>(User) || isa<StoreInst>(User))
1762       continue;
1763 
1764     assert(isa<CallInst>(User) || isa<InvokeInst>(User));
1765     auto Call = cast<Instruction>(User);
1766 
1767     auto Addr = emitSetAndGetSwiftErrorValueAround(Call, Alloca, Shape);
1768 
1769     // Use the returned slot address as the call argument.
1770     Use.set(Addr);
1771   }
1772 
1773   // All the uses should be loads and stores now.
1774   assert(isAllocaPromotable(Alloca));
1775 }
1776 
/// "Eliminate" a swifterror argument by reducing it to the alloca case
/// and then loading and storing in the prologue and epilog.
///
/// The argument keeps the swifterror flag.
static void eliminateSwiftErrorArgument(Function &F, Argument &Arg,
                                        coro::Shape &Shape,
                             SmallVectorImpl<AllocaInst*> &AllocasToPromote) {
  // Build the replacement alloca at the top of the entry block.
  IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());

  auto ArgTy = cast<PointerType>(Arg.getType());
  auto ValueTy = ArgTy->getElementType();

  // Reduce to the alloca case:

  // Create an alloca and replace all uses of the arg with it.
  // (The store below refers to Alloca directly, so doing RAUW first is safe.)
  auto Alloca = Builder.CreateAlloca(ValueTy, ArgTy->getAddressSpace());
  Arg.replaceAllUsesWith(Alloca);

  // Set an initial value in the alloca.  swifterror is always null on entry.
  auto InitialValue = Constant::getNullValue(ValueTy);
  Builder.CreateStore(InitialValue, Alloca);

  // Find all the suspends in the function and save and restore around them.
  for (auto Suspend : Shape.CoroSuspends) {
    (void) emitSetAndGetSwiftErrorValueAround(Suspend, Alloca, Shape);
  }

  // Find all the coro.ends in the function and restore the error value.
  for (auto End : Shape.CoroEnds) {
    Builder.SetInsertPoint(End);
    auto FinalValue = Builder.CreateLoad(ValueTy, Alloca);
    (void) emitSetSwiftErrorValue(Builder, FinalValue, Shape);
  }

  // Now we can use the alloca logic.
  AllocasToPromote.push_back(Alloca);
  eliminateSwiftErrorAlloca(F, Alloca, Shape);
}
1815 
1816 /// Eliminate all problematic uses of swifterror arguments and allocas
1817 /// from the function.  We'll fix them up later when splitting the function.
1818 static void eliminateSwiftError(Function &F, coro::Shape &Shape) {
1819   SmallVector<AllocaInst*, 4> AllocasToPromote;
1820 
1821   // Look for a swifterror argument.
1822   for (auto &Arg : F.args()) {
1823     if (!Arg.hasSwiftErrorAttr()) continue;
1824 
1825     eliminateSwiftErrorArgument(F, Arg, Shape, AllocasToPromote);
1826     break;
1827   }
1828 
1829   // Look for swifterror allocas.
1830   for (auto &Inst : F.getEntryBlock()) {
1831     auto Alloca = dyn_cast<AllocaInst>(&Inst);
1832     if (!Alloca || !Alloca->isSwiftError()) continue;
1833 
1834     // Clear the swifterror flag.
1835     Alloca->setSwiftError(false);
1836 
1837     AllocasToPromote.push_back(Alloca);
1838     eliminateSwiftErrorAlloca(F, Alloca, Shape);
1839   }
1840 
1841   // If we have any allocas to promote, compute a dominator tree and
1842   // promote them en masse.
1843   if (!AllocasToPromote.empty()) {
1844     DominatorTree DT(F);
1845     PromoteMemToReg(AllocasToPromote, DT);
1846   }
1847 }
1848 
1849 /// retcon and retcon.once conventions assume that all spill uses can be sunk
1850 /// after the coro.begin intrinsic.
1851 static void sinkSpillUsesAfterCoroBegin(Function &F, const SpillInfo &Spills,
1852                                         CoroBeginInst *CoroBegin) {
1853   DominatorTree Dom(F);
1854 
1855   SmallSetVector<Instruction *, 32> ToMove;
1856   SmallVector<Instruction *, 32> Worklist;
1857 
1858   // Collect all users that precede coro.begin.
1859   for (auto const &Entry : Spills) {
1860     auto *SpillDef = Entry.def();
1861     for (User *U : SpillDef->users()) {
1862       auto Inst = cast<Instruction>(U);
1863       if (Inst->getParent() != CoroBegin->getParent() ||
1864           Dom.dominates(CoroBegin, Inst))
1865         continue;
1866       if (ToMove.insert(Inst))
1867         Worklist.push_back(Inst);
1868     }
1869   }
1870   // Recursively collect users before coro.begin.
1871   while (!Worklist.empty()) {
1872     auto *Def = Worklist.back();
1873     Worklist.pop_back();
1874     for (User *U : Def->users()) {
1875       auto Inst = cast<Instruction>(U);
1876       if (Dom.dominates(CoroBegin, Inst))
1877         continue;
1878       if (ToMove.insert(Inst))
1879         Worklist.push_back(Inst);
1880     }
1881   }
1882 
1883   // Sort by dominance.
1884   SmallVector<Instruction *, 64> InsertionList(ToMove.begin(), ToMove.end());
1885   std::sort(InsertionList.begin(), InsertionList.end(),
1886             [&Dom](Instruction *A, Instruction *B) -> bool {
1887               // If a dominates b it should preceed (<) b.
1888               return Dom.dominates(A, B);
1889             });
1890 
1891   Instruction *InsertPt = CoroBegin->getNextNode();
1892   for (Instruction *Inst : InsertionList)
1893     Inst->moveBefore(InsertPt);
1894 
1895   return;
1896 }
1897 
/// For each alloca whose uses all lie inside a single region — one entered
/// either from the function entry or from the block right after a suspend —
/// sink its lifetime.start marker into that region. Doing so minimizes the
/// lifetime of each variable, hence minimizing the amount of data we end up
/// putting on the frame.
static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape,
                                     SuspendCrossingInfo &Checker) {
  DominatorTree DT(F);

  // Collect all possible basic blocks which may dominate all uses of allocas:
  // the entry block plus the single successor of every suspend block.
  SmallPtrSet<BasicBlock *, 4> DomSet;
  DomSet.insert(&F.getEntryBlock());
  for (auto *CSI : Shape.CoroSuspends) {
    BasicBlock *SuspendBlock = CSI->getParent();
    assert(isSuspendBlock(SuspendBlock) && SuspendBlock->getSingleSuccessor() &&
           "should have split coro.suspend into its own block");
    DomSet.insert(SuspendBlock->getSingleSuccessor());
  }

  for (Instruction &I : instructions(F)) {
    AllocaInst* AI = dyn_cast<AllocaInst>(&I);
    if (!AI)
      continue;

    for (BasicBlock *DomBB : DomSet) {
      bool Valid = true;
      // lifetime.start markers of AI (possibly reached through a single-use
      // GEP/bitcast) that fall outside DomBB's dominated region.
      SmallVector<Instruction *, 1> Lifetimes;

      auto isLifetimeStart = [](Instruction* I) {
        if (auto* II = dyn_cast<IntrinsicInst>(I))
          return II->getIntrinsicID() == Intrinsic::lifetime_start;
        return false;
      };

      // Record U when it is a lifetime.start of AI, or a single-use pointer
      // cast of AI whose only user is a lifetime.start.
      auto collectLifetimeStart = [&](Instruction *U, AllocaInst *AI) {
        if (isLifetimeStart(U)) {
          Lifetimes.push_back(U);
          return true;
        }
        if (!U->hasOneUse() || U->stripPointerCasts() != AI)
          return false;
        if (isLifetimeStart(U->user_back())) {
          Lifetimes.push_back(U->user_back());
          return true;
        }
        return false;
      };

      for (User *U : AI->users()) {
        Instruction *UI = cast<Instruction>(U);
        // For all users except lifetime.start markers, if they are all
        // dominated by one of the basic blocks and do not cross
        // suspend points as well, then there is no need to spill the
        // instruction.
        if (!DT.dominates(DomBB, UI->getParent()) ||
            Checker.isDefinitionAcrossSuspend(DomBB, UI)) {
          // Skip lifetime.start, GEP and bitcast used by lifetime.start
          // markers.
          if (collectLifetimeStart(UI, AI))
            continue;
          Valid = false;
          break;
        }
      }
      // Sink lifetime.start markers to dominate block when they are
      // only used outside the region.
      if (Valid && Lifetimes.size() != 0) {
        // May be AI itself, when the type of AI is i8*
        auto *NewBitCast = [&](AllocaInst *AI) -> Value* {
          if (isa<AllocaInst>(Lifetimes[0]->getOperand(1)))
            return AI;
          auto *Int8PtrTy = Type::getInt8PtrTy(F.getContext());
          return CastInst::Create(Instruction::BitCast, AI, Int8PtrTy, "",
                                  DomBB->getTerminator());
        }(AI);

        // Clone one marker at the end of DomBB, pointing at AI (or the
        // freshly created bitcast of it).
        auto *NewLifetime = Lifetimes[0]->clone();
        NewLifetime->replaceUsesOfWith(NewLifetime->getOperand(1), NewBitCast);
        NewLifetime->insertBefore(DomBB->getTerminator());

        // All the outside lifetime.start markers are no longer necessary.
        for (Instruction *S : Lifetimes)
          S->eraseFromParent();

        // AI is handled; no need to try the remaining candidate blocks.
        break;
      }
    }
  }
}
1986 
void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
  // Rewrite swifterror arguments/allocas up front so the spill analysis
  // below only ever sees ordinary loads and stores of them.
  eliminateSwiftError(F, Shape);

  if (Shape.ABI == coro::ABI::Switch &&
      Shape.SwitchLowering.PromiseAlloca) {
    Shape.getSwitchCoroId()->clearPromise();
  }

  // Make sure that all coro.save, coro.suspend and the fallthrough coro.end
  // intrinsics are in their own blocks to simplify the logic of building up
  // SuspendCrossing data.
  for (auto *CSI : Shape.CoroSuspends) {
    if (auto *Save = CSI->getCoroSave())
      splitAround(Save, "CoroSave");
    splitAround(CSI, "CoroSuspend");
  }

  // Put CoroEnds into their own blocks.
  for (CoroEndInst *CE : Shape.CoroEnds)
    splitAround(CE, "CoroEnd");

  // Transforms multi-edge PHI Nodes, so that any value feeding into a PHI will
  // never have its definition separated from the PHI by the suspend point.
  rewritePHIs(F);

  // Build suspend crossing info.
  SuspendCrossingInfo Checker(F, Shape);

  IRBuilder<> Builder(F.getContext());
  SpillInfo Spills;
  SmallVector<CoroAllocaAllocInst*, 4> LocalAllocas;
  SmallVector<Instruction*, 4> DeadInstructions;

  // Rewriting materializable instructions at their uses may itself create
  // new suspend-crossing uses, so iterate (bounded at 4 rounds).
  for (int Repeat = 0; Repeat < 4; ++Repeat) {
    // See if there are materializable instructions across suspend points.
    for (Instruction &I : instructions(F))
      if (materializable(I))
        for (User *U : I.users())
          if (Checker.isDefinitionAcrossSuspend(I, U))
            Spills.emplace_back(&I, U);

    if (Spills.empty())
      break;

    // Rewrite materializable instructions to be materialized at the use point.
    LLVM_DEBUG(dump("Materializations", Spills));
    rewriteMaterializableInstructions(Builder, Spills);
    Spills.clear();
  }

  sinkLifetimeStartMarkers(F, Shape, Checker);
  // Collect lifetime.start info for each alloca: map the alloca to the set
  // of its lifetime.start intrinsics (or, when the pointer is cast first,
  // the casts feeding those intrinsics).
  using LifetimeStart = SmallPtrSet<Instruction *, 2>;
  llvm::DenseMap<Instruction *, std::unique_ptr<LifetimeStart>> LifetimeMap;
  for (Instruction &I : instructions(F)) {
    auto *II = dyn_cast<IntrinsicInst>(&I);
    if (!II || II->getIntrinsicID() != Intrinsic::lifetime_start)
      continue;

    // Operand 1 is the pointer; strip casts to find the underlying alloca.
    if (auto *OpInst = dyn_cast<Instruction>(II->getOperand(1))) {
      if (auto *AI = dyn_cast<AllocaInst>(OpInst->stripPointerCasts())) {

        if (LifetimeMap.find(AI) == LifetimeMap.end())
          LifetimeMap[AI] = std::make_unique<LifetimeStart>();
        // Record the intrinsic itself when it addresses the alloca directly,
        // otherwise record the intervening cast.
        LifetimeMap[AI]->insert(isa<AllocaInst>(OpInst) ? II : OpInst);
      }
    }
  }

  // Collect the spills for arguments and other not-materializable values.
  for (Argument &A : F.args())
    for (User *U : A.users())
      if (Checker.isDefinitionAcrossSuspend(A, U))
        Spills.emplace_back(&A, U);

  for (Instruction &I : instructions(F)) {
    // Values returned from coroutine structure intrinsics should not be part
    // of the Coroutine Frame.
    if (isCoroutineStructureIntrinsic(I) || &I == Shape.CoroBegin)
      continue;

    // The Coroutine Promise is always included in the coroutine frame, no
    // need to check for suspend crossing.
    if (Shape.ABI == coro::ABI::Switch &&
        Shape.SwitchLowering.PromiseAlloca == &I)
      continue;

    // Handle alloca.alloc specially here.
    if (auto AI = dyn_cast<CoroAllocaAllocInst>(&I)) {
      // Check whether the alloca's lifetime is bounded by suspend points.
      if (isLocalAlloca(AI)) {
        LocalAllocas.push_back(AI);
        continue;
      }

      // If not, do a quick rewrite of the alloca and then add spills of
      // the rewritten value.  The rewrite doesn't invalidate anything in
      // Spills because the other alloca intrinsics have no other operands
      // besides AI, and it doesn't invalidate the iteration because we delay
      // erasing AI.
      auto Alloc = lowerNonLocalAlloca(AI, Shape, DeadInstructions);

      for (User *U : Alloc->users()) {
        if (Checker.isDefinitionAcrossSuspend(*Alloc, U))
          Spills.emplace_back(Alloc, U);
      }
      continue;
    }

    // Ignore alloca.get; we process this as part of coro.alloca.alloc.
    if (isa<CoroAllocaGetInst>(I)) {
      continue;
    }

    auto Iter = LifetimeMap.find(&I);
    for (User *U : I.users()) {
      bool NeedSpill = false;

      // When I is an alloca with recorded lifetime.start markers, test the
      // markers (rather than the alloca itself) against this use; otherwise
      // test the definition directly.
      if (Iter != LifetimeMap.end())
        for (auto *S : *Iter->second) {
          if ((NeedSpill = Checker.isDefinitionAcrossSuspend(*S, U)))
            break;
        }
      else
        NeedSpill = Checker.isDefinitionAcrossSuspend(I, U);

      if (NeedSpill) {
        // We cannot spill a token.
        if (I.getType()->isTokenTy())
          report_fatal_error(
              "token definition is separated from the use by a suspend point");
        Spills.emplace_back(&I, U);
      }
    }
  }
  LLVM_DEBUG(dump("Spills", Spills));
  // retcon ABIs require all spill uses to sit after coro.begin.
  if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce)
    sinkSpillUsesAfterCoroBegin(F, Spills, Shape.CoroBegin);
  // Lay out the frame type, rewrite spills through it, and lower the
  // lifetime-bounded allocas to plain dynamic allocas.
  Shape.FrameTy = buildFrameType(F, Shape, Spills);
  Shape.FramePtr = insertSpills(Spills, Shape);
  lowerLocalAllocas(LocalAllocas, DeadInstructions);

  for (auto I : DeadInstructions)
    I->eraseFromParent();
}
2133