1 //===- CoroFrame.cpp - Builds and manipulates coroutine frame -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
// This file contains classes used to discover if for a particular value
// there is a path from its definition to a use that crosses a suspend point.
10 //
11 // Using the information discovered we form a Coroutine Frame structure to
12 // contain those values. All uses of those values are replaced with appropriate
13 // GEP + load from the coroutine frame. At the point of the definition we spill
14 // the value into the coroutine frame.
15 //
16 // TODO: pack values tightly using liveness info.
17 //===----------------------------------------------------------------------===//
18 
19 #include "CoroInternal.h"
20 #include "llvm/ADT/BitVector.h"
21 #include "llvm/ADT/SmallString.h"
22 #include "llvm/Analysis/PtrUseVisitor.h"
23 #include "llvm/Config/llvm-config.h"
24 #include "llvm/IR/CFG.h"
25 #include "llvm/IR/DIBuilder.h"
26 #include "llvm/IR/Dominators.h"
27 #include "llvm/IR/IRBuilder.h"
28 #include "llvm/IR/InstIterator.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Support/MathExtras.h"
31 #include "llvm/Support/circular_raw_ostream.h"
32 #include "llvm/Support/OptimizedStructLayout.h"
33 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
34 #include "llvm/Transforms/Utils/Local.h"
35 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
36 #include <algorithm>
37 
38 using namespace llvm;
39 
40 // The "coro-suspend-crossing" flag is very noisy. There is another debug type,
41 // "coro-frame", which results in leaner debug spew.
42 #define DEBUG_TYPE "coro-suspend-crossing"
43 
44 enum { SmallVectorThreshold = 32 };
45 
// Provides a two-way mapping between basic blocks and their indices.
47 namespace {
48 class BlockToIndexMapping {
49   SmallVector<BasicBlock *, SmallVectorThreshold> V;
50 
51 public:
52   size_t size() const { return V.size(); }
53 
54   BlockToIndexMapping(Function &F) {
55     for (BasicBlock &BB : F)
56       V.push_back(&BB);
57     llvm::sort(V);
58   }
59 
60   size_t blockToIndex(BasicBlock *BB) const {
61     auto *I = llvm::lower_bound(V, BB);
    assert(I != V.end() && *I == BB && "BlockToIndexMapping: Unknown block");
63     return I - V.begin();
64   }
65 
66   BasicBlock *indexToBlock(unsigned Index) const { return V[Index]; }
67 };
68 } // end anonymous namespace
69 
// The SuspendCrossingInfo maintains data that allows answering the question of
// whether, given two BasicBlocks A and B, there is a path from A to B that
// passes through a suspend point.
73 //
74 // For every basic block 'i' it maintains a BlockData that consists of:
75 //   Consumes:  a bit vector which contains a set of indices of blocks that can
76 //              reach block 'i'
//   Kills: a bit vector which contains a set of indices of blocks that can
//          reach block 'i', but at least one of the paths will cross a
//          suspend point
79 //   Suspend: a boolean indicating whether block 'i' contains a suspend point.
80 //   End: a boolean indicating whether block 'i' contains a coro.end intrinsic.
81 //
82 namespace {
83 struct SuspendCrossingInfo {
84   BlockToIndexMapping Mapping;
85 
86   struct BlockData {
87     BitVector Consumes;
88     BitVector Kills;
89     bool Suspend = false;
90     bool End = false;
91   };
92   SmallVector<BlockData, SmallVectorThreshold> Block;
93 
94   iterator_range<succ_iterator> successors(BlockData const &BD) const {
95     BasicBlock *BB = Mapping.indexToBlock(&BD - &Block[0]);
96     return llvm::successors(BB);
97   }
98 
99   BlockData &getBlockData(BasicBlock *BB) {
100     return Block[Mapping.blockToIndex(BB)];
101   }
102 
103   void dump() const;
104   void dump(StringRef Label, BitVector const &BV) const;
105 
106   SuspendCrossingInfo(Function &F, coro::Shape &Shape);
107 
108   bool hasPathCrossingSuspendPoint(BasicBlock *DefBB, BasicBlock *UseBB) const {
109     size_t const DefIndex = Mapping.blockToIndex(DefBB);
110     size_t const UseIndex = Mapping.blockToIndex(UseBB);
111 
112     bool const Result = Block[UseIndex].Kills[DefIndex];
113     LLVM_DEBUG(dbgs() << UseBB->getName() << " => " << DefBB->getName()
114                       << " answer is " << Result << "\n");
115     return Result;
116   }
117 
118   bool isDefinitionAcrossSuspend(BasicBlock *DefBB, User *U) const {
119     auto *I = cast<Instruction>(U);
120 
121     // We rewrote PHINodes, so that only the ones with exactly one incoming
122     // value need to be analyzed.
123     if (auto *PN = dyn_cast<PHINode>(I))
124       if (PN->getNumIncomingValues() > 1)
125         return false;
126 
127     BasicBlock *UseBB = I->getParent();
128 
129     // As a special case, treat uses by an llvm.coro.suspend.retcon
130     // as if they were uses in the suspend's single predecessor: the
131     // uses conceptually occur before the suspend.
132     if (isa<CoroSuspendRetconInst>(I)) {
133       UseBB = UseBB->getSinglePredecessor();
134       assert(UseBB && "should have split coro.suspend into its own block");
135     }
136 
137     return hasPathCrossingSuspendPoint(DefBB, UseBB);
138   }
139 
140   bool isDefinitionAcrossSuspend(Argument &A, User *U) const {
141     return isDefinitionAcrossSuspend(&A.getParent()->getEntryBlock(), U);
142   }
143 
144   bool isDefinitionAcrossSuspend(Instruction &I, User *U) const {
145     auto *DefBB = I.getParent();
146 
147     // As a special case, treat values produced by an llvm.coro.suspend.*
148     // as if they were defined in the single successor: the uses
149     // conceptually occur after the suspend.
150     if (isa<AnyCoroSuspendInst>(I)) {
151       DefBB = DefBB->getSingleSuccessor();
152       assert(DefBB && "should have split coro.suspend into its own block");
153     }
154 
155     return isDefinitionAcrossSuspend(DefBB, U);
156   }
157 };
158 } // end anonymous namespace
159 
160 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
161 LLVM_DUMP_METHOD void SuspendCrossingInfo::dump(StringRef Label,
162                                                 BitVector const &BV) const {
163   dbgs() << Label << ":";
164   for (size_t I = 0, N = BV.size(); I < N; ++I)
165     if (BV[I])
166       dbgs() << " " << Mapping.indexToBlock(I)->getName();
167   dbgs() << "\n";
168 }
169 
170 LLVM_DUMP_METHOD void SuspendCrossingInfo::dump() const {
171   for (size_t I = 0, N = Block.size(); I < N; ++I) {
172     BasicBlock *const B = Mapping.indexToBlock(I);
173     dbgs() << B->getName() << ":\n";
174     dump("   Consumes", Block[I].Consumes);
175     dump("      Kills", Block[I].Kills);
176   }
177   dbgs() << "\n";
178 }
179 #endif
180 
181 SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
182     : Mapping(F) {
183   const size_t N = Mapping.size();
184   Block.resize(N);
185 
186   // Initialize every block so that it consumes itself
187   for (size_t I = 0; I < N; ++I) {
188     auto &B = Block[I];
189     B.Consumes.resize(N);
190     B.Kills.resize(N);
191     B.Consumes.set(I);
192   }
193 
194   // Mark all CoroEnd Blocks. We do not propagate Kills beyond coro.ends as
195   // the code beyond coro.end is reachable during initial invocation of the
196   // coroutine.
197   for (auto *CE : Shape.CoroEnds)
198     getBlockData(CE->getParent()).End = true;
199 
200   // Mark all suspend blocks and indicate that they kill everything they
// consume. Note that crossing coro.save also requires a spill, as any code
202   // between coro.save and coro.suspend may resume the coroutine and all of the
203   // state needs to be saved by that time.
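  // For example (IR names are illustrative), the blocks holding %save and %sp
  // below are both marked as suspend blocks:
  //   %save = call token @llvm.coro.save(i8* %hdl)
  //   call void @may_resume()  ; may resume the coroutine, so all live state
  //                            ; must already be in the frame by this point
  //   %sp = call i8 @llvm.coro.suspend(token %save, i1 false)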
204   auto markSuspendBlock = [&](IntrinsicInst *BarrierInst) {
205     BasicBlock *SuspendBlock = BarrierInst->getParent();
206     auto &B = getBlockData(SuspendBlock);
207     B.Suspend = true;
208     B.Kills |= B.Consumes;
209   };
210   for (auto *CSI : Shape.CoroSuspends) {
211     markSuspendBlock(CSI);
212     if (auto *Save = CSI->getCoroSave())
213       markSuspendBlock(Save);
214   }
215 
216   // Iterate propagating consumes and kills until they stop changing.
217   int Iteration = 0;
218   (void)Iteration;
219 
220   bool Changed;
221   do {
222     LLVM_DEBUG(dbgs() << "iteration " << ++Iteration);
223     LLVM_DEBUG(dbgs() << "==============\n");
224 
225     Changed = false;
226     for (size_t I = 0; I < N; ++I) {
227       auto &B = Block[I];
228       for (BasicBlock *SI : successors(B)) {
229 
230         auto SuccNo = Mapping.blockToIndex(SI);
231 
        // Save the Consumes and Kills bitsets so that it is easy to see
        // if anything changed after propagation.
234         auto &S = Block[SuccNo];
235         auto SavedConsumes = S.Consumes;
236         auto SavedKills = S.Kills;
237 
238         // Propagate Kills and Consumes from block B into its successor S.
239         S.Consumes |= B.Consumes;
240         S.Kills |= B.Kills;
241 
        // If block B is a suspend block, it should propagate kills into its
        // successor for every block B consumes.
244         if (B.Suspend) {
245           S.Kills |= B.Consumes;
246         }
247         if (S.Suspend) {
248           // If block S is a suspend block, it should kill all of the blocks it
249           // consumes.
250           S.Kills |= S.Consumes;
251         } else if (S.End) {
252           // If block S is an end block, it should not propagate kills as the
253           // blocks following coro.end() are reached during initial invocation
254           // of the coroutine while all the data are still available on the
255           // stack or in the registers.
256           S.Kills.reset();
257         } else {
          // This is reached when block S is neither a suspend block nor a
          // coro.end block, and we need to make sure that it is not in its
          // own kill set.
260           S.Kills.reset(SuccNo);
261         }
262 
263         // See if anything changed.
264         Changed |= (S.Kills != SavedKills) || (S.Consumes != SavedConsumes);
265 
266         if (S.Kills != SavedKills) {
267           LLVM_DEBUG(dbgs() << "\nblock " << I << " follower " << SI->getName()
268                             << "\n");
269           LLVM_DEBUG(dump("S.Kills", S.Kills));
270           LLVM_DEBUG(dump("SavedKills", SavedKills));
271         }
272         if (S.Consumes != SavedConsumes) {
          LLVM_DEBUG(dbgs() << "\nblock " << I << " follower " << SI->getName()
                            << "\n");
274           LLVM_DEBUG(dump("S.Consume", S.Consumes));
275           LLVM_DEBUG(dump("SavedCons", SavedConsumes));
276         }
277       }
278     }
279   } while (Changed);
280   LLVM_DEBUG(dump());
281 }
282 
283 #undef DEBUG_TYPE // "coro-suspend-crossing"
284 #define DEBUG_TYPE "coro-frame"
285 
286 // We build up the list of spills for every case where a use is separated
287 // from the definition by a suspend point.
288 
289 static const unsigned InvalidFieldIndex = ~0U;
290 
291 namespace {
292 class Spill {
293   Value *Def = nullptr;
294   Instruction *User = nullptr;
295   unsigned FieldNo = InvalidFieldIndex;
296 
297 public:
298   Spill(Value *Def, llvm::User *U) : Def(Def), User(cast<Instruction>(U)) {}
299 
300   Value *def() const { return Def; }
301   Instruction *user() const { return User; }
302   BasicBlock *userBlock() const { return User->getParent(); }
303 
304   // Note that field index is stored in the first SpillEntry for a particular
  // definition. Subsequent mentions of a definition do not have FieldNo
306   // assigned. This works out fine as the users of Spills capture the info about
307   // the definition the first time they encounter it. Consider refactoring
308   // SpillInfo into two arrays to normalize the spill representation.
309   unsigned fieldIndex() const {
310     assert(FieldNo != InvalidFieldIndex && "Accessing unassigned field");
311     return FieldNo;
312   }
313   void setFieldIndex(unsigned FieldNumber) {
314     assert(FieldNo == InvalidFieldIndex && "Reassigning field number");
315     FieldNo = FieldNumber;
316   }
317 };
318 } // namespace
319 
320 // Note that there may be more than one record with the same value of Def in
321 // the SpillInfo vector.
322 using SpillInfo = SmallVector<Spill, 8>;
323 
324 #ifndef NDEBUG
325 static void dump(StringRef Title, SpillInfo const &Spills) {
326   dbgs() << "------------- " << Title << "--------------\n";
327   Value *CurrentValue = nullptr;
328   for (auto const &E : Spills) {
329     if (CurrentValue != E.def()) {
330       CurrentValue = E.def();
331       CurrentValue->dump();
332     }
333     dbgs() << "   user: ";
334     E.user()->dump();
335   }
336 }
337 #endif
338 
339 namespace {
// We cannot rely solely on the natural alignment of a type when building a
// coroutine frame: if the alignment specified on the Alloca instruction
// differs from the natural alignment of the alloca type, we will need to
// insert padding.
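// For example (illustrative), an `alloca i32, align 8` contributes an i32
// field whose natural alignment is only 4; the layout below must place it at
// an 8-byte boundary, inserting an [N x i8] padding field if necessary.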
344 class FrameTypeBuilder {
345   struct Field {
346     uint64_t Size;
347     uint64_t Offset;
348     Spill *ForSpill;
349     Type *Ty;
350     unsigned FieldIndex;
351     Align Alignment;
352     Align TyAlignment;
353   };
354 
355   const DataLayout &DL;
356   LLVMContext &Context;
357   uint64_t StructSize = 0;
358   Align StructAlign;
359   bool IsFinished = false;
360 
361   SmallVector<Field, 8> Fields;
362   DenseMap<Value*, unsigned> FieldIndexByKey;
363 
364 public:
365   FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL)
366     : DL(DL), Context(Context) {}
367 
368   class FieldId {
369     size_t Value;
370     explicit FieldId(size_t Value) : Value(Value) {}
371 
372     friend class FrameTypeBuilder;
373   };
374 
375   /// Add a field to this structure for the storage of an `alloca`
376   /// instruction.
377   FieldId addFieldForAlloca(AllocaInst *AI, Spill *ForSpill = nullptr,
378                             bool IsHeader = false) {
379     Type *Ty = AI->getAllocatedType();
380 
381     // Make an array type if this is a static array allocation.
382     if (AI->isArrayAllocation()) {
383       if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
384         Ty = ArrayType::get(Ty, CI->getValue().getZExtValue());
385       else
386         report_fatal_error("Coroutines cannot handle non static allocas yet");
387     }
388 
389     return addField(Ty, AI->getAlign(), ForSpill, IsHeader);
390   }
391 
392   /// Add a field to this structure.
393   FieldId addField(Type *Ty, MaybeAlign FieldAlignment,
394                    Spill *ForSpill = nullptr,
395                    bool IsHeader = false) {
396     assert(!IsFinished && "adding fields to a finished builder");
397     assert(Ty && "must provide a type for a field");
398 
399     // The field size is always the alloc size of the type.
400     uint64_t FieldSize = DL.getTypeAllocSize(Ty);
401 
402     // The field alignment might not be the type alignment, but we need
403     // to remember the type alignment anyway to build the type.
404     Align TyAlignment = DL.getABITypeAlign(Ty);
405     if (!FieldAlignment) FieldAlignment = TyAlignment;
406 
407     // Lay out header fields immediately.
408     uint64_t Offset;
409     if (IsHeader) {
410       Offset = alignTo(StructSize, FieldAlignment);
411       StructSize = Offset + FieldSize;
412 
413     // Everything else has a flexible offset.
414     } else {
415       Offset = OptimizedStructLayoutField::FlexibleOffset;
416     }
417 
418     Fields.push_back({FieldSize, Offset, ForSpill, Ty, 0,
419                       *FieldAlignment, TyAlignment});
420     return FieldId(Fields.size() - 1);
421   }
422 
423   /// Finish the layout and set the body on the given type.
424   void finish(StructType *Ty);
425 
426   uint64_t getStructSize() const {
427     assert(IsFinished && "not yet finished!");
428     return StructSize;
429   }
430 
431   Align getStructAlign() const {
432     assert(IsFinished && "not yet finished!");
433     return StructAlign;
434   }
435 
436   unsigned getFieldIndex(FieldId Id) const {
437     assert(IsFinished && "not yet finished!");
438     return Fields[Id.Value].FieldIndex;
439   }
440 };
441 } // namespace
442 
443 void FrameTypeBuilder::finish(StructType *Ty) {
444   assert(!IsFinished && "already finished!");
445 
446   // Prepare the optimal-layout field array.
447   // The Id in the layout field is a pointer to our Field for it.
448   SmallVector<OptimizedStructLayoutField, 8> LayoutFields;
449   LayoutFields.reserve(Fields.size());
450   for (auto &Field : Fields) {
451     LayoutFields.emplace_back(&Field, Field.Size, Field.Alignment,
452                               Field.Offset);
453   }
454 
455   // Perform layout.
456   auto SizeAndAlign = performOptimizedStructLayout(LayoutFields);
457   StructSize = SizeAndAlign.first;
458   StructAlign = SizeAndAlign.second;
459 
460   auto getField = [](const OptimizedStructLayoutField &LayoutField) -> Field & {
461     return *static_cast<Field *>(const_cast<void*>(LayoutField.Id));
462   };
463 
464   // We need to produce a packed struct type if there's a field whose
465   // assigned offset isn't a multiple of its natural type alignment.
466   bool Packed = [&] {
467     for (auto &LayoutField : LayoutFields) {
468       auto &F = getField(LayoutField);
469       if (!isAligned(F.TyAlignment, LayoutField.Offset))
470         return true;
471     }
472     return false;
473   }();
474 
475   // Build the struct body.
476   SmallVector<Type*, 16> FieldTypes;
477   FieldTypes.reserve(LayoutFields.size() * 3 / 2);
478   uint64_t LastOffset = 0;
479   for (auto &LayoutField : LayoutFields) {
480     auto &F = getField(LayoutField);
481 
482     auto Offset = LayoutField.Offset;
483 
484     // Add a padding field if there's a padding gap and we're either
485     // building a packed struct or the padding gap is more than we'd
486     // get from aligning to the field type's natural alignment.
487     assert(Offset >= LastOffset);
488     if (Offset != LastOffset) {
489       if (Packed || alignTo(LastOffset, F.TyAlignment) != Offset)
490         FieldTypes.push_back(ArrayType::get(Type::getInt8Ty(Context),
491                                             Offset - LastOffset));
492     }
493 
494     // Record the layout information into both the Field and the
495     // original Spill, if there is one.
496     F.Offset = Offset;
497     F.FieldIndex = FieldTypes.size();
498     if (F.ForSpill) {
499       F.ForSpill->setFieldIndex(F.FieldIndex);
500     }
501 
502     FieldTypes.push_back(F.Ty);
503     LastOffset = Offset + F.Size;
504   }
505 
506   Ty->setBody(FieldTypes, Packed);
507 
508 #ifndef NDEBUG
509   // Check that the IR layout matches the offsets we expect.
510   auto Layout = DL.getStructLayout(Ty);
511   for (auto &F : Fields) {
512     assert(Ty->getElementType(F.FieldIndex) == F.Ty);
513     assert(Layout->getElementOffset(F.FieldIndex) == F.Offset);
514   }
515 #endif
516 
517   IsFinished = true;
518 }
519 
520 // Build a struct that will keep state for an active coroutine.
521 //   struct f.frame {
522 //     ResumeFnTy ResumeFnAddr;
523 //     ResumeFnTy DestroyFnAddr;
524 //     int ResumeIndex;
525 //     ... promise (if present) ...
526 //     ... spills ...
527 //   };
528 static StructType *buildFrameType(Function &F, coro::Shape &Shape,
529                                   SpillInfo &Spills) {
530   LLVMContext &C = F.getContext();
531   const DataLayout &DL = F.getParent()->getDataLayout();
532   StructType *FrameTy = [&] {
533     SmallString<32> Name(F.getName());
534     Name.append(".Frame");
535     return StructType::create(C, Name);
536   }();
537 
538   FrameTypeBuilder B(C, DL);
539 
540   AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
541   Optional<FrameTypeBuilder::FieldId> PromiseFieldId;
542   Optional<FrameTypeBuilder::FieldId> SwitchIndexFieldId;
543 
544   if (Shape.ABI == coro::ABI::Switch) {
545     auto *FramePtrTy = FrameTy->getPointerTo();
546     auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy,
547                                    /*IsVarArg=*/false);
548     auto *FnPtrTy = FnTy->getPointerTo();
549 
550     // Add header fields for the resume and destroy functions.
551     // We can rely on these being perfectly packed.
552     B.addField(FnPtrTy, None, nullptr, /*header*/ true);
553     B.addField(FnPtrTy, None, nullptr, /*header*/ true);
554 
555     // Add a header field for the promise if there is one.
556     if (PromiseAlloca) {
557       PromiseFieldId =
558         B.addFieldForAlloca(PromiseAlloca, nullptr, /*header*/ true);
559     }
560 
561     // Add a field to store the suspend index.  This doesn't need to
562     // be in the header.
563     unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size()));
564     Type *IndexType = Type::getIntNTy(C, IndexBits);
565 
566     SwitchIndexFieldId = B.addField(IndexType, None);
567   } else {
568     assert(PromiseAlloca == nullptr && "lowering doesn't support promises");
569   }
570 
571   Value *CurrentDef = nullptr;
572 
573   // Create an entry for every spilled value.
574   for (auto &S : Spills) {
575     // We can have multiple entries in Spills for a single value, but
576     // they should form a contiguous run.  Ignore all but the first.
577     if (CurrentDef == S.def())
578       continue;
579 
580     CurrentDef = S.def();
581 
582     assert(CurrentDef != PromiseAlloca &&
583            "recorded spill use of promise alloca?");
584 
585     if (auto *AI = dyn_cast<AllocaInst>(CurrentDef)) {
586       B.addFieldForAlloca(AI, &S);
587     } else {
588       Type *Ty = CurrentDef->getType();
589       B.addField(Ty, None, &S);
590     }
591   }
592 
593   B.finish(FrameTy);
594   Shape.FrameAlign = B.getStructAlign();
595   Shape.FrameSize = B.getStructSize();
596 
597   switch (Shape.ABI) {
598   // In the switch ABI, remember the field indices for the promise and
599   // switch-index fields.
600   case coro::ABI::Switch:
601     Shape.SwitchLowering.IndexField =
602       B.getFieldIndex(*SwitchIndexFieldId);
603     Shape.SwitchLowering.PromiseField =
604       (PromiseAlloca ? B.getFieldIndex(*PromiseFieldId) : 0);
605 
606     // Also round the frame size up to a multiple of its alignment, as is
607     // generally expected in C/C++.
608     Shape.FrameSize = alignTo(Shape.FrameSize, Shape.FrameAlign);
609     break;
610 
611   // In the retcon ABI, remember whether the frame is inline in the storage.
612   case coro::ABI::Retcon:
613   case coro::ABI::RetconOnce: {
614     auto Id = Shape.getRetconCoroId();
615     Shape.RetconLowering.IsFrameInlineInStorage
616       = (B.getStructSize() <= Id->getStorageSize() &&
617          B.getStructAlign() <= Id->getStorageAlignment());
618     break;
619   }
620   }
621 
622   return FrameTy;
623 }
624 
// We use a pointer use visitor to discover if there are any writes into an
// alloca that precede CoroBegin. If that is the case, insertSpills will copy
// the value from the alloca into the coroutine frame spill slot corresponding
// to that alloca. We also collect any aliases pointing to the alloca that are
// created before CoroBegin but used after CoroBegin. These aliases will be
// recreated after CoroBegin from the frame address so that later references
// point to the frame instead of the stack.
632 // Note: We are repurposing PtrUseVisitor's isEscaped() to mean whether the
633 // pointer is potentially written into.
// TODO: If the pointer really escapes, we are in big trouble because we
// will be escaping a pointer to a stack address that will soon no longer
// exist. However, most escape analyses aren't good enough to tell precisely,
// so we assume that if a pointer is escaped, it is written into.
638 // TODO: Another potential issue is if we are creating an alias through
639 // a function call, e.g:
640 //   %a = AllocaInst ...
641 //   %b = call @computeAddress(... %a)
642 // If %b is an alias of %a and will be used after CoroBegin, this will be broken
643 // and there is nothing we can do about it.
644 namespace {
645 struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
646   using Base = PtrUseVisitor<AllocaUseVisitor>;
647   AllocaUseVisitor(const DataLayout &DL, const DominatorTree &DT,
648                    const CoroBeginInst &CB)
649       : PtrUseVisitor(DL), DT(DT), CoroBegin(CB) {}
650 
  // We are only interested in uses that are not dominated by coro.begin.
652   void visit(Instruction &I) {
653     if (!DT.dominates(&CoroBegin, &I))
654       Base::visit(I);
655   }
656   // We need to provide this overload as PtrUseVisitor uses a pointer based
657   // visiting function.
658   void visit(Instruction *I) { return visit(*I); }
659 
  // We cannot handle PHINodes and SelectInsts because they could be selecting
  // between two addresses that point to different allocas.
662   void visitPHINode(PHINode &I) {
663     assert(!usedAfterCoroBegin(I) &&
664            "Unable to handle PHI node of aliases created before CoroBegin but "
665            "used after CoroBegin");
666   }
667 
668   void visitSelectInst(SelectInst &I) {
669     assert(!usedAfterCoroBegin(I) &&
670            "Unable to handle Select of aliases created before CoroBegin but "
671            "used after CoroBegin");
672   }
673 
674   void visitLoadInst(LoadInst &) {}
675 
  // If the pointer is used as the value operand of the store, it has escaped
  // and anything can write into that memory. If it is used as the pointer
  // operand, we are definitely writing into the alloca and therefore we need
  // to copy.
679   void visitStoreInst(StoreInst &SI) { PI.setEscaped(&SI); }
680 
681   // All mem intrinsics modify the data.
682   void visitMemIntrinsic(MemIntrinsic &MI) { PI.setEscaped(&MI); }
683 
684   void visitBitCastInst(BitCastInst &BC) {
685     Base::visitBitCastInst(BC);
686     handleAlias(BC);
687   }
688 
689   void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
690     Base::visitAddrSpaceCastInst(ASC);
691     handleAlias(ASC);
692   }
693 
694   void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
695     // The base visitor will adjust Offset accordingly.
696     Base::visitGetElementPtrInst(GEPI);
697     handleAlias(GEPI);
698   }
699 
700   const SmallVector<std::pair<Instruction *, APInt>, 1> &getAliases() const {
701     return Aliases;
702   }
703 
704 private:
705   const DominatorTree &DT;
706   const CoroBeginInst &CoroBegin;
  // All aliases of the original AllocaInst that are used after CoroBegin.
  // Each entry contains the instruction and the offset in the original alloca.
709   SmallVector<std::pair<Instruction *, APInt>, 1> Aliases{};
710 
711   bool usedAfterCoroBegin(Instruction &I) {
712     for (auto &U : I.uses())
713       if (DT.dominates(&CoroBegin, U))
714         return true;
715     return false;
716   }
717 
718   void handleAlias(Instruction &I) {
719     if (!usedAfterCoroBegin(I))
720       return;
721 
722     assert(IsOffsetKnown && "Can only handle alias with known offset created "
723                             "before CoroBegin and used after");
724     Aliases.emplace_back(&I, Offset);
725   }
726 };
727 } // namespace
728 
// We need to make room to insert a spill after the initial PHIs, but before
// the catchswitch instruction. Placing the spill before the catchswitch would
// violate the requirement that a catchswitch, like all other EH pads, must be
// the first non-PHI instruction in a block.
732 //
733 // Split away catchswitch into a separate block and insert in its place:
734 //
735 //   cleanuppad <InsertPt> cleanupret.
736 //
// The cleanupret instruction will act as an insert point for the spill.
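//
// For example (block and value names are illustrative):
//
//   dispatch:
//     %cs = catchswitch within none [label %handler] unwind to caller
//
// becomes
//
//   dispatch:                            ; spills are inserted before the
//     %pad = cleanuppad within none []   ; cleanupret
//     cleanupret from %pad unwind label %dispatch.cont
//   dispatch.cont:
//     %cs = catchswitch within none [label %handler] unwind to caller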
738 static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) {
739   BasicBlock *CurrentBlock = CatchSwitch->getParent();
740   BasicBlock *NewBlock = CurrentBlock->splitBasicBlock(CatchSwitch);
741   CurrentBlock->getTerminator()->eraseFromParent();
742 
743   auto *CleanupPad =
744       CleanupPadInst::Create(CatchSwitch->getParentPad(), {}, "", CurrentBlock);
745   auto *CleanupRet =
746       CleanupReturnInst::Create(CleanupPad, NewBlock, CurrentBlock);
747   return CleanupRet;
748 }
749 
750 // Replace all alloca and SSA values that are accessed across suspend points
751 // with GetElementPointer from coroutine frame + loads and stores. Create an
752 // AllocaSpillBB that will become the new entry block for the resume parts of
753 // the coroutine:
754 //
755 //    %hdl = coro.begin(...)
756 //    whatever
757 //
758 // becomes:
759 //
760 //    %hdl = coro.begin(...)
761 //    %FramePtr = bitcast i8* hdl to %f.frame*
762 //    br label %AllocaSpillBB
763 //
764 //  AllocaSpillBB:
765 //    ; geps corresponding to allocas that were moved to coroutine frame
766 //    br label PostSpill
767 //
768 //  PostSpill:
769 //    whatever
770 //
771 //
772 static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) {
773   auto *CB = Shape.CoroBegin;
774   LLVMContext &C = CB->getContext();
775   IRBuilder<> Builder(CB->getNextNode());
776   StructType *FrameTy = Shape.FrameTy;
777   PointerType *FramePtrTy = FrameTy->getPointerTo();
778   auto *FramePtr =
779       cast<Instruction>(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr"));
780   DominatorTree DT(*CB->getFunction());
781 
782   Value *CurrentValue = nullptr;
783   BasicBlock *CurrentBlock = nullptr;
784   Value *CurrentReload = nullptr;
785 
786   // Proper field number will be read from field definition.
787   unsigned Index = InvalidFieldIndex;
788 
  // We need to keep track of any allocas that need "spilling"
  // since they will now live in the coroutine frame; all accesses to them
  // need to be changed, not just the accesses across suspend points.
  // We remember the allocas and their indices to be handled once we have
  // processed all the spills.
794   SmallVector<std::pair<AllocaInst *, unsigned>, 4> Allocas;
795 
  // The promise alloca (if present) doesn't show up in the spills and has a
  // special field number.
798   if (auto *PromiseAlloca = Shape.getPromiseAlloca()) {
799     assert(Shape.ABI == coro::ABI::Switch);
800     Allocas.emplace_back(PromiseAlloca, Shape.getPromiseField());
801   }
802 
803   // Create a GEP with the given index into the coroutine frame for the original
804   // value Orig. Appends an extra 0 index for array-allocas, preserving the
805   // original type.
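  //
  // For example (field index and names are illustrative), for
  //   %x = alloca i32, i32 4
  // placed at frame field 3 this produces
  //   %x.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr,
  //             i32 0, i32 3, i32 0
  // which has type i32*, matching the type of the original alloca.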
806   auto GetFramePointer = [&](uint32_t Index, Value *Orig) -> Value * {
807     SmallVector<Value *, 3> Indices = {
808         ConstantInt::get(Type::getInt32Ty(C), 0),
809         ConstantInt::get(Type::getInt32Ty(C), Index),
810     };
811 
812     if (auto *AI = dyn_cast<AllocaInst>(Orig)) {
813       if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) {
814         auto Count = CI->getValue().getZExtValue();
815         if (Count > 1) {
816           Indices.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
817         }
818       } else {
819         report_fatal_error("Coroutines cannot handle non static allocas yet");
820       }
821     }
822 
823     return Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices);
824   };
825 
826   // Create a load instruction to reload the spilled value from the coroutine
827   // frame. Populates the Value pointer reference provided with the frame GEP.
828   auto CreateReload = [&](Instruction *InsertBefore, Value *&G) {
829     assert(Index != InvalidFieldIndex && "accessing unassigned field number");
830     Builder.SetInsertPoint(InsertBefore);
831 
832     G = GetFramePointer(Index, CurrentValue);
833     G->setName(CurrentValue->getName() + Twine(".reload.addr"));
834 
835     return isa<AllocaInst>(CurrentValue)
836                ? G
837                : Builder.CreateLoad(FrameTy->getElementType(Index), G,
838                                     CurrentValue->getName() + Twine(".reload"));
839   };
840 
841   Value *GEP = nullptr, *CurrentGEP = nullptr;
842   for (auto const &E : Spills) {
843     // If we have not seen the value, generate a spill.
844     if (CurrentValue != E.def()) {
845       CurrentValue = E.def();
846       CurrentBlock = nullptr;
847       CurrentReload = nullptr;
848 
849       Index = E.fieldIndex();
850 
851       if (auto *AI = dyn_cast<AllocaInst>(CurrentValue)) {
        // A spilled AllocaInst will be replaced with a GEP from the coroutine
        // frame; there is no spill required.
854         Allocas.emplace_back(AI, Index);
855         if (!AI->isStaticAlloca())
856           report_fatal_error("Coroutines cannot handle non static allocas yet");
857       } else {
858         // Otherwise, create a store instruction storing the value into the
859         // coroutine frame.
860 
861         Instruction *InsertPt = nullptr;
862         if (auto Arg = dyn_cast<Argument>(CurrentValue)) {
863           // For arguments, we will place the store instruction right after
864           // the coroutine frame pointer instruction, i.e. bitcast of
865           // coro.begin from i8* to %f.frame*.
866           InsertPt = FramePtr->getNextNode();
867 
868           // If we're spilling an Argument, make sure we clear 'nocapture'
869           // from the coroutine function.
870           Arg->getParent()->removeParamAttr(Arg->getArgNo(),
871                                             Attribute::NoCapture);
872 
873         } else if (auto CSI = dyn_cast<AnyCoroSuspendInst>(CurrentValue)) {
874           // Don't spill immediately after a suspend; splitting assumes
875           // that the suspend will be followed by a branch.
876           InsertPt = CSI->getParent()->getSingleSuccessor()->getFirstNonPHI();
877         } else {
878           auto *I = cast<Instruction>(CurrentValue);
879           if (!DT.dominates(CB, I)) {
            // If it is not dominated by CoroBegin, then the spill should be
            // inserted immediately after the coroutine frame pointer is
            // computed.
882             InsertPt = FramePtr->getNextNode();
883           } else if (auto *II = dyn_cast<InvokeInst>(I)) {
884             // If we are spilling the result of the invoke instruction, split
885             // the normal edge and insert the spill in the new block.
886             auto *NewBB = SplitEdge(II->getParent(), II->getNormalDest());
887             InsertPt = NewBB->getTerminator();
888           } else if (isa<PHINode>(I)) {
            // Skip PHINodes and EH pad instructions.
890             BasicBlock *DefBlock = I->getParent();
891             if (auto *CSI =
892                     dyn_cast<CatchSwitchInst>(DefBlock->getTerminator()))
893               InsertPt = splitBeforeCatchSwitch(CSI);
894             else
895               InsertPt = &*DefBlock->getFirstInsertionPt();
896           } else {
897             assert(!I->isTerminator() && "unexpected terminator");
898             // For all other values, the spill is placed immediately after
899             // the definition.
900             InsertPt = I->getNextNode();
901           }
902         }
903 
904         Builder.SetInsertPoint(InsertPt);
905         auto *G = Builder.CreateConstInBoundsGEP2_32(
906             FrameTy, FramePtr, 0, Index,
907             CurrentValue->getName() + Twine(".spill.addr"));
908         Builder.CreateStore(CurrentValue, G);
909       }
910     }
911 
912     // If we have not seen the use block, generate a reload in it.
913     if (CurrentBlock != E.userBlock()) {
914       CurrentBlock = E.userBlock();
915       CurrentReload = CreateReload(&*CurrentBlock->getFirstInsertionPt(), GEP);
916     }
917 
918     // If we have a single edge PHINode, remove it and replace it with a reload
919     // from the coroutine frame. (We already took care of multi edge PHINodes
920     // by rewriting them in the rewritePHIs function).
921     if (auto *PN = dyn_cast<PHINode>(E.user())) {
922       assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming "
923                                                 "values in the PHINode");
924       PN->replaceAllUsesWith(CurrentReload);
925       PN->eraseFromParent();
926       continue;
927     }
928 
929     // If we have not seen this GEP instruction, migrate any dbg.declare from
930     // the alloca to it.
931     if (CurrentGEP != GEP) {
932       CurrentGEP = GEP;
933       TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(CurrentValue);
934       if (!DIs.empty())
935         DIBuilder(*CurrentBlock->getParent()->getParent(),
936                   /*AllowUnresolved*/ false)
937             .insertDeclare(CurrentGEP, DIs.front()->getVariable(),
938                            DIs.front()->getExpression(),
939                            DIs.front()->getDebugLoc(), DIs.front());
940     }
941 
942     // Replace all uses of CurrentValue in the current instruction with reload.
943     E.user()->replaceUsesOfWith(CurrentValue, CurrentReload);
944   }
945 
946   BasicBlock *FramePtrBB = FramePtr->getParent();
947 
948   auto SpillBlock =
949     FramePtrBB->splitBasicBlock(FramePtr->getNextNode(), "AllocaSpillBB");
950   SpillBlock->splitBasicBlock(&SpillBlock->front(), "PostSpill");
951   Shape.AllocaSpillBlock = SpillBlock;
952 
953   // retcon and retcon.once lowering assumes all uses have been sunk.
954   if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce) {
    // If we found any allocas, replace all of their remaining uses with GEPs.
956     Builder.SetInsertPoint(&SpillBlock->front());
957     for (auto &P : Allocas) {
958       auto *G = GetFramePointer(P.second, P.first);
959 
960       // We are not using ReplaceInstWithInst(P.first, cast<Instruction>(G))
961       // here, as we are changing location of the instruction.
962       G->takeName(P.first);
963       P.first->replaceAllUsesWith(G);
964       P.first->eraseFromParent();
965     }
966     return FramePtr;
967   }
968 
  // If we found any allocas, replace all of their remaining uses with GEP
  // instructions. Because new dbg.declare intrinsics have been created for
  // these allocas, we also delete the original dbg.declare intrinsics and
  // replace their remaining debug uses with undef.
  // Note: We cannot replace the allocas with GEP instructions
  // indiscriminately, as some of the uses may not be dominated by CoroBegin.
974   bool MightNeedToCopy = false;
975   Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front());
976   SmallVector<Instruction *, 4> UsersToUpdate;
977   for (auto &P : Allocas) {
978     AllocaInst *const A = P.first;
979 
980     for (auto *DI : FindDbgDeclareUses(A))
981       DI->eraseFromParent();
982     replaceDbgUsesWithUndef(A);
983 
984     UsersToUpdate.clear();
985     for (User *U : A->users()) {
986       auto *I = cast<Instruction>(U);
987       if (DT.dominates(CB, I))
988         UsersToUpdate.push_back(I);
989       else
990         MightNeedToCopy = true;
991     }
992     if (!UsersToUpdate.empty()) {
993       auto *G = GetFramePointer(P.second, A);
994       G->takeName(A);
995       for (Instruction *I : UsersToUpdate)
996         I->replaceUsesOfWith(A, G);
997     }
998   }
  // If we discovered such uses not dominated by CoroBegin, see if any of them
  // precede CoroBegin and contain instructions that can modify the value of
  // the alloca, which would therefore require copying the value into the
  // spill slot in the coroutine frame.
1003   if (MightNeedToCopy) {
1004     Builder.SetInsertPoint(FramePtr->getNextNode());
1005 
1006     for (auto &P : Allocas) {
1007       AllocaInst *const A = P.first;
1008       AllocaUseVisitor Visitor(A->getModule()->getDataLayout(), DT, *CB);
1009       auto PtrI = Visitor.visitPtr(*A);
1010       assert(!PtrI.isAborted());
1011       if (PtrI.isEscaped()) {
1012         // isEscaped really means potentially modified before CoroBegin.
1013         if (A->isArrayAllocation())
1014           report_fatal_error(
1015               "Coroutines cannot handle copying of array allocas yet");
1016 
1017         auto *G = GetFramePointer(P.second, A);
1018         auto *Value = Builder.CreateLoad(A->getAllocatedType(), A);
1019         Builder.CreateStore(Value, G);
1020       }
      // For each alias to the alloca created before CoroBegin but used after
      // CoroBegin, we recreate it after CoroBegin by applying the offset
      // to the pointer in the frame.
1024       for (const auto &Alias : Visitor.getAliases()) {
1025         auto *FramePtr = GetFramePointer(P.second, A);
1026         auto *FramePtrRaw =
1027             Builder.CreateBitCast(FramePtr, Type::getInt8PtrTy(C));
1028         auto *AliasPtr = Builder.CreateGEP(
1029             FramePtrRaw, ConstantInt::get(Type::getInt64Ty(C), Alias.second));
1030         auto *AliasPtrTyped =
1031             Builder.CreateBitCast(AliasPtr, Alias.first->getType());
1032         Alias.first->replaceUsesWithIf(
1033             AliasPtrTyped, [&](Use &U) { return DT.dominates(CB, U); });
1034       }
1035     }
1036   }
1037   return FramePtr;
1038 }
1039 
1040 // Sets the unwind edge of an instruction to a particular successor.
1041 static void setUnwindEdgeTo(Instruction *TI, BasicBlock *Succ) {
1042   if (auto *II = dyn_cast<InvokeInst>(TI))
1043     II->setUnwindDest(Succ);
1044   else if (auto *CS = dyn_cast<CatchSwitchInst>(TI))
1045     CS->setUnwindDest(Succ);
1046   else if (auto *CR = dyn_cast<CleanupReturnInst>(TI))
1047     CR->setUnwindDest(Succ);
1048   else
1049     llvm_unreachable("unexpected terminator instruction");
1050 }
1051 
1052 // Replaces all uses of OldPred with the NewPred block in all PHINodes in a
1053 // block.
1054 static void updatePhiNodes(BasicBlock *DestBB, BasicBlock *OldPred,
1055                            BasicBlock *NewPred,
1056                            PHINode *LandingPadReplacement) {
1057   unsigned BBIdx = 0;
1058   for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
1059     PHINode *PN = cast<PHINode>(I);
1060 
1061     // We manually update the LandingPadReplacement PHINode and it is the last
1062     // PHI Node. So, if we find it, we are done.
1063     if (LandingPadReplacement == PN)
1064       break;
1065 
1066     // Reuse the previous value of BBIdx if it lines up.  In cases where we
1067     // have multiple phi nodes with *lots* of predecessors, this is a speed
1068     // win because we don't have to scan the PHI looking for TIBB.  This
1069     // happens because the BB list of PHI nodes are usually in the same
1070     // order.
1071     if (PN->getIncomingBlock(BBIdx) != OldPred)
1072       BBIdx = PN->getBasicBlockIndex(OldPred);
1073 
1074     assert(BBIdx != (unsigned)-1 && "Invalid PHI Index!");
1075     PN->setIncomingBlock(BBIdx, NewPred);
1076   }
1077 }
1078 
1079 // Uses SplitEdge unless the successor block is an EHPad, in which case do EH
1080 // specific handling.
1081 static BasicBlock *ehAwareSplitEdge(BasicBlock *BB, BasicBlock *Succ,
1082                                     LandingPadInst *OriginalPad,
1083                                     PHINode *LandingPadReplacement) {
1084   auto *PadInst = Succ->getFirstNonPHI();
1085   if (!LandingPadReplacement && !PadInst->isEHPad())
1086     return SplitEdge(BB, Succ);
1087 
1088   auto *NewBB = BasicBlock::Create(BB->getContext(), "", BB->getParent(), Succ);
1089   setUnwindEdgeTo(BB->getTerminator(), NewBB);
1090   updatePhiNodes(Succ, BB, NewBB, LandingPadReplacement);
1091 
1092   if (LandingPadReplacement) {
1093     auto *NewLP = OriginalPad->clone();
1094     auto *Terminator = BranchInst::Create(Succ, NewBB);
1095     NewLP->insertBefore(Terminator);
1096     LandingPadReplacement->addIncoming(NewLP, NewBB);
1097     return NewBB;
1098   }
1099   Value *ParentPad = nullptr;
1100   if (auto *FuncletPad = dyn_cast<FuncletPadInst>(PadInst))
1101     ParentPad = FuncletPad->getParentPad();
1102   else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(PadInst))
1103     ParentPad = CatchSwitch->getParentPad();
1104   else
1105     llvm_unreachable("handling for other EHPads not implemented yet");
1106 
1107   auto *NewCleanupPad = CleanupPadInst::Create(ParentPad, {}, "", NewBB);
1108   CleanupReturnInst::Create(NewCleanupPad, Succ, NewBB);
1109   return NewBB;
1110 }
1111 
1112 static void rewritePHIs(BasicBlock &BB) {
  // For every incoming edge we will create a block holding all the
  // incoming values in single-incoming PHI nodes.
1115   //
1116   // loop:
1117   //    %n.val = phi i32[%n, %entry], [%inc, %loop]
1118   //
1119   // It will create:
1120   //
1121   // loop.from.entry:
1122   //    %n.loop.pre = phi i32 [%n, %entry]
  //    br label %loop
1124   // loop.from.loop:
1125   //    %inc.loop.pre = phi i32 [%inc, %loop]
  //    br label %loop
1127   //
1128   // After this rewrite, further analysis will ignore any phi nodes with more
1129   // than one incoming edge.
1130 
1131   // TODO: Simplify PHINodes in the basic block to remove duplicate
1132   // predecessors.
1133 
1134   LandingPadInst *LandingPad = nullptr;
1135   PHINode *ReplPHI = nullptr;
1136   if ((LandingPad = dyn_cast_or_null<LandingPadInst>(BB.getFirstNonPHI()))) {
1137     // ehAwareSplitEdge will clone the LandingPad in all the edge blocks.
1138     // We replace the original landing pad with a PHINode that will collect the
1139     // results from all of them.
1140     ReplPHI = PHINode::Create(LandingPad->getType(), 1, "", LandingPad);
1141     ReplPHI->takeName(LandingPad);
1142     LandingPad->replaceAllUsesWith(ReplPHI);
1143     // We will erase the original landing pad at the end of this function after
1144     // ehAwareSplitEdge cloned it in the transition blocks.
1145   }
1146 
1147   SmallVector<BasicBlock *, 8> Preds(pred_begin(&BB), pred_end(&BB));
1148   for (BasicBlock *Pred : Preds) {
1149     auto *IncomingBB = ehAwareSplitEdge(Pred, &BB, LandingPad, ReplPHI);
1150     IncomingBB->setName(BB.getName() + Twine(".from.") + Pred->getName());
1151     auto *PN = cast<PHINode>(&BB.front());
1152     do {
1153       int Index = PN->getBasicBlockIndex(IncomingBB);
1154       Value *V = PN->getIncomingValue(Index);
1155       PHINode *InputV = PHINode::Create(
1156           V->getType(), 1, V->getName() + Twine(".") + BB.getName(),
1157           &IncomingBB->front());
1158       InputV->addIncoming(V, Pred);
1159       PN->setIncomingValue(Index, InputV);
1160       PN = dyn_cast<PHINode>(PN->getNextNode());
1161     } while (PN != ReplPHI); // ReplPHI is either null or the PHI that replaced
1162                              // the landing pad.
1163   }
1164 
1165   if (LandingPad) {
    // Calls to ehAwareSplitEdge cloned the original landing pad, so we no
    // longer need it.
1168     LandingPad->eraseFromParent();
1169   }
1170 }
1171 
1172 static void rewritePHIs(Function &F) {
1173   SmallVector<BasicBlock *, 8> WorkList;
1174 
1175   for (BasicBlock &BB : F)
1176     if (auto *PN = dyn_cast<PHINode>(&BB.front()))
1177       if (PN->getNumIncomingValues() > 1)
1178         WorkList.push_back(&BB);
1179 
1180   for (BasicBlock *BB : WorkList)
1181     rewritePHIs(*BB);
1182 }
1183 
// Check for instructions that we can recreate on resume as opposed to spilling
// the result into the coroutine frame.
1186 static bool materializable(Instruction &V) {
1187   return isa<CastInst>(&V) || isa<GetElementPtrInst>(&V) ||
1188          isa<BinaryOperator>(&V) || isa<CmpInst>(&V) || isa<SelectInst>(&V);
1189 }
1190 
1191 // Check for structural coroutine intrinsics that should not be spilled into
1192 // the coroutine frame.
1193 static bool isCoroutineStructureIntrinsic(Instruction &I) {
1194   return isa<CoroIdInst>(&I) || isa<CoroSaveInst>(&I) ||
1195          isa<CoroSuspendInst>(&I);
1196 }
1197 
// For every use of a value that is live across a suspend point, recreate that
// value after the suspend point.
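//
// For example (illustrative), if %p = getelementptr inbounds i32, i32* %base,
// i64 1 is defined before a suspend point and used after it, the GEP is
// cloned at the top of the using block rather than stored in the coroutine
// frame (its operands are handled as spills or rematerializations of their
// own, if needed).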
1200 static void rewriteMaterializableInstructions(IRBuilder<> &IRB,
1201                                               SpillInfo const &Spills) {
1202   BasicBlock *CurrentBlock = nullptr;
1203   Instruction *CurrentMaterialization = nullptr;
1204   Instruction *CurrentDef = nullptr;
1205 
1206   for (auto const &E : Spills) {
1207     // If it is a new definition, update CurrentXXX variables.
1208     if (CurrentDef != E.def()) {
1209       CurrentDef = cast<Instruction>(E.def());
1210       CurrentBlock = nullptr;
1211       CurrentMaterialization = nullptr;
1212     }
1213 
1214     // If we have not seen this block, materialize the value.
1215     if (CurrentBlock != E.userBlock()) {
1216       CurrentBlock = E.userBlock();
1217       CurrentMaterialization = cast<Instruction>(CurrentDef)->clone();
1218       CurrentMaterialization->setName(CurrentDef->getName());
1219       CurrentMaterialization->insertBefore(
1220           &*CurrentBlock->getFirstInsertionPt());
1221     }
1222 
1223     if (auto *PN = dyn_cast<PHINode>(E.user())) {
1224       assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming "
1225                                                 "values in the PHINode");
1226       PN->replaceAllUsesWith(CurrentMaterialization);
1227       PN->eraseFromParent();
1228       continue;
1229     }
1230 
1231     // Replace all uses of CurrentDef in the current instruction with the
1232     // CurrentMaterialization for the block.
1233     E.user()->replaceUsesOfWith(CurrentDef, CurrentMaterialization);
1234   }
1235 }
1236 
1237 // Splits the block at a particular instruction unless it is the first
1238 // instruction in the block with a single predecessor.
1239 static BasicBlock *splitBlockIfNotFirst(Instruction *I, const Twine &Name) {
1240   auto *BB = I->getParent();
1241   if (&BB->front() == I) {
1242     if (BB->getSinglePredecessor()) {
1243       BB->setName(Name);
1244       return BB;
1245     }
1246   }
1247   return BB->splitBasicBlock(I, Name);
1248 }
1249 
1250 // Split above and below a particular instruction so that it
1251 // will be all alone by itself in a block.
1252 static void splitAround(Instruction *I, const Twine &Name) {
1253   splitBlockIfNotFirst(I, Name);
1254   splitBlockIfNotFirst(I->getNextNode(), "After" + Name);
1255 }
1256 
1257 static bool isSuspendBlock(BasicBlock *BB) {
1258   return isa<AnyCoroSuspendInst>(BB->front());
1259 }
1260 
1261 typedef SmallPtrSet<BasicBlock*, 8> VisitedBlocksSet;
1262 
1263 /// Does control flow starting at the given block ever reach a suspend
1264 /// instruction before reaching a block in VisitedOrFreeBBs?
1265 static bool isSuspendReachableFrom(BasicBlock *From,
1266                                    VisitedBlocksSet &VisitedOrFreeBBs) {
1267   // Eagerly try to add this block to the visited set.  If it's already
1268   // there, stop recursing; this path doesn't reach a suspend before
1269   // either looping or reaching a freeing block.
1270   if (!VisitedOrFreeBBs.insert(From).second)
1271     return false;
1272 
1273   // We assume that we'll already have split suspends into their own blocks.
1274   if (isSuspendBlock(From))
1275     return true;
1276 
1277   // Recurse on the successors.
1278   for (auto Succ : successors(From)) {
1279     if (isSuspendReachableFrom(Succ, VisitedOrFreeBBs))
1280       return true;
1281   }
1282 
1283   return false;
1284 }
1285 
1286 /// Is the given alloca "local", i.e. bounded in lifetime to not cross a
1287 /// suspend point?
1288 static bool isLocalAlloca(CoroAllocaAllocInst *AI) {
  // Seed the visited set with all the basic blocks containing a free
  // so that we won't search past them.
1291   VisitedBlocksSet VisitedOrFreeBBs;
1292   for (auto User : AI->users()) {
1293     if (auto FI = dyn_cast<CoroAllocaFreeInst>(User))
1294       VisitedOrFreeBBs.insert(FI->getParent());
1295   }
1296 
1297   return !isSuspendReachableFrom(AI->getParent(), VisitedOrFreeBBs);
1298 }
1299 
1300 /// After we split the coroutine, will the given basic block be along
1301 /// an obvious exit path for the resumption function?
1302 static bool willLeaveFunctionImmediatelyAfter(BasicBlock *BB,
1303                                               unsigned depth = 3) {
1304   // If we've bottomed out our depth count, stop searching and assume
1305   // that the path might loop back.
1306   if (depth == 0) return false;
1307 
1308   // If this is a suspend block, we're about to exit the resumption function.
1309   if (isSuspendBlock(BB)) return true;
1310 
1311   // Recurse into the successors.
1312   for (auto Succ : successors(BB)) {
1313     if (!willLeaveFunctionImmediatelyAfter(Succ, depth - 1))
1314       return false;
1315   }
1316 
1317   // If none of the successors leads back in a loop, we're on an exit/abort.
1318   return true;
1319 }
1320 
1321 static bool localAllocaNeedsStackSave(CoroAllocaAllocInst *AI) {
1322   // Look for a free that isn't sufficiently obviously followed by
1323   // either a suspend or a termination, i.e. something that will leave
1324   // the coro resumption frame.
1325   for (auto U : AI->users()) {
1326     auto FI = dyn_cast<CoroAllocaFreeInst>(U);
1327     if (!FI) continue;
1328 
1329     if (!willLeaveFunctionImmediatelyAfter(FI->getParent()))
1330       return true;
1331   }
1332 
1333   // If we never found one, we don't need a stack save.
1334   return false;
1335 }
1336 
1337 /// Turn each of the given local allocas into a normal (dynamic) alloca
1338 /// instruction.
1339 static void lowerLocalAllocas(ArrayRef<CoroAllocaAllocInst*> LocalAllocas,
1340                               SmallVectorImpl<Instruction*> &DeadInsts) {
1341   for (auto AI : LocalAllocas) {
1342     auto M = AI->getModule();
1343     IRBuilder<> Builder(AI);
1344 
1345     // Save the stack depth.  Try to avoid doing this if the stackrestore
1346     // is going to immediately precede a return or something.
1347     Value *StackSave = nullptr;
1348     if (localAllocaNeedsStackSave(AI))
1349       StackSave = Builder.CreateCall(
1350                             Intrinsic::getDeclaration(M, Intrinsic::stacksave));
1351 
1352     // Allocate memory.
1353     auto Alloca = Builder.CreateAlloca(Builder.getInt8Ty(), AI->getSize());
1354     Alloca->setAlignment(Align(AI->getAlignment()));
1355 
1356     for (auto U : AI->users()) {
1357       // Replace gets with the allocation.
1358       if (isa<CoroAllocaGetInst>(U)) {
1359         U->replaceAllUsesWith(Alloca);
1360 
1361       // Replace frees with stackrestores.  This is safe because
1362       // alloca.alloc is required to obey a stack discipline, although we
1363       // don't enforce that structurally.
1364       } else {
1365         auto FI = cast<CoroAllocaFreeInst>(U);
1366         if (StackSave) {
1367           Builder.SetInsertPoint(FI);
1368           Builder.CreateCall(
1369                     Intrinsic::getDeclaration(M, Intrinsic::stackrestore),
1370                              StackSave);
1371         }
1372       }
1373       DeadInsts.push_back(cast<Instruction>(U));
1374     }
1375 
1376     DeadInsts.push_back(AI);
1377   }
1378 }
1379 
1380 /// Turn the given coro.alloca.alloc call into a dynamic allocation.
1381 /// This happens during the all-instructions iteration, so it must not
1382 /// delete the call.
1383 static Instruction *lowerNonLocalAlloca(CoroAllocaAllocInst *AI,
1384                                         coro::Shape &Shape,
1385                                    SmallVectorImpl<Instruction*> &DeadInsts) {
1386   IRBuilder<> Builder(AI);
1387   auto Alloc = Shape.emitAlloc(Builder, AI->getSize(), nullptr);
1388 
1389   for (User *U : AI->users()) {
1390     if (isa<CoroAllocaGetInst>(U)) {
1391       U->replaceAllUsesWith(Alloc);
1392     } else {
1393       auto FI = cast<CoroAllocaFreeInst>(U);
1394       Builder.SetInsertPoint(FI);
1395       Shape.emitDealloc(Builder, Alloc, nullptr);
1396     }
1397     DeadInsts.push_back(cast<Instruction>(U));
1398   }
1399 
1400   // Push this on last so that it gets deleted after all the others.
1401   DeadInsts.push_back(AI);
1402 
1403   // Return the new allocation value so that we can check for needed spills.
1404   return cast<Instruction>(Alloc);
1405 }
1406 
1407 /// Get the current swifterror value.
1408 static Value *emitGetSwiftErrorValue(IRBuilder<> &Builder, Type *ValueTy,
1409                                      coro::Shape &Shape) {
1410   // Make a fake function pointer as a sort of intrinsic.
1411   auto FnTy = FunctionType::get(ValueTy, {}, false);
1412   auto Fn = ConstantPointerNull::get(FnTy->getPointerTo());
1413 
1414   auto Call = Builder.CreateCall(FnTy, Fn, {});
1415   Shape.SwiftErrorOps.push_back(Call);
1416 
1417   return Call;
1418 }
1419 
1420 /// Set the given value as the current swifterror value.
1421 ///
1422 /// Returns a slot that can be used as a swifterror slot.
1423 static Value *emitSetSwiftErrorValue(IRBuilder<> &Builder, Value *V,
1424                                      coro::Shape &Shape) {
1425   // Make a fake function pointer as a sort of intrinsic.
1426   auto FnTy = FunctionType::get(V->getType()->getPointerTo(),
1427                                 {V->getType()}, false);
1428   auto Fn = ConstantPointerNull::get(FnTy->getPointerTo());
1429 
1430   auto Call = Builder.CreateCall(FnTy, Fn, { V });
1431   Shape.SwiftErrorOps.push_back(Call);
1432 
1433   return Call;
1434 }
1435 
/// Set the swifterror value from the given alloca before a call,
/// then put it back in the alloca afterwards.
1438 ///
1439 /// Returns an address that will stand in for the swifterror slot
1440 /// until splitting.
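///
/// Roughly (the set/get calls are the placeholders described above):
///
///   %v    = load <ValueTy> from Alloca
///   %slot = set-swifterror(%v)        ; placeholder call
///   ...the call/invoke, whose swifterror operand the caller of this
///      function may retarget to %slot...
///   %r    = get-swifterror()          ; placeholder call
///   store %r into Alloca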
1441 static Value *emitSetAndGetSwiftErrorValueAround(Instruction *Call,
1442                                                  AllocaInst *Alloca,
1443                                                  coro::Shape &Shape) {
1444   auto ValueTy = Alloca->getAllocatedType();
1445   IRBuilder<> Builder(Call);
1446 
1447   // Load the current value from the alloca and set it as the
1448   // swifterror value.
1449   auto ValueBeforeCall = Builder.CreateLoad(ValueTy, Alloca);
1450   auto Addr = emitSetSwiftErrorValue(Builder, ValueBeforeCall, Shape);
1451 
1452   // Move to after the call.  Since swifterror only has a guaranteed
1453   // value on normal exits, we can ignore implicit and explicit unwind
1454   // edges.
1455   if (isa<CallInst>(Call)) {
1456     Builder.SetInsertPoint(Call->getNextNode());
1457   } else {
1458     auto Invoke = cast<InvokeInst>(Call);
1459     Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstNonPHIOrDbg());
1460   }
1461 
1462   // Get the current swifterror value and store it to the alloca.
1463   auto ValueAfterCall = emitGetSwiftErrorValue(Builder, ValueTy, Shape);
1464   Builder.CreateStore(ValueAfterCall, Alloca);
1465 
1466   return Addr;
1467 }
1468 
1469 /// Eliminate a formerly-swifterror alloca by inserting the get/set
1470 /// intrinsics and attempting to MemToReg the alloca away.
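///
/// Loads and stores of the alloca are left in place for mem2reg to clean up;
/// only the call and invoke uses, which require a genuine swifterror slot,
/// are rewritten to take the placeholder slot produced by
/// emitSetAndGetSwiftErrorValueAround.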
1471 static void eliminateSwiftErrorAlloca(Function &F, AllocaInst *Alloca,
1472                                       coro::Shape &Shape) {
1473   for (auto UI = Alloca->use_begin(), UE = Alloca->use_end(); UI != UE; ) {
1474     // We're likely changing the use list, so use a mutation-safe
1475     // iteration pattern.
1476     auto &Use = *UI;
1477     ++UI;
1478 
1479     // swifterror values can only be used in very specific ways.
1480     // We take advantage of that here.
1481     auto User = Use.getUser();
1482     if (isa<LoadInst>(User) || isa<StoreInst>(User))
1483       continue;
1484 
1485     assert(isa<CallInst>(User) || isa<InvokeInst>(User));
1486     auto Call = cast<Instruction>(User);
1487 
1488     auto Addr = emitSetAndGetSwiftErrorValueAround(Call, Alloca, Shape);
1489 
1490     // Use the returned slot address as the call argument.
1491     Use.set(Addr);
1492   }
1493 
1494   // All the uses should be loads and stores now.
1495   assert(isAllocaPromotable(Alloca));
1496 }
1497 
1498 /// "Eliminate" a swifterror argument by reducing it to the alloca case
1499 /// and then loading and storing in the prologue and epilog.
1500 ///
1501 /// The argument keeps the swifterror flag.
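///
/// Concretely: create an alloca in the entry block, replace all uses of the
/// argument with it, store null into it on entry, save and restore the
/// swifterror value around every suspend, reload it at each coro.end so the
/// value handed back to the caller is current, and finally hand the alloca
/// to eliminateSwiftErrorAlloca above.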
1502 static void eliminateSwiftErrorArgument(Function &F, Argument &Arg,
1503                                         coro::Shape &Shape,
1504                              SmallVectorImpl<AllocaInst*> &AllocasToPromote) {
1505   IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());
1506 
1507   auto ArgTy = cast<PointerType>(Arg.getType());
1508   auto ValueTy = ArgTy->getElementType();
1509 
1510   // Reduce to the alloca case:
1511 
1512   // Create an alloca and replace all uses of the arg with it.
1513   auto Alloca = Builder.CreateAlloca(ValueTy, ArgTy->getAddressSpace());
1514   Arg.replaceAllUsesWith(Alloca);
1515 
1516   // Set an initial value in the alloca.  swifterror is always null on entry.
1517   auto InitialValue = Constant::getNullValue(ValueTy);
1518   Builder.CreateStore(InitialValue, Alloca);
1519 
1520   // Find all the suspends in the function and save and restore around them.
1521   for (auto Suspend : Shape.CoroSuspends) {
1522     (void) emitSetAndGetSwiftErrorValueAround(Suspend, Alloca, Shape);
1523   }
1524 
1525   // Find all the coro.ends in the function and restore the error value.
1526   for (auto End : Shape.CoroEnds) {
1527     Builder.SetInsertPoint(End);
1528     auto FinalValue = Builder.CreateLoad(ValueTy, Alloca);
1529     (void) emitSetSwiftErrorValue(Builder, FinalValue, Shape);
1530   }
1531 
1532   // Now we can use the alloca logic.
1533   AllocasToPromote.push_back(Alloca);
1534   eliminateSwiftErrorAlloca(F, Alloca, Shape);
1535 }
1536 
1537 /// Eliminate all problematic uses of swifterror arguments and allocas
1538 /// from the function.  We'll fix them up later when splitting the function.
1539 static void eliminateSwiftError(Function &F, coro::Shape &Shape) {
1540   SmallVector<AllocaInst*, 4> AllocasToPromote;
1541 
1542   // Look for a swifterror argument.
1543   for (auto &Arg : F.args()) {
1544     if (!Arg.hasSwiftErrorAttr()) continue;
1545 
1546     eliminateSwiftErrorArgument(F, Arg, Shape, AllocasToPromote);
1547     break;
1548   }
1549 
1550   // Look for swifterror allocas.
1551   for (auto &Inst : F.getEntryBlock()) {
1552     auto Alloca = dyn_cast<AllocaInst>(&Inst);
1553     if (!Alloca || !Alloca->isSwiftError()) continue;
1554 
1555     // Clear the swifterror flag.
1556     Alloca->setSwiftError(false);
1557 
1558     AllocasToPromote.push_back(Alloca);
1559     eliminateSwiftErrorAlloca(F, Alloca, Shape);
1560   }
1561 
1562   // If we have any allocas to promote, compute a dominator tree and
1563   // promote them en masse.
1564   if (!AllocasToPromote.empty()) {
1565     DominatorTree DT(F);
1566     PromoteMemToReg(AllocasToPromote, DT);
1567   }
1568 }
1569 
/// The retcon and retcon.once conventions assume that all spill uses can be
/// sunk after the coro.begin intrinsic.
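///
/// Spills are stored in the coroutine frame, and for these ABIs the frame is
/// not set up until coro.begin runs, so any user of a spilled value that
/// currently precedes coro.begin (directly, or transitively through other
/// such users) is moved to just after it, in dominance order so that
/// operands still come before their users.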
1572 static void sinkSpillUsesAfterCoroBegin(Function &F, const SpillInfo &Spills,
1573                                         CoroBeginInst *CoroBegin) {
1574   DominatorTree Dom(F);
1575 
1576   SmallSetVector<Instruction *, 32> ToMove;
1577   SmallVector<Instruction *, 32> Worklist;
1578 
  // Collect users of the spilled values that precede coro.begin in its block.
1580   for (auto const &Entry : Spills) {
1581     auto *SpillDef = Entry.def();
1582     for (User *U : SpillDef->users()) {
1583       auto Inst = cast<Instruction>(U);
1584       if (Inst->getParent() != CoroBegin->getParent() ||
1585           Dom.dominates(CoroBegin, Inst))
1586         continue;
1587       if (ToMove.insert(Inst))
1588         Worklist.push_back(Inst);
1589     }
1590   }
  // Transitively collect their users that are not dominated by coro.begin.
1592   while (!Worklist.empty()) {
1593     auto *Def = Worklist.back();
1594     Worklist.pop_back();
1595     for (User *U : Def->users()) {
1596       auto Inst = cast<Instruction>(U);
1597       if (Dom.dominates(CoroBegin, Inst))
1598         continue;
1599       if (ToMove.insert(Inst))
1600         Worklist.push_back(Inst);
1601     }
1602   }
1603 
1604   // Sort by dominance.
1605   SmallVector<Instruction *, 64> InsertionList(ToMove.begin(), ToMove.end());
1606   std::sort(InsertionList.begin(), InsertionList.end(),
1607             [&Dom](Instruction *A, Instruction *B) -> bool {
              // If A dominates B, it should precede (<) B.
1609               return Dom.dominates(A, B);
1610             });
1611 
1612   Instruction *InsertPt = CoroBegin->getNextNode();
  for (Instruction *Inst : InsertionList)
    Inst->moveBefore(InsertPt);
1617 }
1618 
/// For each local variable whose users are all confined to a single region
/// that follows one of the suspend points, sink its lifetime.start markers
/// into the block just after that suspend block. Doing so minimizes the
/// lifetime of each variable, hence minimizing the amount of data we end up
/// putting on the frame.
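///
/// Sketch: if an alloca's only real uses sit in blocks dominated by the block
/// after a suspend, the lifetime.start marker (and any bitcast/GEP feeding
/// it) originally emitted near the alloca is replaced by a clone placed just
/// before that block's terminator, and the original markers are erased.  The
/// spill analysis later treats the sunk lifetime.start as the effective
/// definition, so the alloca no longer appears live across the suspend.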
1623 static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape,
1624                                      SuspendCrossingInfo &Checker) {
1625   DominatorTree DT(F);
1626 
  // Collect the candidate basic blocks that may dominate all uses of an
  // alloca.
1628   SmallPtrSet<BasicBlock *, 4> DomSet;
1629   DomSet.insert(&F.getEntryBlock());
1630   for (auto *CSI : Shape.CoroSuspends) {
1631     BasicBlock *SuspendBlock = CSI->getParent();
1632     assert(isSuspendBlock(SuspendBlock) && SuspendBlock->getSingleSuccessor() &&
1633            "should have split coro.suspend into its own block");
1634     DomSet.insert(SuspendBlock->getSingleSuccessor());
1635   }
1636 
1637   for (Instruction &I : instructions(F)) {
1638     AllocaInst* AI = dyn_cast<AllocaInst>(&I);
1639     if (!AI)
1640       continue;
1641 
1642     for (BasicBlock *DomBB : DomSet) {
1643       bool Valid = true;
1644       SmallVector<Instruction *, 1> Lifetimes;
1645 
1646       auto isLifetimeStart = [](Instruction* I) {
1647         if (auto* II = dyn_cast<IntrinsicInst>(I))
1648           return II->getIntrinsicID() == Intrinsic::lifetime_start;
1649         return false;
1650       };
1651 
1652       auto collectLifetimeStart = [&](Instruction *U, AllocaInst *AI) {
1653         if (isLifetimeStart(U)) {
1654           Lifetimes.push_back(U);
1655           return true;
1656         }
1657         if (!U->hasOneUse() || U->stripPointerCasts() != AI)
1658           return false;
1659         if (isLifetimeStart(U->user_back())) {
1660           Lifetimes.push_back(U->user_back());
1661           return true;
1662         }
1663         return false;
1664       };
1665 
1666       for (User *U : AI->users()) {
1667         Instruction *UI = cast<Instruction>(U);
        // If every user other than the lifetime.start markers (and the casts
        // feeding them) is dominated by this candidate block and is not
        // separated from it by a suspend point, the alloca does not need to
        // be spilled.
1672         if (!DT.dominates(DomBB, UI->getParent()) ||
1673             Checker.isDefinitionAcrossSuspend(DomBB, UI)) {
          // Skip lifetime.start markers themselves and the GEPs/bitcasts
          // that only feed lifetime.start markers.
1676           if (collectLifetimeStart(UI, AI))
1677             continue;
1678           Valid = false;
1679           break;
1680         }
1681       }
      // Sink the lifetime.start marker into the candidate block when all
      // other users of the alloca are confined to the region it dominates.
1684       if (Valid && Lifetimes.size() != 0) {
        // The cast may be AI itself when AI already has i8* type.
1686         auto *NewBitCast = [&](AllocaInst *AI) -> Value* {
1687           if (isa<AllocaInst>(Lifetimes[0]->getOperand(1)))
1688             return AI;
1689           auto *Int8PtrTy = Type::getInt8PtrTy(F.getContext());
1690           return CastInst::Create(Instruction::BitCast, AI, Int8PtrTy, "",
1691                                   DomBB->getTerminator());
1692         }(AI);
1693 
1694         auto *NewLifetime = Lifetimes[0]->clone();
1695         NewLifetime->replaceUsesOfWith(NewLifetime->getOperand(1), NewBitCast);
1696         NewLifetime->insertBefore(DomBB->getTerminator());
1697 
        // The original lifetime.start markers are no longer necessary.
1699         for (Instruction *S : Lifetimes)
1700           S->eraseFromParent();
1701 
1702         break;
1703       }
1704     }
1705   }
1706 }
1707 
1708 void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
1709   eliminateSwiftError(F, Shape);
1710 
1711   if (Shape.ABI == coro::ABI::Switch &&
1712       Shape.SwitchLowering.PromiseAlloca) {
1713     Shape.getSwitchCoroId()->clearPromise();
1714   }
1715 
1716   // Make sure that all coro.save, coro.suspend and the fallthrough coro.end
1717   // intrinsics are in their own blocks to simplify the logic of building up
  // SuspendCrossingInfo.
1719   for (auto *CSI : Shape.CoroSuspends) {
1720     if (auto *Save = CSI->getCoroSave())
1721       splitAround(Save, "CoroSave");
1722     splitAround(CSI, "CoroSuspend");
1723   }
1724 
1725   // Put CoroEnds into their own blocks.
1726   for (CoroEndInst *CE : Shape.CoroEnds)
1727     splitAround(CE, "CoroEnd");
1728 
  // Transform multi-edge PHI nodes so that any value feeding into a PHI never
  // has its definition separated from the PHI by a suspend point.
1731   rewritePHIs(F);
1732 
1733   // Build suspend crossing info.
1734   SuspendCrossingInfo Checker(F, Shape);
1735 
1736   IRBuilder<> Builder(F.getContext());
1737   SpillInfo Spills;
1738   SmallVector<CoroAllocaAllocInst*, 4> LocalAllocas;
1739   SmallVector<Instruction*, 4> DeadInstructions;
1740 
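  // Rewriting a materializable instruction at its uses can itself leave new
  // instructions whose operands cross a suspend point, so repeat the
  // check-and-rewrite cycle a small, bounded number of times.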
1741   for (int Repeat = 0; Repeat < 4; ++Repeat) {
1742     // See if there are materializable instructions across suspend points.
1743     for (Instruction &I : instructions(F))
1744       if (materializable(I))
1745         for (User *U : I.users())
1746           if (Checker.isDefinitionAcrossSuspend(I, U))
1747             Spills.emplace_back(&I, U);
1748 
1749     if (Spills.empty())
1750       break;
1751 
1752     // Rewrite materializable instructions to be materialized at the use point.
1753     LLVM_DEBUG(dump("Materializations", Spills));
1754     rewriteMaterializableInstructions(Builder, Spills);
1755     Spills.clear();
1756   }
1757 
1758   sinkLifetimeStartMarkers(F, Shape, Checker);
1759   // Collect lifetime.start info for each alloca.
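  // When an alloca has lifetime.start markers, the spill check below uses the
  // markers, rather than the alloca itself, as the definition when testing
  // for suspend crossings; allocas whose live ranges never span a suspend
  // therefore stay off the frame.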
1760   using LifetimeStart = SmallPtrSet<Instruction *, 2>;
1761   llvm::DenseMap<Instruction *, std::unique_ptr<LifetimeStart>> LifetimeMap;
1762   for (Instruction &I : instructions(F)) {
1763     auto *II = dyn_cast<IntrinsicInst>(&I);
1764     if (!II || II->getIntrinsicID() != Intrinsic::lifetime_start)
1765       continue;
1766 
1767     if (auto *OpInst = dyn_cast<Instruction>(II->getOperand(1))) {
1768       if (auto *AI = dyn_cast<AllocaInst>(OpInst->stripPointerCasts())) {
1769 
1770         if (LifetimeMap.find(AI) == LifetimeMap.end())
1771           LifetimeMap[AI] = std::make_unique<LifetimeStart>();
1772         LifetimeMap[AI]->insert(isa<AllocaInst>(OpInst) ? II : OpInst);
1773       }
1774     }
1775   }
1776 
  // Collect the spills for arguments and other non-materializable values.
1778   for (Argument &A : F.args())
1779     for (User *U : A.users())
1780       if (Checker.isDefinitionAcrossSuspend(A, U))
1781         Spills.emplace_back(&A, U);
1782 
1783   for (Instruction &I : instructions(F)) {
1784     // Values returned from coroutine structure intrinsics should not be part
1785     // of the Coroutine Frame.
1786     if (isCoroutineStructureIntrinsic(I) || &I == Shape.CoroBegin)
1787       continue;
1788 
    // The coroutine promise is always included in the coroutine frame; there
    // is no need to check for suspend crossings.
1791     if (Shape.ABI == coro::ABI::Switch &&
1792         Shape.SwitchLowering.PromiseAlloca == &I)
1793       continue;
1794 
1795     // Handle alloca.alloc specially here.
1796     if (auto AI = dyn_cast<CoroAllocaAllocInst>(&I)) {
1797       // Check whether the alloca's lifetime is bounded by suspend points.
1798       if (isLocalAlloca(AI)) {
1799         LocalAllocas.push_back(AI);
1800         continue;
1801       }
1802 
1803       // If not, do a quick rewrite of the alloca and then add spills of
1804       // the rewritten value.  The rewrite doesn't invalidate anything in
1805       // Spills because the other alloca intrinsics have no other operands
1806       // besides AI, and it doesn't invalidate the iteration because we delay
1807       // erasing AI.
1808       auto Alloc = lowerNonLocalAlloca(AI, Shape, DeadInstructions);
1809 
1810       for (User *U : Alloc->users()) {
1811         if (Checker.isDefinitionAcrossSuspend(*Alloc, U))
1812           Spills.emplace_back(Alloc, U);
1813       }
1814       continue;
1815     }
1816 
    // Ignore coro.alloca.get; we process it as part of coro.alloca.alloc.
1818     if (isa<CoroAllocaGetInst>(I)) {
1819       continue;
1820     }
1821 
1822     auto Iter = LifetimeMap.find(&I);
1823     for (User *U : I.users()) {
1824       bool NeedSpill = false;
1825 
      // If the instruction has associated lifetime.start markers, check for
      // suspend crossings against the markers rather than the definition.
1827       if (Iter != LifetimeMap.end())
1828         for (auto *S : *Iter->second) {
1829           if ((NeedSpill = Checker.isDefinitionAcrossSuspend(*S, U)))
1830             break;
1831         }
1832       else
1833         NeedSpill = Checker.isDefinitionAcrossSuspend(I, U);
1834 
1835       if (NeedSpill) {
1836         // We cannot spill a token.
1837         if (I.getType()->isTokenTy())
1838           report_fatal_error(
1839               "token definition is separated from the use by a suspend point");
1840         Spills.emplace_back(&I, U);
1841       }
1842     }
1843   }
1844   LLVM_DEBUG(dump("Spills", Spills));
1845   if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce)
1846     sinkSpillUsesAfterCoroBegin(F, Spills, Shape.CoroBegin);
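  // Lay out the frame type for the collected spills, materialize the frame,
  // and rewrite each spilled use as an access through the frame pointer;
  // then lower the remaining stack-disciplined coro.alloca allocations.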
1847   Shape.FrameTy = buildFrameType(F, Shape, Spills);
1848   Shape.FramePtr = insertSpills(Spills, Shape);
1849   lowerLocalAllocas(LocalAllocas, DeadInstructions);
1850 
1851   for (auto I : DeadInstructions)
1852     I->eraseFromParent();
1853 }
1854