//===- CoroFrame.cpp - Builds and manipulates coroutine frame -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file contains classes used to discover if for a particular value
// there is a path from use to definition that crosses a suspend block.
//
// Using the information discovered we form a Coroutine Frame structure to
// contain those values. All uses of those values are replaced with appropriate
// GEP + load from the coroutine frame. At the point of the definition we spill
// the value into the coroutine frame.
//
// TODO: pack values tightly using liveness info.
//===----------------------------------------------------------------------===//

#include "CoroInternal.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Analysis/PtrUseVisitor.h"
#include "llvm/Analysis/StackLifetime.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/OptimizedStructLayout.h"
#include "llvm/Support/circular_raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>

using namespace llvm;

// The "coro-suspend-crossing" flag is very noisy. There is another debug type,
// "coro-frame", which results in leaner debug spew.
44 #define DEBUG_TYPE "coro-suspend-crossing" 45 46 static cl::opt<bool> EnableReuseStorageInFrame( 47 "reuse-storage-in-coroutine-frame", cl::Hidden, 48 cl::desc( 49 "Enable the optimization which would reuse the storage in the coroutine \ 50 frame for allocas whose liferanges are not overlapped, for testing purposes"), 51 llvm::cl::init(false)); 52 53 enum { SmallVectorThreshold = 32 }; 54 55 // Provides two way mapping between the blocks and numbers. 56 namespace { 57 class BlockToIndexMapping { 58 SmallVector<BasicBlock *, SmallVectorThreshold> V; 59 60 public: 61 size_t size() const { return V.size(); } 62 63 BlockToIndexMapping(Function &F) { 64 for (BasicBlock &BB : F) 65 V.push_back(&BB); 66 llvm::sort(V); 67 } 68 69 size_t blockToIndex(BasicBlock *BB) const { 70 auto *I = llvm::lower_bound(V, BB); 71 assert(I != V.end() && *I == BB && "BasicBlockNumberng: Unknown block"); 72 return I - V.begin(); 73 } 74 75 BasicBlock *indexToBlock(unsigned Index) const { return V[Index]; } 76 }; 77 } // end anonymous namespace 78 79 // The SuspendCrossingInfo maintains data that allows to answer a question 80 // whether given two BasicBlocks A and B there is a path from A to B that 81 // passes through a suspend point. 82 // 83 // For every basic block 'i' it maintains a BlockData that consists of: 84 // Consumes: a bit vector which contains a set of indices of blocks that can 85 // reach block 'i' 86 // Kills: a bit vector which contains a set of indices of blocks that can 87 // reach block 'i', but one of the path will cross a suspend point 88 // Suspend: a boolean indicating whether block 'i' contains a suspend point. 89 // End: a boolean indicating whether block 'i' contains a coro.end intrinsic. 
90 // 91 namespace { 92 struct SuspendCrossingInfo { 93 BlockToIndexMapping Mapping; 94 95 struct BlockData { 96 BitVector Consumes; 97 BitVector Kills; 98 bool Suspend = false; 99 bool End = false; 100 }; 101 SmallVector<BlockData, SmallVectorThreshold> Block; 102 103 iterator_range<succ_iterator> successors(BlockData const &BD) const { 104 BasicBlock *BB = Mapping.indexToBlock(&BD - &Block[0]); 105 return llvm::successors(BB); 106 } 107 108 BlockData &getBlockData(BasicBlock *BB) { 109 return Block[Mapping.blockToIndex(BB)]; 110 } 111 112 void dump() const; 113 void dump(StringRef Label, BitVector const &BV) const; 114 115 SuspendCrossingInfo(Function &F, coro::Shape &Shape); 116 117 bool hasPathCrossingSuspendPoint(BasicBlock *DefBB, BasicBlock *UseBB) const { 118 size_t const DefIndex = Mapping.blockToIndex(DefBB); 119 size_t const UseIndex = Mapping.blockToIndex(UseBB); 120 121 bool const Result = Block[UseIndex].Kills[DefIndex]; 122 LLVM_DEBUG(dbgs() << UseBB->getName() << " => " << DefBB->getName() 123 << " answer is " << Result << "\n"); 124 return Result; 125 } 126 127 bool isDefinitionAcrossSuspend(BasicBlock *DefBB, User *U) const { 128 auto *I = cast<Instruction>(U); 129 130 // We rewrote PHINodes, so that only the ones with exactly one incoming 131 // value need to be analyzed. 132 if (auto *PN = dyn_cast<PHINode>(I)) 133 if (PN->getNumIncomingValues() > 1) 134 return false; 135 136 BasicBlock *UseBB = I->getParent(); 137 138 // As a special case, treat uses by an llvm.coro.suspend.retcon 139 // as if they were uses in the suspend's single predecessor: the 140 // uses conceptually occur before the suspend. 
141 if (isa<CoroSuspendRetconInst>(I)) { 142 UseBB = UseBB->getSinglePredecessor(); 143 assert(UseBB && "should have split coro.suspend into its own block"); 144 } 145 146 return hasPathCrossingSuspendPoint(DefBB, UseBB); 147 } 148 149 bool isDefinitionAcrossSuspend(Argument &A, User *U) const { 150 return isDefinitionAcrossSuspend(&A.getParent()->getEntryBlock(), U); 151 } 152 153 bool isDefinitionAcrossSuspend(Instruction &I, User *U) const { 154 auto *DefBB = I.getParent(); 155 156 // As a special case, treat values produced by an llvm.coro.suspend.* 157 // as if they were defined in the single successor: the uses 158 // conceptually occur after the suspend. 159 if (isa<AnyCoroSuspendInst>(I)) { 160 DefBB = DefBB->getSingleSuccessor(); 161 assert(DefBB && "should have split coro.suspend into its own block"); 162 } 163 164 return isDefinitionAcrossSuspend(DefBB, U); 165 } 166 }; 167 } // end anonymous namespace 168 169 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 170 LLVM_DUMP_METHOD void SuspendCrossingInfo::dump(StringRef Label, 171 BitVector const &BV) const { 172 dbgs() << Label << ":"; 173 for (size_t I = 0, N = BV.size(); I < N; ++I) 174 if (BV[I]) 175 dbgs() << " " << Mapping.indexToBlock(I)->getName(); 176 dbgs() << "\n"; 177 } 178 179 LLVM_DUMP_METHOD void SuspendCrossingInfo::dump() const { 180 for (size_t I = 0, N = Block.size(); I < N; ++I) { 181 BasicBlock *const B = Mapping.indexToBlock(I); 182 dbgs() << B->getName() << ":\n"; 183 dump(" Consumes", Block[I].Consumes); 184 dump(" Kills", Block[I].Kills); 185 } 186 dbgs() << "\n"; 187 } 188 #endif 189 190 SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape) 191 : Mapping(F) { 192 const size_t N = Mapping.size(); 193 Block.resize(N); 194 195 // Initialize every block so that it consumes itself 196 for (size_t I = 0; I < N; ++I) { 197 auto &B = Block[I]; 198 B.Consumes.resize(N); 199 B.Kills.resize(N); 200 B.Consumes.set(I); 201 } 202 203 // Mark all CoroEnd Blocks. 
We do not propagate Kills beyond coro.ends as 204 // the code beyond coro.end is reachable during initial invocation of the 205 // coroutine. 206 for (auto *CE : Shape.CoroEnds) 207 getBlockData(CE->getParent()).End = true; 208 209 // Mark all suspend blocks and indicate that they kill everything they 210 // consume. Note, that crossing coro.save also requires a spill, as any code 211 // between coro.save and coro.suspend may resume the coroutine and all of the 212 // state needs to be saved by that time. 213 auto markSuspendBlock = [&](IntrinsicInst *BarrierInst) { 214 BasicBlock *SuspendBlock = BarrierInst->getParent(); 215 auto &B = getBlockData(SuspendBlock); 216 B.Suspend = true; 217 B.Kills |= B.Consumes; 218 }; 219 for (auto *CSI : Shape.CoroSuspends) { 220 markSuspendBlock(CSI); 221 if (auto *Save = CSI->getCoroSave()) 222 markSuspendBlock(Save); 223 } 224 225 // Iterate propagating consumes and kills until they stop changing. 226 int Iteration = 0; 227 (void)Iteration; 228 229 bool Changed; 230 do { 231 LLVM_DEBUG(dbgs() << "iteration " << ++Iteration); 232 LLVM_DEBUG(dbgs() << "==============\n"); 233 234 Changed = false; 235 for (size_t I = 0; I < N; ++I) { 236 auto &B = Block[I]; 237 for (BasicBlock *SI : successors(B)) { 238 239 auto SuccNo = Mapping.blockToIndex(SI); 240 241 // Saved Consumes and Kills bitsets so that it is easy to see 242 // if anything changed after propagation. 243 auto &S = Block[SuccNo]; 244 auto SavedConsumes = S.Consumes; 245 auto SavedKills = S.Kills; 246 247 // Propagate Kills and Consumes from block B into its successor S. 248 S.Consumes |= B.Consumes; 249 S.Kills |= B.Kills; 250 251 // If block B is a suspend block, it should propagate kills into the 252 // its successor for every block B consumes. 253 if (B.Suspend) { 254 S.Kills |= B.Consumes; 255 } 256 if (S.Suspend) { 257 // If block S is a suspend block, it should kill all of the blocks it 258 // consumes. 
259 S.Kills |= S.Consumes; 260 } else if (S.End) { 261 // If block S is an end block, it should not propagate kills as the 262 // blocks following coro.end() are reached during initial invocation 263 // of the coroutine while all the data are still available on the 264 // stack or in the registers. 265 S.Kills.reset(); 266 } else { 267 // This is reached when S block it not Suspend nor coro.end and it 268 // need to make sure that it is not in the kill set. 269 S.Kills.reset(SuccNo); 270 } 271 272 // See if anything changed. 273 Changed |= (S.Kills != SavedKills) || (S.Consumes != SavedConsumes); 274 275 if (S.Kills != SavedKills) { 276 LLVM_DEBUG(dbgs() << "\nblock " << I << " follower " << SI->getName() 277 << "\n"); 278 LLVM_DEBUG(dump("S.Kills", S.Kills)); 279 LLVM_DEBUG(dump("SavedKills", SavedKills)); 280 } 281 if (S.Consumes != SavedConsumes) { 282 LLVM_DEBUG(dbgs() << "\nblock " << I << " follower " << SI << "\n"); 283 LLVM_DEBUG(dump("S.Consume", S.Consumes)); 284 LLVM_DEBUG(dump("SavedCons", SavedConsumes)); 285 } 286 } 287 } 288 } while (Changed); 289 LLVM_DEBUG(dump()); 290 } 291 292 #undef DEBUG_TYPE // "coro-suspend-crossing" 293 #define DEBUG_TYPE "coro-frame" 294 295 // We build up the list of spills for every case where a use is separated 296 // from the definition by a suspend point. 297 298 static const unsigned InvalidFieldIndex = ~0U; 299 300 namespace { 301 class Spill { 302 Value *Def = nullptr; 303 Instruction *User = nullptr; 304 unsigned FieldNo = InvalidFieldIndex; 305 306 public: 307 Spill(Value *Def, llvm::User *U) : Def(Def), User(cast<Instruction>(U)) {} 308 309 Value *def() const { return Def; } 310 Instruction *user() const { return User; } 311 BasicBlock *userBlock() const { return User->getParent(); } 312 313 // Note that field index is stored in the first SpillEntry for a particular 314 // definition. Subsequent mentions of a defintion do not have fieldNo 315 // assigned. 
This works out fine as the users of Spills capture the info about 316 // the definition the first time they encounter it. Consider refactoring 317 // SpillInfo into two arrays to normalize the spill representation. 318 unsigned fieldIndex() const { 319 assert(FieldNo != InvalidFieldIndex && "Accessing unassigned field"); 320 return FieldNo; 321 } 322 void setFieldIndex(unsigned FieldNumber) { 323 assert(FieldNo == InvalidFieldIndex && "Reassigning field number"); 324 FieldNo = FieldNumber; 325 } 326 }; 327 } // namespace 328 329 // Note that there may be more than one record with the same value of Def in 330 // the SpillInfo vector. 331 using SpillInfo = SmallVector<Spill, 8>; 332 333 #ifndef NDEBUG 334 static void dump(StringRef Title, SpillInfo const &Spills) { 335 dbgs() << "------------- " << Title << "--------------\n"; 336 Value *CurrentValue = nullptr; 337 for (auto const &E : Spills) { 338 if (CurrentValue != E.def()) { 339 CurrentValue = E.def(); 340 CurrentValue->dump(); 341 } 342 dbgs() << " user: "; 343 E.user()->dump(); 344 } 345 } 346 #endif 347 348 namespace { 349 // We cannot rely solely on natural alignment of a type when building a 350 // coroutine frame and if the alignment specified on the Alloca instruction 351 // differs from the natural alignment of the alloca type we will need to insert 352 // padding. 
class FrameTypeBuilder {
public:
  using ForSpillType = SmallVector<Spill *, 8>;

private:
  // One field of the frame struct being laid out.
  struct Field {
    uint64_t Size;
    uint64_t Offset;
    // Spill records whose field index should be set once layout is done.
    ForSpillType ForSpill;
    Type *Ty;
    unsigned FieldIndex;
    Align Alignment;
    // The type's natural (ABI) alignment; used to decide whether padding or a
    // packed struct is required when the chosen offset is under-aligned.
    Align TyAlignment;
  };

  const DataLayout &DL;
  LLVMContext &Context;
  uint64_t StructSize = 0;
  Align StructAlign;
  bool IsFinished = false;

  SmallVector<Field, 8> Fields;
  DenseMap<Value*, unsigned> FieldIndexByKey;

public:
  FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL)
    : DL(DL), Context(Context) {}

  // Opaque handle to a field; only FrameTypeBuilder can mint one.
  class FieldId {
    size_t Value;
    explicit FieldId(size_t Value) : Value(Value) {}

    friend class FrameTypeBuilder;
  };

  /// Add a field to this structure for the storage of an `alloca`
  /// instruction.
  FieldId addFieldForAlloca(AllocaInst *AI, ForSpillType ForSpill = {},
                            bool IsHeader = false) {
    Type *Ty = AI->getAllocatedType();

    // Make an array type if this is a static array allocation.
    if (AI->isArrayAllocation()) {
      if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
        Ty = ArrayType::get(Ty, CI->getValue().getZExtValue());
      else
        report_fatal_error("Coroutines cannot handle non static allocas yet");
    }

    return addField(Ty, AI->getAlign(), ForSpill, IsHeader);
  }

  /// We want to put the allocas whose lifetime-ranges are not overlapped
  /// into one slot of coroutine frame.
  /// Consider the example at:https://bugs.llvm.org/show_bug.cgi?id=45566
  ///
  ///     cppcoro::task<void> alternative_paths(bool cond) {
  ///         if (cond) {
  ///             big_structure a;
  ///             process(a);
  ///             co_await something();
  ///         } else {
  ///             big_structure b;
  ///             process2(b);
  ///             co_await something();
  ///         }
  ///     }
  ///
  /// We want to put variable a and variable b in the same slot to
  /// reduce the size of coroutine frame.
  ///
  /// This function uses the StackLifetime algorithm to partition the
  /// AllocaInsts in Spills into non-overlapped sets in order to put allocas in
  /// the same non-overlapped set into the same slot in the coroutine frame.
  /// Then it adds a field for the allocas in the same non-overlapped set,
  /// using the largest type as the field type.
  ///
  /// Side Effects: Because we sort the allocas, the order of allocas in the
  /// frame may differ from the order in the source code.
  void addFieldForAllocas(const Function &F, SpillInfo &Spills,
                          coro::Shape &Shape);

  /// Add a field to this structure.
  FieldId addField(Type *Ty, MaybeAlign FieldAlignment,
                   ForSpillType ForSpill = {}, bool IsHeader = false) {
    assert(!IsFinished && "adding fields to a finished builder");
    assert(Ty && "must provide a type for a field");

    // The field size is always the alloc size of the type.
    uint64_t FieldSize = DL.getTypeAllocSize(Ty);

    // The field alignment might not be the type alignment, but we need
    // to remember the type alignment anyway to build the type.
    Align TyAlignment = DL.getABITypeAlign(Ty);
    if (!FieldAlignment) FieldAlignment = TyAlignment;

    // Lay out header fields immediately.
    uint64_t Offset;
    if (IsHeader) {
      Offset = alignTo(StructSize, FieldAlignment);
      StructSize = Offset + FieldSize;

    // Everything else has a flexible offset.
    } else {
      Offset = OptimizedStructLayoutField::FlexibleOffset;
    }

    Fields.push_back({FieldSize, Offset, ForSpill, Ty, 0,
                      *FieldAlignment, TyAlignment});
    return FieldId(Fields.size() - 1);
  }

  /// Finish the layout and set the body on the given type.
  void finish(StructType *Ty);

  uint64_t getStructSize() const {
    assert(IsFinished && "not yet finished!");
    return StructSize;
  }

  Align getStructAlign() const {
    assert(IsFinished && "not yet finished!");
    return StructAlign;
  }

  unsigned getFieldIndex(FieldId Id) const {
    assert(IsFinished && "not yet finished!");
    return Fields[Id.Value].FieldIndex;
  }
};
} // namespace

void FrameTypeBuilder::addFieldForAllocas(const Function &F, SpillInfo &Spills,
                                          coro::Shape &Shape) {
  DenseMap<AllocaInst *, unsigned int> AllocaIndex;
  SmallVector<AllocaInst *, 8> Allocas;
  DenseMap<AllocaInst *, Spill *> SpillOfAllocas;
  using AllocaSetType = SmallVector<AllocaInst *, 4>;
  SmallVector<AllocaSetType, 4> NonOverlapedAllocas;

  // We need to add fields for the allocas at the end of this function.
  // However, this function has multiple exits, so we use this scope-exit
  // helper (its destructor runs `func` on every return path) to avoid
  // redundant code. NOTE: despite the name, this has nothing to do with RTTI;
  // it is a plain RAII-style on-exit callback.
  struct RTTIHelper {
    std::function<void()> func;
    RTTIHelper(std::function<void()> &&func) : func(func) {}
    ~RTTIHelper() { func(); }
  } Helper([&]() {
    for (auto AllocaSet : NonOverlapedAllocas) {
      ForSpillType ForSpills;
      for (auto Alloca : AllocaSet)
        ForSpills.push_back(SpillOfAllocas[Alloca]);
      auto *LargestAI = *AllocaSet.begin();
      addFieldForAlloca(LargestAI, ForSpills);
    }
  });

  // Collect the distinct spilled allocas, remembering the first Spill record
  // for each (that record is where the field index gets stored).
  for (auto &Spill : Spills)
    if (AllocaInst *AI = dyn_cast<AllocaInst>(Spill.def()))
      if (find(Allocas, AI) == Allocas.end()) {
        SpillOfAllocas[AI] = &Spill;
        Allocas.emplace_back(AI);
      }

  // Storage reuse disabled: give every alloca its own singleton set.
  if (!Shape.ReuseFrameSlot && !EnableReuseStorageInFrame) {
    for (auto Alloca : Allocas) {
      AllocaIndex[Alloca] = NonOverlapedAllocas.size();
      NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
    }
    return;
  }

  // Because there are paths from the lifetime.start to coro.end
  // for each alloca, the live ranges for every alloca are overlapped
  // in the blocks which contain coro.end and the successor blocks.
  // So we choose to skip these blocks when we calculate the live range
  // for each alloca. It should be reasonable since there shouldn't be uses
  // in these blocks and the coroutine frame shouldn't be used outside the
  // coroutine body.
  //
  // Note that the user of coro.suspend may not be SwitchInst. However, this
  // case seems too complex to handle. And it is harmless to skip these
  // patterns since it just prevents putting the allocas in the same slot.
  // (Temporarily point each suspend switch's default at the resume successor
  // so the cleanup/end paths are ignored by the liveness analysis; restored
  // below.)
  DenseMap<SwitchInst *, BasicBlock *> DefaultSuspendDest;
  for (auto CoroSuspendInst : Shape.CoroSuspends) {
    for (auto U : CoroSuspendInst->users()) {
      if (auto *ConstSWI = dyn_cast<SwitchInst>(U)) {
        auto *SWI = const_cast<SwitchInst *>(ConstSWI);
        DefaultSuspendDest[SWI] = SWI->getDefaultDest();
        SWI->setDefaultDest(SWI->getSuccessor(1));
      }
    }
  }

  StackLifetime StackLifetimeAnalyzer(F, Allocas,
                                      StackLifetime::LivenessType::May);
  StackLifetimeAnalyzer.run();
  // True if the two allocas' live ranges interfere (overlap), i.e. they
  // cannot share a frame slot. (Name is a historical typo for "interfere".)
  auto IsAllocaInferenre = [&](const AllocaInst *AI1, const AllocaInst *AI2) {
    return StackLifetimeAnalyzer.getLiveRange(AI1).overlaps(
        StackLifetimeAnalyzer.getLiveRange(AI2));
  };
  auto GetAllocaSize = [&](const AllocaInst *AI) {
    Optional<uint64_t> RetSize = AI->getAllocationSizeInBits(DL);
    assert(RetSize && "We can't handle scalable type now.\n");
    return RetSize.getValue();
  };
  // Put larger allocas in the front. So the larger allocas have higher
  // priority to merge, which can save more space potentially. Also each
  // AllocaSet would be ordered. So we can get the largest Alloca in one
  // AllocaSet easily.
  sort(Allocas, [&](auto Iter1, auto Iter2) {
    return GetAllocaSize(Iter1) > GetAllocaSize(Iter2);
  });
  for (auto Alloca : Allocas) {
    bool Merged = false;
    // Try to find whether the Alloca does not interfere with any existing
    // NonOverlappedAllocaSet. If so, insert the alloca into that
    // NonOverlappedAllocaSet.
    for (auto &AllocaSet : NonOverlapedAllocas) {
      assert(!AllocaSet.empty() && "Processing Alloca Set is not empty.\n");
      bool CouldMerge = none_of(AllocaSet, [&](auto Iter) {
        return IsAllocaInferenre(Alloca, Iter);
      });
      if (!CouldMerge)
        continue;
      AllocaIndex[Alloca] = AllocaIndex[*AllocaSet.begin()];
      AllocaSet.push_back(Alloca);
      Merged = true;
      break;
    }
    if (!Merged) {
      AllocaIndex[Alloca] = NonOverlapedAllocas.size();
      NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
    }
  }
  // Recover the default target destination for each Switch statement
  // saved above.
  for (auto SwitchAndDefaultDest : DefaultSuspendDest) {
    SwitchInst *SWI = SwitchAndDefaultDest.first;
    BasicBlock *DestBB = SwitchAndDefaultDest.second;
    SWI->setDefaultDest(DestBB);
  }
  // This debug output tells us which allocas were merged into one slot.
  LLVM_DEBUG(for (auto &AllocaSet
                  : NonOverlapedAllocas) {
    if (AllocaSet.size() > 1) {
      dbgs() << "In Function:" << F.getName() << "\n";
      dbgs() << "Find Union Set "
             << "\n";
      dbgs() << "\tAllocas are \n";
      for (auto Alloca : AllocaSet)
        dbgs() << "\t\t" << *Alloca << "\n";
    }
  });
}

void FrameTypeBuilder::finish(StructType *Ty) {
  assert(!IsFinished && "already finished!");

  // Prepare the optimal-layout field array.
  // The Id in the layout field is a pointer to our Field for it.
  SmallVector<OptimizedStructLayoutField, 8> LayoutFields;
  LayoutFields.reserve(Fields.size());
  for (auto &Field : Fields) {
    LayoutFields.emplace_back(&Field, Field.Size, Field.Alignment,
                              Field.Offset);
  }

  // Perform layout.
  auto SizeAndAlign = performOptimizedStructLayout(LayoutFields);
  StructSize = SizeAndAlign.first;
  StructAlign = SizeAndAlign.second;

  // Recover our Field from the opaque Id the layout algorithm carried around.
  auto getField = [](const OptimizedStructLayoutField &LayoutField) -> Field & {
    return *static_cast<Field *>(const_cast<void*>(LayoutField.Id));
  };

  // We need to produce a packed struct type if there's a field whose
  // assigned offset isn't a multiple of its natural type alignment.
  bool Packed = [&] {
    for (auto &LayoutField : LayoutFields) {
      auto &F = getField(LayoutField);
      if (!isAligned(F.TyAlignment, LayoutField.Offset))
        return true;
    }
    return false;
  }();

  // Build the struct body.
  SmallVector<Type*, 16> FieldTypes;
  FieldTypes.reserve(LayoutFields.size() * 3 / 2);
  uint64_t LastOffset = 0;
  for (auto &LayoutField : LayoutFields) {
    auto &F = getField(LayoutField);

    auto Offset = LayoutField.Offset;

    // Add a padding field if there's a padding gap and we're either
    // building a packed struct or the padding gap is more than we'd
    // get from aligning to the field type's natural alignment.
    assert(Offset >= LastOffset);
    if (Offset != LastOffset) {
      if (Packed || alignTo(LastOffset, F.TyAlignment) != Offset)
        FieldTypes.push_back(ArrayType::get(Type::getInt8Ty(Context),
                                            Offset - LastOffset));
    }

    // Record the layout information into both the Field and the
    // original Spill, if there is one.
    F.Offset = Offset;
    F.FieldIndex = FieldTypes.size();
    for (auto Spill : F.ForSpill) {
      Spill->setFieldIndex(F.FieldIndex);
    }

    FieldTypes.push_back(F.Ty);
    LastOffset = Offset + F.Size;
  }

  Ty->setBody(FieldTypes, Packed);

#ifndef NDEBUG
  // Check that the IR layout matches the offsets we expect.
  auto Layout = DL.getStructLayout(Ty);
  for (auto &F : Fields) {
    assert(Ty->getElementType(F.FieldIndex) == F.Ty);
    assert(Layout->getElementOffset(F.FieldIndex) == F.Offset);
  }
#endif

  IsFinished = true;
}

// Build a struct that will keep state for an active coroutine.
//   struct f.frame {
//     ResumeFnTy ResumeFnAddr;
//     ResumeFnTy DestroyFnAddr;
//     int ResumeIndex;
//     ... promise (if present) ...
//     ... spills ...
//   };
static StructType *buildFrameType(Function &F, coro::Shape &Shape,
                                  SpillInfo &Spills) {
  LLVMContext &C = F.getContext();
  const DataLayout &DL = F.getParent()->getDataLayout();
  // The frame struct is named "<function name>.Frame".
  StructType *FrameTy = [&] {
    SmallString<32> Name(F.getName());
    Name.append(".Frame");
    return StructType::create(C, Name);
  }();

  FrameTypeBuilder B(C, DL);

  AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
  Optional<FrameTypeBuilder::FieldId> PromiseFieldId;
  Optional<FrameTypeBuilder::FieldId> SwitchIndexFieldId;

  if (Shape.ABI == coro::ABI::Switch) {
    auto *FramePtrTy = FrameTy->getPointerTo();
    auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy,
                                   /*IsVarArg=*/false);
    auto *FnPtrTy = FnTy->getPointerTo();

    // Add header fields for the resume and destroy functions.
    // We can rely on these being perfectly packed.
    B.addField(FnPtrTy, None, {}, /*header*/ true);
    B.addField(FnPtrTy, None, {}, /*header*/ true);

    // Add a header field for the promise if there is one.
    if (PromiseAlloca) {
      PromiseFieldId = B.addFieldForAlloca(PromiseAlloca, {}, /*header*/ true);
    }

    // Add a field to store the suspend index. This doesn't need to
    // be in the header. It only needs enough bits to distinguish the
    // suspend points.
    unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size()));
    Type *IndexType = Type::getIntNTy(C, IndexBits);

    SwitchIndexFieldId = B.addField(IndexType, None);
  } else {
    assert(PromiseAlloca == nullptr && "lowering doesn't support promises");
  }

  // Because multiple allocas may own the same field slot,
  // we add allocas to field here.
  B.addFieldForAllocas(F, Spills, Shape);
  Value *CurrentDef = nullptr;
  // Create an entry for every spilled value which is not an AllocaInst.
  for (auto &S : Spills) {
    // We can have multiple entries in Spills for a single value, but
    // they should form a contiguous run. Ignore all but the first.
    if (CurrentDef == S.def())
      continue;

    CurrentDef = S.def();

    assert(CurrentDef != PromiseAlloca &&
           "recorded spill use of promise alloca?");

    if (!isa<AllocaInst>(CurrentDef)) {
      Type *Ty = CurrentDef->getType();
      B.addField(Ty, None, {&S});
    }
  }

  B.finish(FrameTy);
  Shape.FrameAlign = B.getStructAlign();
  Shape.FrameSize = B.getStructSize();

  switch (Shape.ABI) {
  // In the switch ABI, remember the field indices for the promise and
  // switch-index fields.
  case coro::ABI::Switch:
    Shape.SwitchLowering.IndexField =
      B.getFieldIndex(*SwitchIndexFieldId);
    Shape.SwitchLowering.PromiseField =
      (PromiseAlloca ? B.getFieldIndex(*PromiseFieldId) : 0);

    // Also round the frame size up to a multiple of its alignment, as is
    // generally expected in C/C++.
    Shape.FrameSize = alignTo(Shape.FrameSize, Shape.FrameAlign);
    break;

  // In the retcon ABI, remember whether the frame is inline in the storage.
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce: {
    auto Id = Shape.getRetconCoroId();
    Shape.RetconLowering.IsFrameInlineInStorage
      = (B.getStructSize() <= Id->getStorageSize() &&
         B.getStructAlign() <= Id->getStorageAlignment());
    break;
  }
  }

  return FrameTy;
}

// We use a pointer use visitor to discover if there are any writes into an
// alloca that dominates CoroBegin. If that is the case, insertSpills will copy
// the value from the alloca into the coroutine frame spill slot corresponding
// to that alloca. We also collect any alias pointing to the alloca created
// before CoroBegin but used after CoroBegin. These aliases will be recreated
// after CoroBegin from the frame address so that later references are
// pointing to the frame instead of the stack.
// Note: We are repurposing PtrUseVisitor's isEscaped() to mean whether the
// pointer is potentially written into.
// TODO: If the pointer is really escaped, we are in big trouble because we
// will be escaping a pointer to a stack address that would no longer exist
// soon. However most escape analysis isn't good enough to precisely tell,
// so we are assuming that if a pointer is escaped that it's written into.
// TODO: Another potential issue is if we are creating an alias through
// a function call, e.g:
//   %a = AllocaInst ...
//   %b = call @computeAddress(... %a)
// If %b is an alias of %a and will be used after CoroBegin, this will be broken
// and there is nothing we can do about it.
namespace {
struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
  using Base = PtrUseVisitor<AllocaUseVisitor>;
  AllocaUseVisitor(const DataLayout &DL, const DominatorTree &DT,
                   const CoroBeginInst &CB)
      : PtrUseVisitor(DL), DT(DT), CoroBegin(CB) {}

  // We are only interested in uses that are not dominated by coro.begin.
  void visit(Instruction &I) {
    if (!DT.dominates(&CoroBegin, &I))
      Base::visit(I);
  }
  // We need to provide this overload as PtrUseVisitor uses a pointer based
  // visiting function.
  void visit(Instruction *I) { return visit(*I); }

  // We cannot handle PHI node and SelectInst because they could be selecting
  // between two addresses that point to different Allocas.
  void visitPHINode(PHINode &I) {
    assert(!usedAfterCoroBegin(I) &&
           "Unable to handle PHI node of aliases created before CoroBegin but "
           "used after CoroBegin");
  }

  void visitSelectInst(SelectInst &I) {
    assert(!usedAfterCoroBegin(I) &&
           "Unable to handle Select of aliases created before CoroBegin but "
           "used after CoroBegin");
  }

  // Loads do not modify the alloca, so they need no special handling.
  void visitLoadInst(LoadInst &) {}

  // If the use is an operand, the pointer escaped and anything can write into
  // that memory. If the use is the pointer, we are definitely writing into the
  // alloca and therefore we need to copy.
  void visitStoreInst(StoreInst &SI) { PI.setEscaped(&SI); }

  // All mem intrinsics modify the data.
  void visitMemIntrinsic(MemIntrinsic &MI) { PI.setEscaped(&MI); }

  void visitBitCastInst(BitCastInst &BC) {
    Base::visitBitCastInst(BC);
    handleAlias(BC);
  }

  void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
    Base::visitAddrSpaceCastInst(ASC);
    handleAlias(ASC);
  }

  void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
    // The base visitor will adjust Offset accordingly.
    Base::visitGetElementPtrInst(GEPI);
    handleAlias(GEPI);
  }

  const SmallVector<std::pair<Instruction *, APInt>, 1> &getAliases() const {
    return Aliases;
  }

private:
  const DominatorTree &DT;
  const CoroBeginInst &CoroBegin;
  // All aliases to the original AllocaInst that are used after CoroBegin.
  // Each entry contains the instruction and the offset in the original Alloca.
  SmallVector<std::pair<Instruction *, APInt>, 1> Aliases{};

  // Returns true if any use of I is dominated by CoroBegin.
  bool usedAfterCoroBegin(Instruction &I) {
    for (auto &U : I.uses())
      if (DT.dominates(&CoroBegin, U))
        return true;
    return false;
  }

  // Record I as an alias of the alloca if it is used after CoroBegin.
  void handleAlias(Instruction &I) {
    if (!usedAfterCoroBegin(I))
      return;

    assert(IsOffsetKnown && "Can only handle alias with known offset created "
                            "before CoroBegin and used after");
    Aliases.emplace_back(&I, Offset);
  }
};
} // namespace

// We need to make room to insert a spill after initial PHIs, but before
// catchswitch instruction. Placing it before violates the requirement that
// catchswitch, like all other EHPads must be the first nonPHI in a block.
//
// Split away catchswitch into a separate block and insert in its place:
//
//    cleanuppad <InsertPt> cleanupret.
//
// cleanupret instruction will act as an insert point for the spill.
static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) {
  BasicBlock *CurrentBlock = CatchSwitch->getParent();
  BasicBlock *NewBlock = CurrentBlock->splitBasicBlock(CatchSwitch);
  // splitBasicBlock inserted an unconditional branch; drop it, the
  // cleanupret created below becomes the terminator instead.
  CurrentBlock->getTerminator()->eraseFromParent();

  auto *CleanupPad =
      CleanupPadInst::Create(CatchSwitch->getParentPad(), {}, "", CurrentBlock);
  auto *CleanupRet =
      CleanupReturnInst::Create(CleanupPad, NewBlock, CurrentBlock);
  return CleanupRet;
}

// Replace all alloca and SSA values that are accessed across suspend points
// with GetElementPointer from coroutine frame + loads and stores. Create an
// AllocaSpillBB that will become the new entry block for the resume parts of
// the coroutine:
//
//    %hdl = coro.begin(...)
//    whatever
//
// becomes:
//
//    %hdl = coro.begin(...)
// %FramePtr = bitcast i8* hdl to %f.frame*
// br label %AllocaSpillBB
//
// AllocaSpillBB:
// ; geps corresponding to allocas that were moved to coroutine frame
// br label PostSpill
//
// PostSpill:
// whatever
//
//
static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) {
  auto *CB = Shape.CoroBegin;
  LLVMContext &C = CB->getContext();
  IRBuilder<> Builder(CB->getNextNode());
  StructType *FrameTy = Shape.FrameTy;
  PointerType *FramePtrTy = FrameTy->getPointerTo();
  // FramePtr is the typed view of the raw coro.begin handle; all spill/reload
  // GEPs below are based on it.
  auto *FramePtr =
      cast<Instruction>(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr"));
  DominatorTree DT(*CB->getFunction());

  // Per-spill-entry iteration state: Spills is grouped by definition, so we
  // only emit one store per def and one reload per (def, user-block) pair.
  Value *CurrentValue = nullptr;
  BasicBlock *CurrentBlock = nullptr;
  Value *CurrentReload = nullptr;

  // Proper field number will be read from field definition.
  unsigned Index = InvalidFieldIndex;

  // We need to keep track of any allocas that need "spilling"
  // since they will live in the coroutine frame now, all access to them
  // need to be changed, not just the access across suspend points
  // we remember allocas and their indices to be handled once we processed
  // all the spills.
  SmallVector<std::pair<AllocaInst *, unsigned>, 4> Allocas;

  // Promise alloca (if present) doesn't show in the spills and has a
  // special field number.
  if (auto *PromiseAlloca = Shape.getPromiseAlloca()) {
    assert(Shape.ABI == coro::ABI::Switch);
    Allocas.emplace_back(PromiseAlloca, Shape.getPromiseField());
  }

  // Create a GEP with the given index into the coroutine frame for the
  // original value Orig. Appends an extra 0 index for array-allocas,
  // preserving the original type.
  auto GetFramePointer = [&](uint32_t Index, Value *Orig) -> Value * {
    SmallVector<Value *, 3> Indices = {
        ConstantInt::get(Type::getInt32Ty(C), 0),
        ConstantInt::get(Type::getInt32Ty(C), Index),
    };

    if (auto *AI = dyn_cast<AllocaInst>(Orig)) {
      if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) {
        auto Count = CI->getValue().getZExtValue();
        if (Count > 1) {
          // Array alloca: the frame field is [Count x T], so index to the
          // first element to keep the pointer type equal to the alloca's.
          Indices.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
        }
      } else {
        report_fatal_error("Coroutines cannot handle non static allocas yet");
      }
    }

    auto GEP = cast<GetElementPtrInst>(
        Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices));
    if (isa<AllocaInst>(Orig)) {
      // If the type of GEP is not equal to the type of AllocaInst, it implies
      // that the AllocaInst may be reused in the Frame slot of other
      // AllocaInst. So we cast the GEP to the type of AllocaInst.
      if (GEP->getResultElementType() != Orig->getType())
        return Builder.CreateBitCast(GEP, Orig->getType(),
                                     Orig->getName() + Twine(".cast"));
    }
    return GEP;
  };

  // Create a load instruction to reload the spilled value from the coroutine
  // frame. Populates the Value pointer reference provided with the frame GEP.
  // For allocas the address itself is the "reload" — no load is emitted.
  auto CreateReload = [&](Instruction *InsertBefore, Value *&G) {
    assert(Index != InvalidFieldIndex && "accessing unassigned field number");
    Builder.SetInsertPoint(InsertBefore);

    G = GetFramePointer(Index, CurrentValue);
    G->setName(CurrentValue->getName() + Twine(".reload.addr"));

    return isa<AllocaInst>(CurrentValue)
               ? G
               : Builder.CreateLoad(FrameTy->getElementType(Index), G,
                                    CurrentValue->getName() + Twine(".reload"));
  };

  Value *GEP = nullptr, *CurrentGEP = nullptr;
  for (auto const &E : Spills) {
    // If we have not seen the value, generate a spill.
    if (CurrentValue != E.def()) {
      CurrentValue = E.def();
      CurrentBlock = nullptr;
      CurrentReload = nullptr;

      Index = E.fieldIndex();

      if (auto *AI = dyn_cast<AllocaInst>(CurrentValue)) {
        // Spilled AllocaInst will be replaced with GEP from the coroutine
        // frame there is no spill required.
        Allocas.emplace_back(AI, Index);
        if (!AI->isStaticAlloca())
          report_fatal_error("Coroutines cannot handle non static allocas yet");
      } else {
        // Otherwise, create a store instruction storing the value into the
        // coroutine frame.

        Instruction *InsertPt = nullptr;
        if (auto Arg = dyn_cast<Argument>(CurrentValue)) {
          // For arguments, we will place the store instruction right after
          // the coroutine frame pointer instruction, i.e. bitcast of
          // coro.begin from i8* to %f.frame*.
          InsertPt = FramePtr->getNextNode();

          // If we're spilling an Argument, make sure we clear 'nocapture'
          // from the coroutine function.
          Arg->getParent()->removeParamAttr(Arg->getArgNo(),
                                            Attribute::NoCapture);

        } else if (auto CSI = dyn_cast<AnyCoroSuspendInst>(CurrentValue)) {
          // Don't spill immediately after a suspend; splitting assumes
          // that the suspend will be followed by a branch.
          InsertPt = CSI->getParent()->getSingleSuccessor()->getFirstNonPHI();
        } else {
          auto *I = cast<Instruction>(CurrentValue);
          if (!DT.dominates(CB, I)) {
            // If it is not dominated by CoroBegin, then spill should be
            // inserted immediately after CoroFrame is computed.
            InsertPt = FramePtr->getNextNode();
          } else if (auto *II = dyn_cast<InvokeInst>(I)) {
            // If we are spilling the result of the invoke instruction, split
            // the normal edge and insert the spill in the new block.
            auto *NewBB = SplitEdge(II->getParent(), II->getNormalDest());
            InsertPt = NewBB->getTerminator();
          } else if (isa<PHINode>(I)) {
            // Skip the PHINodes and EH pads instructions.
            BasicBlock *DefBlock = I->getParent();
            if (auto *CSI =
                    dyn_cast<CatchSwitchInst>(DefBlock->getTerminator()))
              InsertPt = splitBeforeCatchSwitch(CSI);
            else
              InsertPt = &*DefBlock->getFirstInsertionPt();
          } else {
            assert(!I->isTerminator() && "unexpected terminator");
            // For all other values, the spill is placed immediately after
            // the definition.
            InsertPt = I->getNextNode();
          }
        }

        Builder.SetInsertPoint(InsertPt);
        auto *G = Builder.CreateConstInBoundsGEP2_32(
            FrameTy, FramePtr, 0, Index,
            CurrentValue->getName() + Twine(".spill.addr"));
        Builder.CreateStore(CurrentValue, G);
      }
    }

    // If we have not seen the use block, generate a reload in it.
    if (CurrentBlock != E.userBlock()) {
      CurrentBlock = E.userBlock();
      CurrentReload = CreateReload(&*CurrentBlock->getFirstInsertionPt(), GEP);
    }

    // If we have a single edge PHINode, remove it and replace it with a reload
    // from the coroutine frame. (We already took care of multi edge PHINodes
    // by rewriting them in the rewritePHIs function).
    if (auto *PN = dyn_cast<PHINode>(E.user())) {
      assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming "
                                                "values in the PHINode");
      PN->replaceAllUsesWith(CurrentReload);
      PN->eraseFromParent();
      continue;
    }

    // If we have not seen this GEP instruction, migrate any dbg.declare from
    // the alloca to it.
    if (CurrentGEP != GEP) {
      CurrentGEP = GEP;
      TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(CurrentValue);
      if (!DIs.empty())
        DIBuilder(*CurrentBlock->getParent()->getParent(),
                  /*AllowUnresolved*/ false)
            .insertDeclare(CurrentGEP, DIs.front()->getVariable(),
                           DIs.front()->getExpression(),
                           DIs.front()->getDebugLoc(), DIs.front());
    }

    // Replace all uses of CurrentValue in the current instruction with reload.
    E.user()->replaceUsesOfWith(CurrentValue, CurrentReload);
  }

  BasicBlock *FramePtrBB = FramePtr->getParent();

  // Split twice so that AllocaSpillBB holds only the frame GEPs for moved
  // allocas; everything after goes to PostSpill.
  auto SpillBlock =
      FramePtrBB->splitBasicBlock(FramePtr->getNextNode(), "AllocaSpillBB");
  SpillBlock->splitBasicBlock(&SpillBlock->front(), "PostSpill");
  Shape.AllocaSpillBlock = SpillBlock;

  // retcon and retcon.once lowering assumes all uses have been sunk.
  if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce) {
    // If we found any allocas, replace all of their remaining uses with Geps.
    Builder.SetInsertPoint(&SpillBlock->front());
    for (auto &P : Allocas) {
      auto *G = GetFramePointer(P.second, P.first);

      // We are not using ReplaceInstWithInst(P.first, cast<Instruction>(G))
      // here, as we are changing location of the instruction.
      G->takeName(P.first);
      P.first->replaceAllUsesWith(G);
      P.first->eraseFromParent();
    }
    return FramePtr;
  }

  // If we found any alloca, replace all of their remaining uses with GEP
  // instructions. Because new dbg.declare have been created for these alloca,
  // we also delete the original dbg.declare and replace other uses with undef.
  // Note: We cannot replace the alloca with GEP instructions indiscriminately,
  // as some of the uses may not be dominated by CoroBegin.
  bool MightNeedToCopy = false;
  Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front());
  SmallVector<Instruction *, 4> UsersToUpdate;
  for (auto &P : Allocas) {
    AllocaInst *const A = P.first;

    for (auto *DI : FindDbgDeclareUses(A))
      DI->eraseFromParent();
    replaceDbgUsesWithUndef(A);

    UsersToUpdate.clear();
    for (User *U : A->users()) {
      auto *I = cast<Instruction>(U);
      if (DT.dominates(CB, I))
        UsersToUpdate.push_back(I);
      else
        MightNeedToCopy = true;
    }
    if (!UsersToUpdate.empty()) {
      auto *G = GetFramePointer(P.second, A);
      G->takeName(A);
      for (Instruction *I : UsersToUpdate)
        I->replaceUsesOfWith(A, G);
    }
  }
  // If we discovered such uses not dominated by CoroBegin, see if any of them
  // precede coro begin and have instructions that can modify the
  // value of the alloca and therefore would require a copying the value into
  // the spill slot in the coroutine frame.
  if (MightNeedToCopy) {
    Builder.SetInsertPoint(FramePtr->getNextNode());

    for (auto &P : Allocas) {
      AllocaInst *const A = P.first;
      AllocaUseVisitor Visitor(A->getModule()->getDataLayout(), DT, *CB);
      auto PtrI = Visitor.visitPtr(*A);
      assert(!PtrI.isAborted());
      if (PtrI.isEscaped()) {
        // isEscaped really means potentially modified before CoroBegin.
        if (A->isArrayAllocation())
          report_fatal_error(
              "Coroutines cannot handle copying of array allocas yet");

        auto *G = GetFramePointer(P.second, A);
        auto *Value = Builder.CreateLoad(A->getAllocatedType(), A);
        Builder.CreateStore(Value, G);
      }
      // For each alias to Alloca created before CoroBegin but used after
      // CoroBegin, we recreate them after CoroBegin by applying the offset
      // to the pointer in the frame.
      for (const auto &Alias : Visitor.getAliases()) {
        auto *FramePtr = GetFramePointer(P.second, A);
        auto *FramePtrRaw =
            Builder.CreateBitCast(FramePtr, Type::getInt8PtrTy(C));
        auto *AliasPtr = Builder.CreateGEP(
            FramePtrRaw, ConstantInt::get(Type::getInt64Ty(C), Alias.second));
        auto *AliasPtrTyped =
            Builder.CreateBitCast(AliasPtr, Alias.first->getType());
        Alias.first->replaceUsesWithIf(
            AliasPtrTyped, [&](Use &U) { return DT.dominates(CB, U); });
      }
    }
  }
  return FramePtr;
}

// Sets the unwind edge of an instruction to a particular successor.
static void setUnwindEdgeTo(Instruction *TI, BasicBlock *Succ) {
  if (auto *II = dyn_cast<InvokeInst>(TI))
    II->setUnwindDest(Succ);
  else if (auto *CS = dyn_cast<CatchSwitchInst>(TI))
    CS->setUnwindDest(Succ);
  else if (auto *CR = dyn_cast<CleanupReturnInst>(TI))
    CR->setUnwindDest(Succ);
  else
    llvm_unreachable("unexpected terminator instruction");
}

// Replaces all uses of OldPred with the NewPred block in all PHINodes in a
// block. If Until is given, stop before processing that PHI (it is updated by
// the caller — see ehAwareSplitEdge's LandingPadReplacement handling).
static void updatePhiNodes(BasicBlock *DestBB, BasicBlock *OldPred,
                           BasicBlock *NewPred, PHINode *Until = nullptr) {
  unsigned BBIdx = 0;
  for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
    PHINode *PN = cast<PHINode>(I);

    // We manually update the LandingPadReplacement PHINode and it is the last
    // PHI Node. So, if we find it, we are done.
    if (Until == PN)
      break;

    // Reuse the previous value of BBIdx if it lines up.  In cases where we
    // have multiple phi nodes with *lots* of predecessors, this is a speed
    // win because we don't have to scan the PHI looking for TIBB.  This
    // happens because the BB list of PHI nodes are usually in the same
    // order.
    if (PN->getIncomingBlock(BBIdx) != OldPred)
      BBIdx = PN->getBasicBlockIndex(OldPred);

    assert(BBIdx != (unsigned)-1 && "Invalid PHI Index!");
    PN->setIncomingBlock(BBIdx, NewPred);
  }
}

// Uses SplitEdge unless the successor block is an EHPad, in which case do EH
// specific handling.
static BasicBlock *ehAwareSplitEdge(BasicBlock *BB, BasicBlock *Succ,
                                    LandingPadInst *OriginalPad,
                                    PHINode *LandingPadReplacement) {
  auto *PadInst = Succ->getFirstNonPHI();
  if (!LandingPadReplacement && !PadInst->isEHPad())
    return SplitEdge(BB, Succ);

  auto *NewBB = BasicBlock::Create(BB->getContext(), "", BB->getParent(), Succ);
  setUnwindEdgeTo(BB->getTerminator(), NewBB);
  updatePhiNodes(Succ, BB, NewBB, LandingPadReplacement);

  if (LandingPadReplacement) {
    // Clone the landing pad into the new edge block; the replacement PHI in
    // Succ collects the cloned pads from all such edge blocks.
    auto *NewLP = OriginalPad->clone();
    auto *Terminator = BranchInst::Create(Succ, NewBB);
    NewLP->insertBefore(Terminator);
    LandingPadReplacement->addIncoming(NewLP, NewBB);
    return NewBB;
  }
  Value *ParentPad = nullptr;
  if (auto *FuncletPad = dyn_cast<FuncletPadInst>(PadInst))
    ParentPad = FuncletPad->getParentPad();
  else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(PadInst))
    ParentPad = CatchSwitch->getParentPad();
  else
    llvm_unreachable("handling for other EHPads not implemented yet");

  auto *NewCleanupPad = CleanupPadInst::Create(ParentPad, {}, "", NewBB);
  CleanupReturnInst::Create(NewCleanupPad, Succ, NewBB);
  return NewBB;
}

// Moves the values in the PHIs in SuccBB that correspond to PredBB into a new
// PHI in InsertedBB.
static void movePHIValuesToInsertedBlock(BasicBlock *SuccBB,
                                         BasicBlock *InsertedBB,
                                         BasicBlock *PredBB,
                                         PHINode *UntilPHI = nullptr) {
  auto *PN = cast<PHINode>(&SuccBB->front());
  do {
    // For the incoming value arriving via InsertedBB, create a single-entry
    // PHI in InsertedBB that records the original predecessor PredBB.
    int Index = PN->getBasicBlockIndex(InsertedBB);
    Value *V = PN->getIncomingValue(Index);
    PHINode *InputV = PHINode::Create(
        V->getType(), 1, V->getName() + Twine(".") + SuccBB->getName(),
        &InsertedBB->front());
    InputV->addIncoming(V, PredBB);
    PN->setIncomingValue(Index, InputV);
    PN = dyn_cast<PHINode>(PN->getNextNode());
  } while (PN != UntilPHI);
}

// Rewrites the PHI Nodes in a cleanuppad.
static void rewritePHIsForCleanupPad(BasicBlock *CleanupPadBB,
                                     CleanupPadInst *CleanupPad) {
  // For every incoming edge to a CleanupPad we will create a new block holding
  // all incoming values in single-value PHI nodes. We will then create another
  // block to act as a dispatcher (as all unwind edges for related EH blocks
  // must be the same).
  //
  // cleanuppad:
  //    %2 = phi i32[%0, %catchswitch], [%1, %catch.1]
  //    %3 = cleanuppad within none []
  //
  // It will create:
  //
  // cleanuppad.corodispatch
  //    %2 = phi i8[0, %catchswitch], [1, %catch.1]
  //    %3 = cleanuppad within none []
  //    switch i8 % 2, label %unreachable
  //            [i8 0, label %cleanuppad.from.catchswitch
  //             i8 1, label %cleanuppad.from.catch.1]
  // cleanuppad.from.catchswitch:
  //    %4 = phi i32 [%0, %catchswitch]
  //    br %label cleanuppad
  // cleanuppad.from.catch.1:
  //    %6 = phi i32 [%1, %catch.1]
  //    br %label cleanuppad
  // cleanuppad:
  //    %8 = phi i32 [%4, %cleanuppad.from.catchswitch],
  //                 [%6, %cleanuppad.from.catch.1]

  // Unreachable BB, in case switching on an invalid value in the dispatcher.
  auto *UnreachBB = BasicBlock::Create(
      CleanupPadBB->getContext(), "unreachable", CleanupPadBB->getParent());
  IRBuilder<> Builder(UnreachBB);
  Builder.CreateUnreachable();

  // Create a new cleanuppad which will be the dispatcher.
  auto *NewCleanupPadBB =
      BasicBlock::Create(CleanupPadBB->getContext(),
                         CleanupPadBB->getName() + Twine(".corodispatch"),
                         CleanupPadBB->getParent(), CleanupPadBB);
  Builder.SetInsertPoint(NewCleanupPadBB);
  auto *SwitchType = Builder.getInt8Ty();
  auto *SetDispatchValuePN =
      Builder.CreatePHI(SwitchType, pred_size(CleanupPadBB));
  // Move the original cleanuppad into the dispatcher block, after the PHI.
  CleanupPad->removeFromParent();
  CleanupPad->insertAfter(SetDispatchValuePN);
  auto *SwitchOnDispatch = Builder.CreateSwitch(SetDispatchValuePN, UnreachBB,
                                                pred_size(CleanupPadBB));

  int SwitchIndex = 0;
  SmallVector<BasicBlock *, 8> Preds(pred_begin(CleanupPadBB),
                                     pred_end(CleanupPadBB));
  for (BasicBlock *Pred : Preds) {
    // Create a new cleanuppad and move the PHI values to there.
    auto *CaseBB = BasicBlock::Create(CleanupPadBB->getContext(),
                                      CleanupPadBB->getName() +
                                          Twine(".from.") + Pred->getName(),
                                      CleanupPadBB->getParent(), CleanupPadBB);
    updatePhiNodes(CleanupPadBB, Pred, CaseBB);
    CaseBB->setName(CleanupPadBB->getName() + Twine(".from.") +
                    Pred->getName());
    Builder.SetInsertPoint(CaseBB);
    Builder.CreateBr(CleanupPadBB);
    movePHIValuesToInsertedBlock(CleanupPadBB, CaseBB, NewCleanupPadBB);

    // Update this Pred to the new unwind point.
    setUnwindEdgeTo(Pred->getTerminator(), NewCleanupPadBB);

    // Setup the switch in the dispatcher.
    auto *SwitchConstant = ConstantInt::get(SwitchType, SwitchIndex);
    SetDispatchValuePN->addIncoming(SwitchConstant, Pred);
    SwitchOnDispatch->addCase(SwitchConstant, CaseBB);
    SwitchIndex++;
  }
}

static void rewritePHIs(BasicBlock &BB) {
  // For every incoming edge we will create a block holding all
  // incoming values in a single PHI nodes.
  //
  // loop:
  //    %n.val = phi i32[%n, %entry], [%inc, %loop]
  //
  // It will create:
  //
  // loop.from.entry:
  //    %n.loop.pre = phi i32 [%n, %entry]
  //    br %label loop
  // loop.from.loop:
  //    %inc.loop.pre = phi i32 [%inc, %loop]
  //    br %label loop
  //
  // After this rewrite, further analysis will ignore any phi nodes with more
  // than one incoming edge.

  // TODO: Simplify PHINodes in the basic block to remove duplicate
  // predecessors.

  // Special case for CleanupPad: all EH blocks must have the same unwind edge
  // so we need to create an additional "dispatcher" block.
  if (auto *CleanupPad =
          dyn_cast_or_null<CleanupPadInst>(BB.getFirstNonPHI())) {
    SmallVector<BasicBlock *, 8> Preds(pred_begin(&BB), pred_end(&BB));
    for (BasicBlock *Pred : Preds) {
      if (CatchSwitchInst *CS =
              dyn_cast<CatchSwitchInst>(Pred->getTerminator())) {
        // CleanupPad with a CatchSwitch predecessor: therefore this is an
        // unwind destination that needs to be handled specially.
        assert(CS->getUnwindDest() == &BB);
        rewritePHIsForCleanupPad(&BB, CleanupPad);
        return;
      }
    }
  }

  LandingPadInst *LandingPad = nullptr;
  PHINode *ReplPHI = nullptr;
  if ((LandingPad = dyn_cast_or_null<LandingPadInst>(BB.getFirstNonPHI()))) {
    // ehAwareSplitEdge will clone the LandingPad in all the edge blocks.
    // We replace the original landing pad with a PHINode that will collect the
    // results from all of them.
    ReplPHI = PHINode::Create(LandingPad->getType(), 1, "", LandingPad);
    ReplPHI->takeName(LandingPad);
    LandingPad->replaceAllUsesWith(ReplPHI);
    // We will erase the original landing pad at the end of this function after
    // ehAwareSplitEdge cloned it in the transition blocks.
  }

  SmallVector<BasicBlock *, 8> Preds(pred_begin(&BB), pred_end(&BB));
  for (BasicBlock *Pred : Preds) {
    auto *IncomingBB = ehAwareSplitEdge(Pred, &BB, LandingPad, ReplPHI);
    IncomingBB->setName(BB.getName() + Twine(".from.") + Pred->getName());

    // Stop the moving of values at ReplPHI, as this is either null or the PHI
    // that replaced the landing pad.
    movePHIValuesToInsertedBlock(&BB, IncomingBB, Pred, ReplPHI);
  }

  if (LandingPad) {
    // Calls to ehAwareSplitEdge function cloned the original landing pad.
    // No longer need it.
    LandingPad->eraseFromParent();
  }
}

static void rewritePHIs(Function &F) {
  SmallVector<BasicBlock *, 8> WorkList;

  // Collect blocks with multi-edge PHIs first: rewriting splits edges and
  // would otherwise invalidate the block iteration.
  for (BasicBlock &BB : F)
    if (auto *PN = dyn_cast<PHINode>(&BB.front()))
      if (PN->getNumIncomingValues() > 1)
        WorkList.push_back(&BB);

  for (BasicBlock *BB : WorkList)
    rewritePHIs(*BB);
}

// Check for instructions that we can recreate on resume as opposed to spill
// the result into a coroutine frame.
static bool materializable(Instruction &V) {
  return isa<CastInst>(&V) || isa<GetElementPtrInst>(&V) ||
         isa<BinaryOperator>(&V) || isa<CmpInst>(&V) || isa<SelectInst>(&V);
}

// Check for structural coroutine intrinsics that should not be spilled into
// the coroutine frame.
static bool isCoroutineStructureIntrinsic(Instruction &I) {
  return isa<CoroIdInst>(&I) || isa<CoroSaveInst>(&I) ||
         isa<CoroSuspendInst>(&I);
}

// For every use of the value that is across suspend point, recreate that value
// after a suspend point.
1479 static void rewriteMaterializableInstructions(IRBuilder<> &IRB, 1480 SpillInfo const &Spills) { 1481 BasicBlock *CurrentBlock = nullptr; 1482 Instruction *CurrentMaterialization = nullptr; 1483 Instruction *CurrentDef = nullptr; 1484 1485 for (auto const &E : Spills) { 1486 // If it is a new definition, update CurrentXXX variables. 1487 if (CurrentDef != E.def()) { 1488 CurrentDef = cast<Instruction>(E.def()); 1489 CurrentBlock = nullptr; 1490 CurrentMaterialization = nullptr; 1491 } 1492 1493 // If we have not seen this block, materialize the value. 1494 if (CurrentBlock != E.userBlock()) { 1495 CurrentBlock = E.userBlock(); 1496 CurrentMaterialization = cast<Instruction>(CurrentDef)->clone(); 1497 CurrentMaterialization->setName(CurrentDef->getName()); 1498 CurrentMaterialization->insertBefore( 1499 &*CurrentBlock->getFirstInsertionPt()); 1500 } 1501 1502 if (auto *PN = dyn_cast<PHINode>(E.user())) { 1503 assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming " 1504 "values in the PHINode"); 1505 PN->replaceAllUsesWith(CurrentMaterialization); 1506 PN->eraseFromParent(); 1507 continue; 1508 } 1509 1510 // Replace all uses of CurrentDef in the current instruction with the 1511 // CurrentMaterialization for the block. 1512 E.user()->replaceUsesOfWith(CurrentDef, CurrentMaterialization); 1513 } 1514 } 1515 1516 // Splits the block at a particular instruction unless it is the first 1517 // instruction in the block with a single predecessor. 1518 static BasicBlock *splitBlockIfNotFirst(Instruction *I, const Twine &Name) { 1519 auto *BB = I->getParent(); 1520 if (&BB->front() == I) { 1521 if (BB->getSinglePredecessor()) { 1522 BB->setName(Name); 1523 return BB; 1524 } 1525 } 1526 return BB->splitBasicBlock(I, Name); 1527 } 1528 1529 // Split above and below a particular instruction so that it 1530 // will be all alone by itself in a block. 
1531 static void splitAround(Instruction *I, const Twine &Name) { 1532 splitBlockIfNotFirst(I, Name); 1533 splitBlockIfNotFirst(I->getNextNode(), "After" + Name); 1534 } 1535 1536 static bool isSuspendBlock(BasicBlock *BB) { 1537 return isa<AnyCoroSuspendInst>(BB->front()); 1538 } 1539 1540 typedef SmallPtrSet<BasicBlock*, 8> VisitedBlocksSet; 1541 1542 /// Does control flow starting at the given block ever reach a suspend 1543 /// instruction before reaching a block in VisitedOrFreeBBs? 1544 static bool isSuspendReachableFrom(BasicBlock *From, 1545 VisitedBlocksSet &VisitedOrFreeBBs) { 1546 // Eagerly try to add this block to the visited set. If it's already 1547 // there, stop recursing; this path doesn't reach a suspend before 1548 // either looping or reaching a freeing block. 1549 if (!VisitedOrFreeBBs.insert(From).second) 1550 return false; 1551 1552 // We assume that we'll already have split suspends into their own blocks. 1553 if (isSuspendBlock(From)) 1554 return true; 1555 1556 // Recurse on the successors. 1557 for (auto Succ : successors(From)) { 1558 if (isSuspendReachableFrom(Succ, VisitedOrFreeBBs)) 1559 return true; 1560 } 1561 1562 return false; 1563 } 1564 1565 /// Is the given alloca "local", i.e. bounded in lifetime to not cross a 1566 /// suspend point? 1567 static bool isLocalAlloca(CoroAllocaAllocInst *AI) { 1568 // Seed the visited set with all the basic blocks containing a free 1569 // so that we won't pass them up. 1570 VisitedBlocksSet VisitedOrFreeBBs; 1571 for (auto User : AI->users()) { 1572 if (auto FI = dyn_cast<CoroAllocaFreeInst>(User)) 1573 VisitedOrFreeBBs.insert(FI->getParent()); 1574 } 1575 1576 return !isSuspendReachableFrom(AI->getParent(), VisitedOrFreeBBs); 1577 } 1578 1579 /// After we split the coroutine, will the given basic block be along 1580 /// an obvious exit path for the resumption function? 
1581 static bool willLeaveFunctionImmediatelyAfter(BasicBlock *BB, 1582 unsigned depth = 3) { 1583 // If we've bottomed out our depth count, stop searching and assume 1584 // that the path might loop back. 1585 if (depth == 0) return false; 1586 1587 // If this is a suspend block, we're about to exit the resumption function. 1588 if (isSuspendBlock(BB)) return true; 1589 1590 // Recurse into the successors. 1591 for (auto Succ : successors(BB)) { 1592 if (!willLeaveFunctionImmediatelyAfter(Succ, depth - 1)) 1593 return false; 1594 } 1595 1596 // If none of the successors leads back in a loop, we're on an exit/abort. 1597 return true; 1598 } 1599 1600 static bool localAllocaNeedsStackSave(CoroAllocaAllocInst *AI) { 1601 // Look for a free that isn't sufficiently obviously followed by 1602 // either a suspend or a termination, i.e. something that will leave 1603 // the coro resumption frame. 1604 for (auto U : AI->users()) { 1605 auto FI = dyn_cast<CoroAllocaFreeInst>(U); 1606 if (!FI) continue; 1607 1608 if (!willLeaveFunctionImmediatelyAfter(FI->getParent())) 1609 return true; 1610 } 1611 1612 // If we never found one, we don't need a stack save. 1613 return false; 1614 } 1615 1616 /// Turn each of the given local allocas into a normal (dynamic) alloca 1617 /// instruction. 1618 static void lowerLocalAllocas(ArrayRef<CoroAllocaAllocInst*> LocalAllocas, 1619 SmallVectorImpl<Instruction*> &DeadInsts) { 1620 for (auto AI : LocalAllocas) { 1621 auto M = AI->getModule(); 1622 IRBuilder<> Builder(AI); 1623 1624 // Save the stack depth. Try to avoid doing this if the stackrestore 1625 // is going to immediately precede a return or something. 1626 Value *StackSave = nullptr; 1627 if (localAllocaNeedsStackSave(AI)) 1628 StackSave = Builder.CreateCall( 1629 Intrinsic::getDeclaration(M, Intrinsic::stacksave)); 1630 1631 // Allocate memory. 
1632 auto Alloca = Builder.CreateAlloca(Builder.getInt8Ty(), AI->getSize()); 1633 Alloca->setAlignment(Align(AI->getAlignment())); 1634 1635 for (auto U : AI->users()) { 1636 // Replace gets with the allocation. 1637 if (isa<CoroAllocaGetInst>(U)) { 1638 U->replaceAllUsesWith(Alloca); 1639 1640 // Replace frees with stackrestores. This is safe because 1641 // alloca.alloc is required to obey a stack discipline, although we 1642 // don't enforce that structurally. 1643 } else { 1644 auto FI = cast<CoroAllocaFreeInst>(U); 1645 if (StackSave) { 1646 Builder.SetInsertPoint(FI); 1647 Builder.CreateCall( 1648 Intrinsic::getDeclaration(M, Intrinsic::stackrestore), 1649 StackSave); 1650 } 1651 } 1652 DeadInsts.push_back(cast<Instruction>(U)); 1653 } 1654 1655 DeadInsts.push_back(AI); 1656 } 1657 } 1658 1659 /// Turn the given coro.alloca.alloc call into a dynamic allocation. 1660 /// This happens during the all-instructions iteration, so it must not 1661 /// delete the call. 1662 static Instruction *lowerNonLocalAlloca(CoroAllocaAllocInst *AI, 1663 coro::Shape &Shape, 1664 SmallVectorImpl<Instruction*> &DeadInsts) { 1665 IRBuilder<> Builder(AI); 1666 auto Alloc = Shape.emitAlloc(Builder, AI->getSize(), nullptr); 1667 1668 for (User *U : AI->users()) { 1669 if (isa<CoroAllocaGetInst>(U)) { 1670 U->replaceAllUsesWith(Alloc); 1671 } else { 1672 auto FI = cast<CoroAllocaFreeInst>(U); 1673 Builder.SetInsertPoint(FI); 1674 Shape.emitDealloc(Builder, Alloc, nullptr); 1675 } 1676 DeadInsts.push_back(cast<Instruction>(U)); 1677 } 1678 1679 // Push this on last so that it gets deleted after all the others. 1680 DeadInsts.push_back(AI); 1681 1682 // Return the new allocation value so that we can check for needed spills. 1683 return cast<Instruction>(Alloc); 1684 } 1685 1686 /// Get the current swifterror value. 1687 static Value *emitGetSwiftErrorValue(IRBuilder<> &Builder, Type *ValueTy, 1688 coro::Shape &Shape) { 1689 // Make a fake function pointer as a sort of intrinsic. 
1690 auto FnTy = FunctionType::get(ValueTy, {}, false); 1691 auto Fn = ConstantPointerNull::get(FnTy->getPointerTo()); 1692 1693 auto Call = Builder.CreateCall(FnTy, Fn, {}); 1694 Shape.SwiftErrorOps.push_back(Call); 1695 1696 return Call; 1697 } 1698 1699 /// Set the given value as the current swifterror value. 1700 /// 1701 /// Returns a slot that can be used as a swifterror slot. 1702 static Value *emitSetSwiftErrorValue(IRBuilder<> &Builder, Value *V, 1703 coro::Shape &Shape) { 1704 // Make a fake function pointer as a sort of intrinsic. 1705 auto FnTy = FunctionType::get(V->getType()->getPointerTo(), 1706 {V->getType()}, false); 1707 auto Fn = ConstantPointerNull::get(FnTy->getPointerTo()); 1708 1709 auto Call = Builder.CreateCall(FnTy, Fn, { V }); 1710 Shape.SwiftErrorOps.push_back(Call); 1711 1712 return Call; 1713 } 1714 1715 /// Set the swifterror value from the given alloca before a call, 1716 /// then put in back in the alloca afterwards. 1717 /// 1718 /// Returns an address that will stand in for the swifterror slot 1719 /// until splitting. 1720 static Value *emitSetAndGetSwiftErrorValueAround(Instruction *Call, 1721 AllocaInst *Alloca, 1722 coro::Shape &Shape) { 1723 auto ValueTy = Alloca->getAllocatedType(); 1724 IRBuilder<> Builder(Call); 1725 1726 // Load the current value from the alloca and set it as the 1727 // swifterror value. 1728 auto ValueBeforeCall = Builder.CreateLoad(ValueTy, Alloca); 1729 auto Addr = emitSetSwiftErrorValue(Builder, ValueBeforeCall, Shape); 1730 1731 // Move to after the call. Since swifterror only has a guaranteed 1732 // value on normal exits, we can ignore implicit and explicit unwind 1733 // edges. 1734 if (isa<CallInst>(Call)) { 1735 Builder.SetInsertPoint(Call->getNextNode()); 1736 } else { 1737 auto Invoke = cast<InvokeInst>(Call); 1738 Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstNonPHIOrDbg()); 1739 } 1740 1741 // Get the current swifterror value and store it to the alloca. 
1742 auto ValueAfterCall = emitGetSwiftErrorValue(Builder, ValueTy, Shape); 1743 Builder.CreateStore(ValueAfterCall, Alloca); 1744 1745 return Addr; 1746 } 1747 1748 /// Eliminate a formerly-swifterror alloca by inserting the get/set 1749 /// intrinsics and attempting to MemToReg the alloca away. 1750 static void eliminateSwiftErrorAlloca(Function &F, AllocaInst *Alloca, 1751 coro::Shape &Shape) { 1752 for (auto UI = Alloca->use_begin(), UE = Alloca->use_end(); UI != UE; ) { 1753 // We're likely changing the use list, so use a mutation-safe 1754 // iteration pattern. 1755 auto &Use = *UI; 1756 ++UI; 1757 1758 // swifterror values can only be used in very specific ways. 1759 // We take advantage of that here. 1760 auto User = Use.getUser(); 1761 if (isa<LoadInst>(User) || isa<StoreInst>(User)) 1762 continue; 1763 1764 assert(isa<CallInst>(User) || isa<InvokeInst>(User)); 1765 auto Call = cast<Instruction>(User); 1766 1767 auto Addr = emitSetAndGetSwiftErrorValueAround(Call, Alloca, Shape); 1768 1769 // Use the returned slot address as the call argument. 1770 Use.set(Addr); 1771 } 1772 1773 // All the uses should be loads and stores now. 1774 assert(isAllocaPromotable(Alloca)); 1775 } 1776 1777 /// "Eliminate" a swifterror argument by reducing it to the alloca case 1778 /// and then loading and storing in the prologue and epilog. 1779 /// 1780 /// The argument keeps the swifterror flag. 1781 static void eliminateSwiftErrorArgument(Function &F, Argument &Arg, 1782 coro::Shape &Shape, 1783 SmallVectorImpl<AllocaInst*> &AllocasToPromote) { 1784 IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg()); 1785 1786 auto ArgTy = cast<PointerType>(Arg.getType()); 1787 auto ValueTy = ArgTy->getElementType(); 1788 1789 // Reduce to the alloca case: 1790 1791 // Create an alloca and replace all uses of the arg with it. 
  auto Alloca = Builder.CreateAlloca(ValueTy, ArgTy->getAddressSpace());
  Arg.replaceAllUsesWith(Alloca);

  // Set an initial value in the alloca.  swifterror is always null on entry.
  auto InitialValue = Constant::getNullValue(ValueTy);
  Builder.CreateStore(InitialValue, Alloca);

  // Find all the suspends in the function and save and restore around them.
  for (auto Suspend : Shape.CoroSuspends) {
    (void) emitSetAndGetSwiftErrorValueAround(Suspend, Alloca, Shape);
  }

  // Find all the coro.ends in the function and restore the error value.
  for (auto End : Shape.CoroEnds) {
    Builder.SetInsertPoint(End);
    auto FinalValue = Builder.CreateLoad(ValueTy, Alloca);
    (void) emitSetSwiftErrorValue(Builder, FinalValue, Shape);
  }

  // Now we can use the alloca logic.
  AllocasToPromote.push_back(Alloca);
  eliminateSwiftErrorAlloca(F, Alloca, Shape);
}

/// Eliminate all problematic uses of swifterror arguments and allocas
/// from the function.  We'll fix them up later when splitting the function.
static void eliminateSwiftError(Function &F, coro::Shape &Shape) {
  SmallVector<AllocaInst*, 4> AllocasToPromote;

  // Look for a swifterror argument.  A function can have at most one, so
  // stop after the first match.
  for (auto &Arg : F.args()) {
    if (!Arg.hasSwiftErrorAttr()) continue;

    eliminateSwiftErrorArgument(F, Arg, Shape, AllocasToPromote);
    break;
  }

  // Look for swifterror allocas.  swifterror allocas must be in the entry
  // block, so only that block needs scanning.
  for (auto &Inst : F.getEntryBlock()) {
    auto Alloca = dyn_cast<AllocaInst>(&Inst);
    if (!Alloca || !Alloca->isSwiftError()) continue;

    // Clear the swifterror flag.
    Alloca->setSwiftError(false);

    AllocasToPromote.push_back(Alloca);
    eliminateSwiftErrorAlloca(F, Alloca, Shape);
  }

  // If we have any allocas to promote, compute a dominator tree and
  // promote them en masse.
1843 if (!AllocasToPromote.empty()) { 1844 DominatorTree DT(F); 1845 PromoteMemToReg(AllocasToPromote, DT); 1846 } 1847 } 1848 1849 /// retcon and retcon.once conventions assume that all spill uses can be sunk 1850 /// after the coro.begin intrinsic. 1851 static void sinkSpillUsesAfterCoroBegin(Function &F, const SpillInfo &Spills, 1852 CoroBeginInst *CoroBegin) { 1853 DominatorTree Dom(F); 1854 1855 SmallSetVector<Instruction *, 32> ToMove; 1856 SmallVector<Instruction *, 32> Worklist; 1857 1858 // Collect all users that precede coro.begin. 1859 for (auto const &Entry : Spills) { 1860 auto *SpillDef = Entry.def(); 1861 for (User *U : SpillDef->users()) { 1862 auto Inst = cast<Instruction>(U); 1863 if (Inst->getParent() != CoroBegin->getParent() || 1864 Dom.dominates(CoroBegin, Inst)) 1865 continue; 1866 if (ToMove.insert(Inst)) 1867 Worklist.push_back(Inst); 1868 } 1869 } 1870 // Recursively collect users before coro.begin. 1871 while (!Worklist.empty()) { 1872 auto *Def = Worklist.back(); 1873 Worklist.pop_back(); 1874 for (User *U : Def->users()) { 1875 auto Inst = cast<Instruction>(U); 1876 if (Dom.dominates(CoroBegin, Inst)) 1877 continue; 1878 if (ToMove.insert(Inst)) 1879 Worklist.push_back(Inst); 1880 } 1881 } 1882 1883 // Sort by dominance. 1884 SmallVector<Instruction *, 64> InsertionList(ToMove.begin(), ToMove.end()); 1885 std::sort(InsertionList.begin(), InsertionList.end(), 1886 [&Dom](Instruction *A, Instruction *B) -> bool { 1887 // If a dominates b it should preceed (<) b. 1888 return Dom.dominates(A, B); 1889 }); 1890 1891 Instruction *InsertPt = CoroBegin->getNextNode(); 1892 for (Instruction *Inst : InsertionList) 1893 Inst->moveBefore(InsertPt); 1894 1895 return; 1896 } 1897 1898 /// For each local variable that all of its user are only used inside one of 1899 /// suspended region, we sink their lifetime.start markers to the place where 1900 /// after the suspend block. 
Doing so minimizes the lifetime of each variable, 1901 /// hence minimizing the amount of data we end up putting on the frame. 1902 static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape, 1903 SuspendCrossingInfo &Checker) { 1904 DominatorTree DT(F); 1905 1906 // Collect all possible basic blocks which may dominate all uses of allocas. 1907 SmallPtrSet<BasicBlock *, 4> DomSet; 1908 DomSet.insert(&F.getEntryBlock()); 1909 for (auto *CSI : Shape.CoroSuspends) { 1910 BasicBlock *SuspendBlock = CSI->getParent(); 1911 assert(isSuspendBlock(SuspendBlock) && SuspendBlock->getSingleSuccessor() && 1912 "should have split coro.suspend into its own block"); 1913 DomSet.insert(SuspendBlock->getSingleSuccessor()); 1914 } 1915 1916 for (Instruction &I : instructions(F)) { 1917 AllocaInst* AI = dyn_cast<AllocaInst>(&I); 1918 if (!AI) 1919 continue; 1920 1921 for (BasicBlock *DomBB : DomSet) { 1922 bool Valid = true; 1923 SmallVector<Instruction *, 1> Lifetimes; 1924 1925 auto isLifetimeStart = [](Instruction* I) { 1926 if (auto* II = dyn_cast<IntrinsicInst>(I)) 1927 return II->getIntrinsicID() == Intrinsic::lifetime_start; 1928 return false; 1929 }; 1930 1931 auto collectLifetimeStart = [&](Instruction *U, AllocaInst *AI) { 1932 if (isLifetimeStart(U)) { 1933 Lifetimes.push_back(U); 1934 return true; 1935 } 1936 if (!U->hasOneUse() || U->stripPointerCasts() != AI) 1937 return false; 1938 if (isLifetimeStart(U->user_back())) { 1939 Lifetimes.push_back(U->user_back()); 1940 return true; 1941 } 1942 return false; 1943 }; 1944 1945 for (User *U : AI->users()) { 1946 Instruction *UI = cast<Instruction>(U); 1947 // For all users except lifetime.start markers, if they are all 1948 // dominated by one of the basic blocks and do not cross 1949 // suspend points as well, then there is no need to spill the 1950 // instruction. 
        if (!DT.dominates(DomBB, UI->getParent()) ||
            Checker.isDefinitionAcrossSuspend(DomBB, UI)) {
          // Skip lifetime.start, GEP and bitcast used by lifetime.start
          // markers.
          if (collectLifetimeStart(UI, AI))
            continue;
          // A real user escapes the candidate region; this DomBB is not a
          // valid sink target for the alloca's lifetime.start.
          Valid = false;
          break;
        }
      }
      // Sink lifetime.start markers to dominate block when they are
      // only used outside the region.
      if (Valid && Lifetimes.size() != 0) {
        // May be AI itself, when the type of AI is i8*
        auto *NewBitCast = [&](AllocaInst *AI) -> Value* {
          if (isa<AllocaInst>(Lifetimes[0]->getOperand(1)))
            return AI;
          auto *Int8PtrTy = Type::getInt8PtrTy(F.getContext());
          return CastInst::Create(Instruction::BitCast, AI, Int8PtrTy, "",
                                  DomBB->getTerminator());
        }(AI);

        // Clone one existing lifetime.start, retarget it at the (possibly
        // new) i8* cast, and place it at the end of the dominating block.
        auto *NewLifetime = Lifetimes[0]->clone();
        NewLifetime->replaceUsesOfWith(NewLifetime->getOperand(1), NewBitCast);
        NewLifetime->insertBefore(DomBB->getTerminator());

        // All the old (outside) lifetime.start markers are no longer
        // necessary.
        for (Instruction *S : Lifetimes)
          S->eraseFromParent();

        // One sink target suffices; stop scanning DomSet for this alloca.
        break;
      }
    }
  }
}

// Entry point: discover values live across suspend points and build the
// coroutine frame to hold them.
void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
  // Rewrite swifterror arguments/allocas first, since their special uses
  // would otherwise confuse the spilling logic.
  eliminateSwiftError(F, Shape);

  if (Shape.ABI == coro::ABI::Switch &&
      Shape.SwitchLowering.PromiseAlloca) {
    Shape.getSwitchCoroId()->clearPromise();
  }

  // Make sure that all coro.save, coro.suspend and the fallthrough coro.end
  // intrinsics are in their own blocks to simplify the logic of building up
  // SuspendCrossing data.
  for (auto *CSI : Shape.CoroSuspends) {
    if (auto *Save = CSI->getCoroSave())
      splitAround(Save, "CoroSave");
    splitAround(CSI, "CoroSuspend");
  }

  // Put CoroEnds into their own blocks.
  for (CoroEndInst *CE : Shape.CoroEnds)
    splitAround(CE, "CoroEnd");

  // Transforms multi-edge PHI Nodes, so that any value feeding into a PHI will
  // never has its definition separated from the PHI by the suspend point.
  rewritePHIs(F);

  // Build suspend crossing info.
  SuspendCrossingInfo Checker(F, Shape);

  IRBuilder<> Builder(F.getContext());
  SpillInfo Spills;
  SmallVector<CoroAllocaAllocInst*, 4> LocalAllocas;
  SmallVector<Instruction*, 4> DeadInstructions;

  // Rematerialize cheap instructions at their uses instead of spilling them.
  // Rematerialization can itself create new cross-suspend uses, so iterate
  // a few times (bounded at 4 to guarantee termination).
  for (int Repeat = 0; Repeat < 4; ++Repeat) {
    // See if there are materializable instructions across suspend points.
    for (Instruction &I : instructions(F))
      if (materializable(I))
        for (User *U : I.users())
          if (Checker.isDefinitionAcrossSuspend(I, U))
            Spills.emplace_back(&I, U);

    if (Spills.empty())
      break;

    // Rewrite materializable instructions to be materialized at the use point.
    LLVM_DEBUG(dump("Materializations", Spills));
    rewriteMaterializableInstructions(Builder, Spills);
    Spills.clear();
  }

  sinkLifetimeStartMarkers(F, Shape, Checker);
  // Collect lifetime.start info for each alloca.
  using LifetimeStart = SmallPtrSet<Instruction *, 2>;
  llvm::DenseMap<Instruction *, std::unique_ptr<LifetimeStart>> LifetimeMap;
  for (Instruction &I : instructions(F)) {
    auto *II = dyn_cast<IntrinsicInst>(&I);
    if (!II || II->getIntrinsicID() != Intrinsic::lifetime_start)
      continue;

    if (auto *OpInst = dyn_cast<Instruction>(II->getOperand(1))) {
      if (auto *AI = dyn_cast<AllocaInst>(OpInst->stripPointerCasts())) {

        if (LifetimeMap.find(AI) == LifetimeMap.end())
          LifetimeMap[AI] = std::make_unique<LifetimeStart>();
        // Key the lifetime on the intrinsic itself when it takes the alloca
        // directly, otherwise on the intervening cast/GEP.
        LifetimeMap[AI]->insert(isa<AllocaInst>(OpInst) ? II : OpInst);
      }
    }
  }

  // Collect the spills for arguments and other not-materializable values.
  for (Argument &A : F.args())
    for (User *U : A.users())
      if (Checker.isDefinitionAcrossSuspend(A, U))
        Spills.emplace_back(&A, U);

  for (Instruction &I : instructions(F)) {
    // Values returned from coroutine structure intrinsics should not be part
    // of the Coroutine Frame.
    if (isCoroutineStructureIntrinsic(I) || &I == Shape.CoroBegin)
      continue;

    // The Coroutine Promise always included into coroutine frame, no need to
    // check for suspend crossing.
    if (Shape.ABI == coro::ABI::Switch &&
        Shape.SwitchLowering.PromiseAlloca == &I)
      continue;

    // Handle alloca.alloc specially here.
    if (auto AI = dyn_cast<CoroAllocaAllocInst>(&I)) {
      // Check whether the alloca's lifetime is bounded by suspend points.
      if (isLocalAlloca(AI)) {
        LocalAllocas.push_back(AI);
        continue;
      }

      // If not, do a quick rewrite of the alloca and then add spills of
      // the rewritten value.  The rewrite doesn't invalidate anything in
      // Spills because the other alloca intrinsics have no other operands
      // besides AI, and it doesn't invalidate the iteration because we delay
      // erasing AI.
      auto Alloc = lowerNonLocalAlloca(AI, Shape, DeadInstructions);

      for (User *U : Alloc->users()) {
        if (Checker.isDefinitionAcrossSuspend(*Alloc, U))
          Spills.emplace_back(Alloc, U);
      }
      continue;
    }

    // Ignore alloca.get; we process this as part of coro.alloca.alloc.
    if (isa<CoroAllocaGetInst>(I)) {
      continue;
    }

    auto Iter = LifetimeMap.find(&I);
    for (User *U : I.users()) {
      bool NeedSpill = false;

      // Check against lifetime.start if the instruction has the info.
      // If any lifetime.start marker crosses a suspend relative to this
      // use, the alloca must be spilled.
      if (Iter != LifetimeMap.end())
        for (auto *S : *Iter->second) {
          if ((NeedSpill = Checker.isDefinitionAcrossSuspend(*S, U)))
            break;
        }
      else
        NeedSpill = Checker.isDefinitionAcrossSuspend(I, U);

      if (NeedSpill) {
        // We cannot spill a token.
        if (I.getType()->isTokenTy())
          report_fatal_error(
              "token definition is separated from the use by a suspend point");
        Spills.emplace_back(&I, U);
      }
    }
  }
  LLVM_DEBUG(dump("Spills", Spills));
  // The retcon ABIs require all spill uses to sit after coro.begin.
  if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce)
    sinkSpillUsesAfterCoroBegin(F, Spills, Shape.CoroBegin);
  // Lay out the frame struct, rewrite spilled values as frame accesses, and
  // lower the remaining local allocas.
  Shape.FrameTy = buildFrameType(F, Shape, Spills);
  Shape.FramePtr = insertSpills(Spills, Shape);
  lowerLocalAllocas(LocalAllocas, DeadInstructions);

  for (auto I : DeadInstructions)
    I->eraseFromParent();
}