//===- CoroFrame.cpp - Builds and manipulates coroutine frame -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file contains classes used to discover if, for a particular value,
// there is a path from its definition to a use that crosses a suspend point.
//
// Using the information discovered we form a Coroutine Frame structure to
// contain those values. All uses of those values are replaced with appropriate
// GEP + load from the coroutine frame. At the point of the definition we spill
// the value into the coroutine frame.
//
// TODO: pack values tightly using liveness info.
//===----------------------------------------------------------------------===//

#include "CoroInternal.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Analysis/PtrUseVisitor.h"
#include "llvm/Analysis/StackLifetime.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/OptimizedStructLayout.h"
#include "llvm/Support/circular_raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>

using namespace llvm;

// The "coro-suspend-crossing" flag is very noisy. There is another debug type,
// "coro-frame", which results in leaner debug spew.
#define DEBUG_TYPE "coro-suspend-crossing"

static cl::opt<bool> EnableReuseStorageInFrame(
    "reuse-storage-in-coroutine-frame", cl::Hidden,
    cl::desc("Enable the optimization which would reuse the storage in the "
             "coroutine frame for allocas whose lifetime ranges do not "
             "overlap, for testing purposes"),
    llvm::cl::init(false));

enum { SmallVectorThreshold = 32 };

// Provides a two-way mapping between basic blocks and their index numbers.
namespace {
class BlockToIndexMapping {
  SmallVector<BasicBlock *, SmallVectorThreshold> V;

public:
  size_t size() const { return V.size(); }

  BlockToIndexMapping(Function &F) {
    for (BasicBlock &BB : F)
      V.push_back(&BB);
    llvm::sort(V);
  }

  size_t blockToIndex(BasicBlock *BB) const {
    auto *I = llvm::lower_bound(V, BB);
    assert(I != V.end() && *I == BB && "BlockToIndexMapping: Unknown block");
    return I - V.begin();
  }

  BasicBlock *indexToBlock(unsigned Index) const { return V[Index]; }
};
} // end anonymous namespace

// SuspendCrossingInfo maintains data that allows answering the question of
// whether, given two BasicBlocks A and B, there is a path from A to B that
// passes through a suspend point.
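//
// An illustrative example (not taken from a real test case): in the linear
// CFG
//
//   entry -> A -> S -> B        ; S contains a coro.suspend
//
// every path from A to B crosses the suspend in S, so a value defined in A
// and used in B has to live in the coroutine frame, while a value defined
// and used only in A does not.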
82 // 83 // For every basic block 'i' it maintains a BlockData that consists of: 84 // Consumes: a bit vector which contains a set of indices of blocks that can 85 // reach block 'i' 86 // Kills: a bit vector which contains a set of indices of blocks that can 87 // reach block 'i', but one of the path will cross a suspend point 88 // Suspend: a boolean indicating whether block 'i' contains a suspend point. 89 // End: a boolean indicating whether block 'i' contains a coro.end intrinsic. 90 // 91 namespace { 92 struct SuspendCrossingInfo { 93 BlockToIndexMapping Mapping; 94 95 struct BlockData { 96 BitVector Consumes; 97 BitVector Kills; 98 bool Suspend = false; 99 bool End = false; 100 }; 101 SmallVector<BlockData, SmallVectorThreshold> Block; 102 103 iterator_range<succ_iterator> successors(BlockData const &BD) const { 104 BasicBlock *BB = Mapping.indexToBlock(&BD - &Block[0]); 105 return llvm::successors(BB); 106 } 107 108 BlockData &getBlockData(BasicBlock *BB) { 109 return Block[Mapping.blockToIndex(BB)]; 110 } 111 112 void dump() const; 113 void dump(StringRef Label, BitVector const &BV) const; 114 115 SuspendCrossingInfo(Function &F, coro::Shape &Shape); 116 117 bool hasPathCrossingSuspendPoint(BasicBlock *DefBB, BasicBlock *UseBB) const { 118 size_t const DefIndex = Mapping.blockToIndex(DefBB); 119 size_t const UseIndex = Mapping.blockToIndex(UseBB); 120 121 bool const Result = Block[UseIndex].Kills[DefIndex]; 122 LLVM_DEBUG(dbgs() << UseBB->getName() << " => " << DefBB->getName() 123 << " answer is " << Result << "\n"); 124 return Result; 125 } 126 127 bool isDefinitionAcrossSuspend(BasicBlock *DefBB, User *U) const { 128 auto *I = cast<Instruction>(U); 129 130 // We rewrote PHINodes, so that only the ones with exactly one incoming 131 // value need to be analyzed. 132 if (auto *PN = dyn_cast<PHINode>(I)) 133 if (PN->getNumIncomingValues() > 1) 134 return false; 135 136 BasicBlock *UseBB = I->getParent(); 137 138 // As a special case, treat uses by an llvm.coro.suspend.retcon 139 // as if they were uses in the suspend's single predecessor: the 140 // uses conceptually occur before the suspend. 141 if (isa<CoroSuspendRetconInst>(I)) { 142 UseBB = UseBB->getSinglePredecessor(); 143 assert(UseBB && "should have split coro.suspend into its own block"); 144 } 145 146 return hasPathCrossingSuspendPoint(DefBB, UseBB); 147 } 148 149 bool isDefinitionAcrossSuspend(Argument &A, User *U) const { 150 return isDefinitionAcrossSuspend(&A.getParent()->getEntryBlock(), U); 151 } 152 153 bool isDefinitionAcrossSuspend(Instruction &I, User *U) const { 154 auto *DefBB = I.getParent(); 155 156 // As a special case, treat values produced by an llvm.coro.suspend.* 157 // as if they were defined in the single successor: the uses 158 // conceptually occur after the suspend. 
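    // (For example, in the retcon ABIs the values produced by
    // llvm.coro.suspend.retcon are the arguments supplied by the next
    // resumption of the coroutine, so they only become available in the
    // suspend block's single successor.)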
159 if (isa<AnyCoroSuspendInst>(I)) { 160 DefBB = DefBB->getSingleSuccessor(); 161 assert(DefBB && "should have split coro.suspend into its own block"); 162 } 163 164 return isDefinitionAcrossSuspend(DefBB, U); 165 } 166 }; 167 } // end anonymous namespace 168 169 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 170 LLVM_DUMP_METHOD void SuspendCrossingInfo::dump(StringRef Label, 171 BitVector const &BV) const { 172 dbgs() << Label << ":"; 173 for (size_t I = 0, N = BV.size(); I < N; ++I) 174 if (BV[I]) 175 dbgs() << " " << Mapping.indexToBlock(I)->getName(); 176 dbgs() << "\n"; 177 } 178 179 LLVM_DUMP_METHOD void SuspendCrossingInfo::dump() const { 180 for (size_t I = 0, N = Block.size(); I < N; ++I) { 181 BasicBlock *const B = Mapping.indexToBlock(I); 182 dbgs() << B->getName() << ":\n"; 183 dump(" Consumes", Block[I].Consumes); 184 dump(" Kills", Block[I].Kills); 185 } 186 dbgs() << "\n"; 187 } 188 #endif 189 190 SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape) 191 : Mapping(F) { 192 const size_t N = Mapping.size(); 193 Block.resize(N); 194 195 // Initialize every block so that it consumes itself 196 for (size_t I = 0; I < N; ++I) { 197 auto &B = Block[I]; 198 B.Consumes.resize(N); 199 B.Kills.resize(N); 200 B.Consumes.set(I); 201 } 202 203 // Mark all CoroEnd Blocks. We do not propagate Kills beyond coro.ends as 204 // the code beyond coro.end is reachable during initial invocation of the 205 // coroutine. 206 for (auto *CE : Shape.CoroEnds) 207 getBlockData(CE->getParent()).End = true; 208 209 // Mark all suspend blocks and indicate that they kill everything they 210 // consume. Note, that crossing coro.save also requires a spill, as any code 211 // between coro.save and coro.suspend may resume the coroutine and all of the 212 // state needs to be saved by that time. 213 auto markSuspendBlock = [&](IntrinsicInst *BarrierInst) { 214 BasicBlock *SuspendBlock = BarrierInst->getParent(); 215 auto &B = getBlockData(SuspendBlock); 216 B.Suspend = true; 217 B.Kills |= B.Consumes; 218 }; 219 for (auto *CSI : Shape.CoroSuspends) { 220 markSuspendBlock(CSI); 221 if (auto *Save = CSI->getCoroSave()) 222 markSuspendBlock(Save); 223 } 224 225 // Iterate propagating consumes and kills until they stop changing. 226 int Iteration = 0; 227 (void)Iteration; 228 229 bool Changed; 230 do { 231 LLVM_DEBUG(dbgs() << "iteration " << ++Iteration); 232 LLVM_DEBUG(dbgs() << "==============\n"); 233 234 Changed = false; 235 for (size_t I = 0; I < N; ++I) { 236 auto &B = Block[I]; 237 for (BasicBlock *SI : successors(B)) { 238 239 auto SuccNo = Mapping.blockToIndex(SI); 240 241 // Saved Consumes and Kills bitsets so that it is easy to see 242 // if anything changed after propagation. 243 auto &S = Block[SuccNo]; 244 auto SavedConsumes = S.Consumes; 245 auto SavedKills = S.Kills; 246 247 // Propagate Kills and Consumes from block B into its successor S. 248 S.Consumes |= B.Consumes; 249 S.Kills |= B.Kills; 250 251 // If block B is a suspend block, it should propagate kills into the 252 // its successor for every block B consumes. 253 if (B.Suspend) { 254 S.Kills |= B.Consumes; 255 } 256 if (S.Suspend) { 257 // If block S is a suspend block, it should kill all of the blocks it 258 // consumes. 259 S.Kills |= S.Consumes; 260 } else if (S.End) { 261 // If block S is an end block, it should not propagate kills as the 262 // blocks following coro.end() are reached during initial invocation 263 // of the coroutine while all the data are still available on the 264 // stack or in the registers. 
265 S.Kills.reset(); 266 } else { 267 // This is reached when S block it not Suspend nor coro.end and it 268 // need to make sure that it is not in the kill set. 269 S.Kills.reset(SuccNo); 270 } 271 272 // See if anything changed. 273 Changed |= (S.Kills != SavedKills) || (S.Consumes != SavedConsumes); 274 275 if (S.Kills != SavedKills) { 276 LLVM_DEBUG(dbgs() << "\nblock " << I << " follower " << SI->getName() 277 << "\n"); 278 LLVM_DEBUG(dump("S.Kills", S.Kills)); 279 LLVM_DEBUG(dump("SavedKills", SavedKills)); 280 } 281 if (S.Consumes != SavedConsumes) { 282 LLVM_DEBUG(dbgs() << "\nblock " << I << " follower " << SI << "\n"); 283 LLVM_DEBUG(dump("S.Consume", S.Consumes)); 284 LLVM_DEBUG(dump("SavedCons", SavedConsumes)); 285 } 286 } 287 } 288 } while (Changed); 289 LLVM_DEBUG(dump()); 290 } 291 292 #undef DEBUG_TYPE // "coro-suspend-crossing" 293 #define DEBUG_TYPE "coro-frame" 294 295 namespace { 296 class FrameTypeBuilder; 297 // Mapping from the to-be-spilled value to all the users that need reload. 298 using SpillInfo = SmallMapVector<Value *, SmallVector<Instruction *, 2>, 8>; 299 struct FrameDataInfo { 300 // All the values (that are not allocas) that needs to be spilled to the 301 // frame. 302 SpillInfo Spills; 303 // Allocas contains all values defined as allocas that need to live in the 304 // frame. 305 SmallVector<AllocaInst *, 8> Allocas; 306 307 SmallVector<Value *, 8> getAllDefs() const { 308 SmallVector<Value *, 8> Defs; 309 for (const auto &P : Spills) 310 Defs.push_back(P.first); 311 for (auto *A : Allocas) 312 Defs.push_back(A); 313 return Defs; 314 } 315 316 uint32_t getFieldIndex(Value *V) const { 317 auto Itr = FieldIndexMap.find(V); 318 assert(Itr != FieldIndexMap.end() && 319 "Value does not have a frame field index"); 320 return Itr->second; 321 } 322 323 void setFieldIndex(Value *V, uint32_t Index) { 324 assert((LayoutIndexUpdateStarted || FieldIndexMap.count(V) == 0) && 325 "Cannot set the index for the same field twice."); 326 FieldIndexMap[V] = Index; 327 } 328 329 // Remap the index of every field in the frame, using the final layout index. 330 void updateLayoutIndex(FrameTypeBuilder &B); 331 332 private: 333 // LayoutIndexUpdateStarted is used to avoid updating the index of any field 334 // twice by mistake. 335 bool LayoutIndexUpdateStarted = false; 336 // Map from values to their slot indexes on the frame. They will be first set 337 // with their original insertion field index. After the frame is built, their 338 // indexes will be updated into the final layout index. 339 DenseMap<Value *, uint32_t> FieldIndexMap; 340 }; 341 } // namespace 342 343 #ifndef NDEBUG 344 static void dumpSpills(StringRef Title, const SpillInfo &Spills) { 345 dbgs() << "------------- " << Title << "--------------\n"; 346 for (const auto &E : Spills) { 347 E.first->dump(); 348 dbgs() << " user: "; 349 for (auto *I : E.second) 350 I->dump(); 351 } 352 } 353 354 static void dumpAllocas(const SmallVectorImpl<AllocaInst *> &Allocas) { 355 dbgs() << "------------- Allocas --------------\n"; 356 for (auto *A : Allocas) 357 A->dump(); 358 } 359 #endif 360 361 namespace { 362 using FieldIDType = size_t; 363 // We cannot rely solely on natural alignment of a type when building a 364 // coroutine frame and if the alignment specified on the Alloca instruction 365 // differs from the natural alignment of the alloca type we will need to insert 366 // padding. 
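//
// An illustrative example: for `%x = alloca i8, align 8` the field type is
// i8 with an ABI alignment of 1, yet the field must be placed at an 8-byte
// aligned offset; any gap this leaves after the previous field is filled
// with an explicit [N x i8] padding array when the struct body is built in
// FrameTypeBuilder::finish().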
367 class FrameTypeBuilder { 368 private: 369 struct Field { 370 uint64_t Size; 371 uint64_t Offset; 372 Type *Ty; 373 FieldIDType LayoutFieldIndex; 374 Align Alignment; 375 Align TyAlignment; 376 }; 377 378 const DataLayout &DL; 379 LLVMContext &Context; 380 uint64_t StructSize = 0; 381 Align StructAlign; 382 bool IsFinished = false; 383 384 SmallVector<Field, 8> Fields; 385 DenseMap<Value*, unsigned> FieldIndexByKey; 386 387 public: 388 FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL) 389 : DL(DL), Context(Context) {} 390 391 /// Add a field to this structure for the storage of an `alloca` 392 /// instruction. 393 LLVM_NODISCARD FieldIDType addFieldForAlloca(AllocaInst *AI, 394 bool IsHeader = false) { 395 Type *Ty = AI->getAllocatedType(); 396 397 // Make an array type if this is a static array allocation. 398 if (AI->isArrayAllocation()) { 399 if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) 400 Ty = ArrayType::get(Ty, CI->getValue().getZExtValue()); 401 else 402 report_fatal_error("Coroutines cannot handle non static allocas yet"); 403 } 404 405 return addField(Ty, AI->getAlign(), IsHeader); 406 } 407 408 /// We want to put the allocas whose lifetime-ranges are not overlapped 409 /// into one slot of coroutine frame. 410 /// Consider the example at:https://bugs.llvm.org/show_bug.cgi?id=45566 411 /// 412 /// cppcoro::task<void> alternative_paths(bool cond) { 413 /// if (cond) { 414 /// big_structure a; 415 /// process(a); 416 /// co_await something(); 417 /// } else { 418 /// big_structure b; 419 /// process2(b); 420 /// co_await something(); 421 /// } 422 /// } 423 /// 424 /// We want to put variable a and variable b in the same slot to 425 /// reduce the size of coroutine frame. 426 /// 427 /// This function use StackLifetime algorithm to partition the AllocaInsts in 428 /// Spills to non-overlapped sets in order to put Alloca in the same 429 /// non-overlapped set into the same slot in the Coroutine Frame. Then add 430 /// field for the allocas in the same non-overlapped set by using the largest 431 /// type as the field type. 432 /// 433 /// Side Effects: Because We sort the allocas, the order of allocas in the 434 /// frame may be different with the order in the source code. 435 void addFieldForAllocas(const Function &F, FrameDataInfo &FrameData, 436 coro::Shape &Shape); 437 438 /// Add a field to this structure. 439 LLVM_NODISCARD FieldIDType addField(Type *Ty, MaybeAlign FieldAlignment, 440 bool IsHeader = false) { 441 assert(!IsFinished && "adding fields to a finished builder"); 442 assert(Ty && "must provide a type for a field"); 443 444 // The field size is always the alloc size of the type. 445 uint64_t FieldSize = DL.getTypeAllocSize(Ty); 446 447 // The field alignment might not be the type alignment, but we need 448 // to remember the type alignment anyway to build the type. 449 Align TyAlignment = DL.getABITypeAlign(Ty); 450 if (!FieldAlignment) FieldAlignment = TyAlignment; 451 452 // Lay out header fields immediately. 453 uint64_t Offset; 454 if (IsHeader) { 455 Offset = alignTo(StructSize, FieldAlignment); 456 StructSize = Offset + FieldSize; 457 458 // Everything else has a flexible offset. 459 } else { 460 Offset = OptimizedStructLayoutField::FlexibleOffset; 461 } 462 463 Fields.push_back({FieldSize, Offset, Ty, 0, *FieldAlignment, TyAlignment}); 464 return Fields.size() - 1; 465 } 466 467 /// Finish the layout and set the body on the given type. 
  void finish(StructType *Ty);

  uint64_t getStructSize() const {
    assert(IsFinished && "not yet finished!");
    return StructSize;
  }

  Align getStructAlign() const {
    assert(IsFinished && "not yet finished!");
    return StructAlign;
  }

  FieldIDType getLayoutFieldIndex(FieldIDType Id) const {
    assert(IsFinished && "not yet finished!");
    return Fields[Id].LayoutFieldIndex;
  }
};
} // namespace

void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) {
  auto Updater = [&](Value *I) {
    setFieldIndex(I, B.getLayoutFieldIndex(getFieldIndex(I)));
  };
  LayoutIndexUpdateStarted = true;
  for (auto &S : Spills)
    Updater(S.first);
  for (auto *A : Allocas)
    Updater(A);
  LayoutIndexUpdateStarted = false;
}

void FrameTypeBuilder::addFieldForAllocas(const Function &F,
                                          FrameDataInfo &FrameData,
                                          coro::Shape &Shape) {
  DenseMap<AllocaInst *, unsigned int> AllocaIndex;
  using AllocaSetType = SmallVector<AllocaInst *, 4>;
  SmallVector<AllocaSetType, 4> NonOverlapedAllocas;

  // We need to add fields for the allocas at the end of this function.
  // However, the function has multiple exits, so we use this RAII helper to
  // avoid duplicating that code.
  struct RAIIHelper {
    std::function<void()> func;
    RAIIHelper(std::function<void()> &&func) : func(std::move(func)) {}
    ~RAIIHelper() { func(); }
  } Helper([&]() {
    for (auto AllocaList : NonOverlapedAllocas) {
      auto *LargestAI = *AllocaList.begin();
      FieldIDType Id = addFieldForAlloca(LargestAI);
      for (auto *Alloca : AllocaList)
        FrameData.setFieldIndex(Alloca, Id);
    }
  });

  if (!Shape.ReuseFrameSlot && !EnableReuseStorageInFrame) {
    for (auto *Alloca : FrameData.Allocas) {
      AllocaIndex[Alloca] = NonOverlapedAllocas.size();
      NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
    }
    return;
  }

  // Because there is a path from the lifetime.start of every alloca to
  // coro.end, the life ranges of all allocas overlap in the blocks that
  // contain coro.end and in their successors. So we skip those blocks when
  // computing the life range of each alloca. This should be reasonable,
  // since there should not be any uses in those blocks and the coroutine
  // frame should not be used outside the coroutine body.
  //
  // Note that the user of coro.suspend may not be a SwitchInst. However,
  // that case seems too complex to handle, and it is harmless to skip such
  // patterns: doing so merely prevents the affected allocas from sharing a
  // frame slot.
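  //
  // For the switch-based lowering, the user of coro.suspend typically looks
  // like this (names are illustrative):
  //
  //   %sp = call i8 @llvm.coro.suspend(token %save, i1 false)
  //   switch i8 %sp, label %coro.ret [i8 0, label %resume
  //                                   i8 1, label %cleanup]
  //
  // Temporarily redirecting the default destination (%coro.ret, which leads
  // to coro.end) to the resume successor below keeps the StackLifetime
  // analysis from walking into the post-coro.end blocks; the original
  // destination is restored once the analysis has run.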
541 DenseMap<SwitchInst *, BasicBlock *> DefaultSuspendDest; 542 for (auto CoroSuspendInst : Shape.CoroSuspends) { 543 for (auto U : CoroSuspendInst->users()) { 544 if (auto *ConstSWI = dyn_cast<SwitchInst>(U)) { 545 auto *SWI = const_cast<SwitchInst *>(ConstSWI); 546 DefaultSuspendDest[SWI] = SWI->getDefaultDest(); 547 SWI->setDefaultDest(SWI->getSuccessor(1)); 548 } 549 } 550 } 551 552 StackLifetime StackLifetimeAnalyzer(F, FrameData.Allocas, 553 StackLifetime::LivenessType::May); 554 StackLifetimeAnalyzer.run(); 555 auto IsAllocaInferenre = [&](const AllocaInst *AI1, const AllocaInst *AI2) { 556 return StackLifetimeAnalyzer.getLiveRange(AI1).overlaps( 557 StackLifetimeAnalyzer.getLiveRange(AI2)); 558 }; 559 auto GetAllocaSize = [&](const AllocaInst *AI) { 560 Optional<uint64_t> RetSize = AI->getAllocationSizeInBits(DL); 561 assert(RetSize && "We can't handle scalable type now.\n"); 562 return RetSize.getValue(); 563 }; 564 // Put larger allocas in the front. So the larger allocas have higher 565 // priority to merge, which can save more space potentially. Also each 566 // AllocaSet would be ordered. So we can get the largest Alloca in one 567 // AllocaSet easily. 568 sort(FrameData.Allocas, [&](auto Iter1, auto Iter2) { 569 return GetAllocaSize(Iter1) > GetAllocaSize(Iter2); 570 }); 571 for (auto *Alloca : FrameData.Allocas) { 572 bool Merged = false; 573 // Try to find if the Alloca is not inferenced with any existing 574 // NonOverlappedAllocaSet. If it is true, insert the alloca to that 575 // NonOverlappedAllocaSet. 576 for (auto &AllocaSet : NonOverlapedAllocas) { 577 assert(!AllocaSet.empty() && "Processing Alloca Set is not empty.\n"); 578 bool CouldMerge = none_of(AllocaSet, [&](auto Iter) { 579 return IsAllocaInferenre(Alloca, Iter); 580 }); 581 if (!CouldMerge) 582 continue; 583 AllocaIndex[Alloca] = AllocaIndex[*AllocaSet.begin()]; 584 AllocaSet.push_back(Alloca); 585 Merged = true; 586 break; 587 } 588 if (!Merged) { 589 AllocaIndex[Alloca] = NonOverlapedAllocas.size(); 590 NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca)); 591 } 592 } 593 // Recover the default target destination for each Switch statement 594 // reserved. 595 for (auto SwitchAndDefaultDest : DefaultSuspendDest) { 596 SwitchInst *SWI = SwitchAndDefaultDest.first; 597 BasicBlock *DestBB = SwitchAndDefaultDest.second; 598 SWI->setDefaultDest(DestBB); 599 } 600 // This Debug Info could tell us which allocas are merged into one slot. 601 LLVM_DEBUG(for (auto &AllocaSet 602 : NonOverlapedAllocas) { 603 if (AllocaSet.size() > 1) { 604 dbgs() << "In Function:" << F.getName() << "\n"; 605 dbgs() << "Find Union Set " 606 << "\n"; 607 dbgs() << "\tAllocas are \n"; 608 for (auto Alloca : AllocaSet) 609 dbgs() << "\t\t" << *Alloca << "\n"; 610 } 611 }); 612 } 613 614 void FrameTypeBuilder::finish(StructType *Ty) { 615 assert(!IsFinished && "already finished!"); 616 617 // Prepare the optimal-layout field array. 618 // The Id in the layout field is a pointer to our Field for it. 619 SmallVector<OptimizedStructLayoutField, 8> LayoutFields; 620 LayoutFields.reserve(Fields.size()); 621 for (auto &Field : Fields) { 622 LayoutFields.emplace_back(&Field, Field.Size, Field.Alignment, 623 Field.Offset); 624 } 625 626 // Perform layout. 
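  // (Illustrative note: header fields keep the fixed offsets they were given
  // in addField(), while the flexible-offset fields may be reordered by
  // performOptimizedStructLayout() to minimize padding; e.g. three flexible
  // fields of size/align 8/8, 4/4 and 1/1 can be packed back-to-back at
  // offsets 0, 8 and 12.)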
627 auto SizeAndAlign = performOptimizedStructLayout(LayoutFields); 628 StructSize = SizeAndAlign.first; 629 StructAlign = SizeAndAlign.second; 630 631 auto getField = [](const OptimizedStructLayoutField &LayoutField) -> Field & { 632 return *static_cast<Field *>(const_cast<void*>(LayoutField.Id)); 633 }; 634 635 // We need to produce a packed struct type if there's a field whose 636 // assigned offset isn't a multiple of its natural type alignment. 637 bool Packed = [&] { 638 for (auto &LayoutField : LayoutFields) { 639 auto &F = getField(LayoutField); 640 if (!isAligned(F.TyAlignment, LayoutField.Offset)) 641 return true; 642 } 643 return false; 644 }(); 645 646 // Build the struct body. 647 SmallVector<Type*, 16> FieldTypes; 648 FieldTypes.reserve(LayoutFields.size() * 3 / 2); 649 uint64_t LastOffset = 0; 650 for (auto &LayoutField : LayoutFields) { 651 auto &F = getField(LayoutField); 652 653 auto Offset = LayoutField.Offset; 654 655 // Add a padding field if there's a padding gap and we're either 656 // building a packed struct or the padding gap is more than we'd 657 // get from aligning to the field type's natural alignment. 658 assert(Offset >= LastOffset); 659 if (Offset != LastOffset) { 660 if (Packed || alignTo(LastOffset, F.TyAlignment) != Offset) 661 FieldTypes.push_back(ArrayType::get(Type::getInt8Ty(Context), 662 Offset - LastOffset)); 663 } 664 665 F.Offset = Offset; 666 F.LayoutFieldIndex = FieldTypes.size(); 667 668 FieldTypes.push_back(F.Ty); 669 LastOffset = Offset + F.Size; 670 } 671 672 Ty->setBody(FieldTypes, Packed); 673 674 #ifndef NDEBUG 675 // Check that the IR layout matches the offsets we expect. 676 auto Layout = DL.getStructLayout(Ty); 677 for (auto &F : Fields) { 678 assert(Ty->getElementType(F.LayoutFieldIndex) == F.Ty); 679 assert(Layout->getElementOffset(F.LayoutFieldIndex) == F.Offset); 680 } 681 #endif 682 683 IsFinished = true; 684 } 685 686 // Build a struct that will keep state for an active coroutine. 687 // struct f.frame { 688 // ResumeFnTy ResumeFnAddr; 689 // ResumeFnTy DestroyFnAddr; 690 // int ResumeIndex; 691 // ... promise (if present) ... 692 // ... spills ... 693 // }; 694 static StructType *buildFrameType(Function &F, coro::Shape &Shape, 695 FrameDataInfo &FrameData) { 696 LLVMContext &C = F.getContext(); 697 const DataLayout &DL = F.getParent()->getDataLayout(); 698 StructType *FrameTy = [&] { 699 SmallString<32> Name(F.getName()); 700 Name.append(".Frame"); 701 return StructType::create(C, Name); 702 }(); 703 704 FrameTypeBuilder B(C, DL); 705 706 AllocaInst *PromiseAlloca = Shape.getPromiseAlloca(); 707 Optional<FieldIDType> SwitchIndexFieldId; 708 709 if (Shape.ABI == coro::ABI::Switch) { 710 auto *FramePtrTy = FrameTy->getPointerTo(); 711 auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy, 712 /*IsVarArg=*/false); 713 auto *FnPtrTy = FnTy->getPointerTo(); 714 715 // Add header fields for the resume and destroy functions. 716 // We can rely on these being perfectly packed. 717 (void)B.addField(FnPtrTy, None, /*header*/ true); 718 (void)B.addField(FnPtrTy, None, /*header*/ true); 719 720 // PromiseAlloca field needs to be explicitly added here because it's 721 // a header field with a fixed offset based on its alignment. Hence it 722 // needs special handling and cannot be added to FrameData.Allocas. 723 if (PromiseAlloca) 724 FrameData.setFieldIndex( 725 PromiseAlloca, B.addFieldForAlloca(PromiseAlloca, /*header*/ true)); 726 727 // Add a field to store the suspend index. This doesn't need to 728 // be in the header. 
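    // (For example, a coroutine with 3 suspend points needs
    // Log2_64_Ceil(3) == 2 bits and gets an i2 index field; a coroutine with
    // a single suspend point still gets a 1-bit field because of the
    // std::max below.)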
729 unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size())); 730 Type *IndexType = Type::getIntNTy(C, IndexBits); 731 732 SwitchIndexFieldId = B.addField(IndexType, None); 733 } else { 734 assert(PromiseAlloca == nullptr && "lowering doesn't support promises"); 735 } 736 737 // Because multiple allocas may own the same field slot, 738 // we add allocas to field here. 739 B.addFieldForAllocas(F, FrameData, Shape); 740 // Create an entry for every spilled value. 741 for (auto &S : FrameData.Spills) { 742 FieldIDType Id = B.addField(S.first->getType(), None); 743 FrameData.setFieldIndex(S.first, Id); 744 } 745 746 B.finish(FrameTy); 747 FrameData.updateLayoutIndex(B); 748 Shape.FrameAlign = B.getStructAlign(); 749 Shape.FrameSize = B.getStructSize(); 750 751 switch (Shape.ABI) { 752 case coro::ABI::Switch: 753 // In the switch ABI, remember the switch-index field. 754 Shape.SwitchLowering.IndexField = 755 B.getLayoutFieldIndex(*SwitchIndexFieldId); 756 757 // Also round the frame size up to a multiple of its alignment, as is 758 // generally expected in C/C++. 759 Shape.FrameSize = alignTo(Shape.FrameSize, Shape.FrameAlign); 760 break; 761 762 // In the retcon ABI, remember whether the frame is inline in the storage. 763 case coro::ABI::Retcon: 764 case coro::ABI::RetconOnce: { 765 auto Id = Shape.getRetconCoroId(); 766 Shape.RetconLowering.IsFrameInlineInStorage 767 = (B.getStructSize() <= Id->getStorageSize() && 768 B.getStructAlign() <= Id->getStorageAlignment()); 769 break; 770 } 771 } 772 773 return FrameTy; 774 } 775 776 // We use a pointer use visitor to discover if there are any writes into an 777 // alloca that dominates CoroBegin. If that is the case, insertSpills will copy 778 // the value from the alloca into the coroutine frame spill slot corresponding 779 // to that alloca. We also collect any alias pointing to the alloca created 780 // before CoroBegin but used after CoroBegin. These alias will be recreated 781 // after CoroBegin from the frame address so that latter references are 782 // pointing to the frame instead of the stack. 783 // Note: We are repurposing PtrUseVisitor's isEscaped() to mean whether the 784 // pointer is potentially written into. 785 // TODO: If the pointer is really escaped, we are in big trouble because we 786 // will be escaping a pointer to a stack address that would no longer exist 787 // soon. However most escape analysis isn't good enough to precisely tell, 788 // so we are assuming that if a pointer is escaped that it's written into. 789 // TODO: Another potential issue is if we are creating an alias through 790 // a function call, e.g: 791 // %a = AllocaInst ... 792 // %b = call @computeAddress(... %a) 793 // If %b is an alias of %a and will be used after CoroBegin, this will be broken 794 // and there is nothing we can do about it. 795 namespace { 796 struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> { 797 using Base = PtrUseVisitor<AllocaUseVisitor>; 798 AllocaUseVisitor(const DataLayout &DL, const DominatorTree &DT, 799 const CoroBeginInst &CB) 800 : PtrUseVisitor(DL), DT(DT), CoroBegin(CB) {} 801 802 // We are only interested in uses that's not dominated by coro.begin. 803 void visit(Instruction &I) { 804 if (!DT.dominates(&CoroBegin, &I)) 805 Base::visit(I); 806 } 807 // We need to provide this overload as PtrUseVisitor uses a pointer based 808 // visiting function. 
809 void visit(Instruction *I) { return visit(*I); } 810 811 // We cannot handle PHI node and SelectInst because they could be selecting 812 // between two addresses that point to different Allocas. 813 void visitPHINode(PHINode &I) { 814 assert(!usedAfterCoroBegin(I) && 815 "Unable to handle PHI node of aliases created before CoroBegin but " 816 "used after CoroBegin"); 817 } 818 819 void visitSelectInst(SelectInst &I) { 820 assert(!usedAfterCoroBegin(I) && 821 "Unable to handle Select of aliases created before CoroBegin but " 822 "used after CoroBegin"); 823 } 824 825 void visitLoadInst(LoadInst &) {} 826 827 // If the use is an operand, the pointer escaped and anything can write into 828 // that memory. If the use is the pointer, we are definitely writing into the 829 // alloca and therefore we need to copy. 830 void visitStoreInst(StoreInst &SI) { PI.setEscaped(&SI); } 831 832 // All mem intrinsics modify the data. 833 void visitMemIntrinsic(MemIntrinsic &MI) { PI.setEscaped(&MI); } 834 835 void visitBitCastInst(BitCastInst &BC) { 836 Base::visitBitCastInst(BC); 837 handleAlias(BC); 838 } 839 840 void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) { 841 Base::visitAddrSpaceCastInst(ASC); 842 handleAlias(ASC); 843 } 844 845 void visitGetElementPtrInst(GetElementPtrInst &GEPI) { 846 // The base visitor will adjust Offset accordingly. 847 Base::visitGetElementPtrInst(GEPI); 848 handleAlias(GEPI); 849 } 850 851 const SmallVector<std::pair<Instruction *, APInt>, 1> &getAliases() const { 852 return Aliases; 853 } 854 855 private: 856 const DominatorTree &DT; 857 const CoroBeginInst &CoroBegin; 858 // All alias to the original AllocaInst, and are used after CoroBegin. 859 // Each entry contains the instruction and the offset in the original Alloca. 860 SmallVector<std::pair<Instruction *, APInt>, 1> Aliases{}; 861 862 bool usedAfterCoroBegin(Instruction &I) { 863 for (auto &U : I.uses()) 864 if (DT.dominates(&CoroBegin, U)) 865 return true; 866 return false; 867 } 868 869 void handleAlias(Instruction &I) { 870 if (!usedAfterCoroBegin(I)) 871 return; 872 873 assert(IsOffsetKnown && "Can only handle alias with known offset created " 874 "before CoroBegin and used after"); 875 Aliases.emplace_back(&I, Offset); 876 } 877 }; 878 } // namespace 879 880 // We need to make room to insert a spill after initial PHIs, but before 881 // catchswitch instruction. Placing it before violates the requirement that 882 // catchswitch, like all other EHPads must be the first nonPHI in a block. 883 // 884 // Split away catchswitch into a separate block and insert in its place: 885 // 886 // cleanuppad <InsertPt> cleanupret. 887 // 888 // cleanupret instruction will act as an insert point for the spill. 889 static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) { 890 BasicBlock *CurrentBlock = CatchSwitch->getParent(); 891 BasicBlock *NewBlock = CurrentBlock->splitBasicBlock(CatchSwitch); 892 CurrentBlock->getTerminator()->eraseFromParent(); 893 894 auto *CleanupPad = 895 CleanupPadInst::Create(CatchSwitch->getParentPad(), {}, "", CurrentBlock); 896 auto *CleanupRet = 897 CleanupReturnInst::Create(CleanupPad, NewBlock, CurrentBlock); 898 return CleanupRet; 899 } 900 901 // Replace all alloca and SSA values that are accessed across suspend points 902 // with GetElementPointer from coroutine frame + loads and stores. Create an 903 // AllocaSpillBB that will become the new entry block for the resume parts of 904 // the coroutine: 905 // 906 // %hdl = coro.begin(...) 
907 // whatever 908 // 909 // becomes: 910 // 911 // %hdl = coro.begin(...) 912 // %FramePtr = bitcast i8* hdl to %f.frame* 913 // br label %AllocaSpillBB 914 // 915 // AllocaSpillBB: 916 // ; geps corresponding to allocas that were moved to coroutine frame 917 // br label PostSpill 918 // 919 // PostSpill: 920 // whatever 921 // 922 // 923 static Instruction *insertSpills(const FrameDataInfo &FrameData, 924 coro::Shape &Shape) { 925 auto *CB = Shape.CoroBegin; 926 LLVMContext &C = CB->getContext(); 927 IRBuilder<> Builder(CB->getNextNode()); 928 StructType *FrameTy = Shape.FrameTy; 929 PointerType *FramePtrTy = FrameTy->getPointerTo(); 930 auto *FramePtr = 931 cast<Instruction>(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr")); 932 DominatorTree DT(*CB->getFunction()); 933 934 // Create a GEP with the given index into the coroutine frame for the original 935 // value Orig. Appends an extra 0 index for array-allocas, preserving the 936 // original type. 937 auto GetFramePointer = [&](Value *Orig) -> Value * { 938 FieldIDType Index = FrameData.getFieldIndex(Orig); 939 SmallVector<Value *, 3> Indices = { 940 ConstantInt::get(Type::getInt32Ty(C), 0), 941 ConstantInt::get(Type::getInt32Ty(C), Index), 942 }; 943 944 if (auto *AI = dyn_cast<AllocaInst>(Orig)) { 945 if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) { 946 auto Count = CI->getValue().getZExtValue(); 947 if (Count > 1) { 948 Indices.push_back(ConstantInt::get(Type::getInt32Ty(C), 0)); 949 } 950 } else { 951 report_fatal_error("Coroutines cannot handle non static allocas yet"); 952 } 953 } 954 955 auto GEP = cast<GetElementPtrInst>( 956 Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices)); 957 if (isa<AllocaInst>(Orig)) { 958 // If the type of GEP is not equal to the type of AllocaInst, it implies 959 // that the AllocaInst may be reused in the Frame slot of other 960 // AllocaInst. So we cast the GEP to the type of AllocaInst. 961 if (GEP->getResultElementType() != Orig->getType()) 962 return Builder.CreateBitCast(GEP, Orig->getType(), 963 Orig->getName() + Twine(".cast")); 964 } 965 return GEP; 966 }; 967 968 for (auto const &E : FrameData.Spills) { 969 Value *Def = E.first; 970 // Create a store instruction storing the value into the 971 // coroutine frame. 972 Instruction *InsertPt = nullptr; 973 if (auto *Arg = dyn_cast<Argument>(Def)) { 974 // For arguments, we will place the store instruction right after 975 // the coroutine frame pointer instruction, i.e. bitcast of 976 // coro.begin from i8* to %f.frame*. 977 InsertPt = FramePtr->getNextNode(); 978 979 // If we're spilling an Argument, make sure we clear 'nocapture' 980 // from the coroutine function. 981 Arg->getParent()->removeParamAttr(Arg->getArgNo(), Attribute::NoCapture); 982 983 } else if (auto *CSI = dyn_cast<AnyCoroSuspendInst>(Def)) { 984 // Don't spill immediately after a suspend; splitting assumes 985 // that the suspend will be followed by a branch. 986 InsertPt = CSI->getParent()->getSingleSuccessor()->getFirstNonPHI(); 987 } else { 988 auto *I = cast<Instruction>(Def); 989 if (!DT.dominates(CB, I)) { 990 // If it is not dominated by CoroBegin, then spill should be 991 // inserted immediately after CoroFrame is computed. 992 InsertPt = FramePtr->getNextNode(); 993 } else if (auto *II = dyn_cast<InvokeInst>(I)) { 994 // If we are spilling the result of the invoke instruction, split 995 // the normal edge and insert the spill in the new block. 
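        // (The spill cannot be placed right after the invoke itself, because
        // the invoke is a terminator; splitting the normal edge yields a
        // non-terminator insertion point that runs exactly when the call
        // returns normally, and that is reached before any use of the
        // result.)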
996 auto *NewBB = SplitEdge(II->getParent(), II->getNormalDest()); 997 InsertPt = NewBB->getTerminator(); 998 } else if (isa<PHINode>(I)) { 999 // Skip the PHINodes and EH pads instructions. 1000 BasicBlock *DefBlock = I->getParent(); 1001 if (auto *CSI = dyn_cast<CatchSwitchInst>(DefBlock->getTerminator())) 1002 InsertPt = splitBeforeCatchSwitch(CSI); 1003 else 1004 InsertPt = &*DefBlock->getFirstInsertionPt(); 1005 } else { 1006 assert(!I->isTerminator() && "unexpected terminator"); 1007 // For all other values, the spill is placed immediately after 1008 // the definition. 1009 InsertPt = I->getNextNode(); 1010 } 1011 } 1012 1013 auto Index = FrameData.getFieldIndex(Def); 1014 Builder.SetInsertPoint(InsertPt); 1015 auto *G = Builder.CreateConstInBoundsGEP2_32( 1016 FrameTy, FramePtr, 0, Index, Def->getName() + Twine(".spill.addr")); 1017 Builder.CreateStore(Def, G); 1018 1019 BasicBlock *CurrentBlock = nullptr; 1020 Value *CurrentReload = nullptr; 1021 for (auto *U : E.second) { 1022 // If we have not seen the use block, create a load instruction to reload 1023 // the spilled value from the coroutine frame. Populates the Value pointer 1024 // reference provided with the frame GEP. 1025 if (CurrentBlock != U->getParent()) { 1026 CurrentBlock = U->getParent(); 1027 Builder.SetInsertPoint(&*CurrentBlock->getFirstInsertionPt()); 1028 1029 auto *GEP = GetFramePointer(E.first); 1030 GEP->setName(E.first->getName() + Twine(".reload.addr")); 1031 CurrentReload = Builder.CreateLoad( 1032 FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP, 1033 E.first->getName() + Twine(".reload")); 1034 } 1035 1036 // If we have a single edge PHINode, remove it and replace it with a 1037 // reload from the coroutine frame. (We already took care of multi edge 1038 // PHINodes by rewriting them in the rewritePHIs function). 1039 if (auto *PN = dyn_cast<PHINode>(U)) { 1040 assert(PN->getNumIncomingValues() == 1 && 1041 "unexpected number of incoming " 1042 "values in the PHINode"); 1043 PN->replaceAllUsesWith(CurrentReload); 1044 PN->eraseFromParent(); 1045 continue; 1046 } 1047 1048 // Replace all uses of CurrentValue in the current instruction with 1049 // reload. 1050 U->replaceUsesOfWith(Def, CurrentReload); 1051 } 1052 } 1053 1054 BasicBlock *FramePtrBB = FramePtr->getParent(); 1055 1056 auto SpillBlock = 1057 FramePtrBB->splitBasicBlock(FramePtr->getNextNode(), "AllocaSpillBB"); 1058 SpillBlock->splitBasicBlock(&SpillBlock->front(), "PostSpill"); 1059 Shape.AllocaSpillBlock = SpillBlock; 1060 1061 // retcon and retcon.once lowering assumes all uses have been sunk. 1062 if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce) { 1063 // If we found any allocas, replace all of their remaining uses with Geps. 1064 Builder.SetInsertPoint(&SpillBlock->front()); 1065 for (const auto &P : FrameData.Allocas) { 1066 auto *G = GetFramePointer(P); 1067 1068 // We are not using ReplaceInstWithInst(P.first, cast<Instruction>(G)) 1069 // here, as we are changing location of the instruction. 1070 G->takeName(P); 1071 P->replaceAllUsesWith(G); 1072 P->eraseFromParent(); 1073 } 1074 return FramePtr; 1075 } 1076 1077 // If we found any alloca, replace all of their remaining uses with GEP 1078 // instructions. Because new dbg.declare have been created for these alloca, 1079 // we also delete the original dbg.declare and replace other uses with undef. 1080 // Note: We cannot replace the alloca with GEP instructions indiscriminately, 1081 // as some of the uses may not be dominated by CoroBegin. 
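  //
  // A sketch of the problematic pattern (illustrative, not from a test):
  //
  //   %a = alloca i32
  //   store i32 1, i32* %a              ; runs before the frame exists
  //   %hdl = call i8* @llvm.coro.begin(...)
  //   ...                               ; %a live across a suspend
  //
  // The store above cannot be redirected into the frame, so the value held
  // by %a has to be copied into its frame slot once the frame pointer is
  // established; the MightNeedToCopy path below handles that.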
1082 bool MightNeedToCopy = false; 1083 Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front()); 1084 SmallVector<Instruction *, 4> UsersToUpdate; 1085 for (AllocaInst *A : FrameData.Allocas) { 1086 UsersToUpdate.clear(); 1087 for (User *U : A->users()) { 1088 auto *I = cast<Instruction>(U); 1089 if (DT.dominates(CB, I)) 1090 UsersToUpdate.push_back(I); 1091 else 1092 MightNeedToCopy = true; 1093 } 1094 if (!UsersToUpdate.empty()) { 1095 auto *G = GetFramePointer(A); 1096 G->setName(A->getName() + Twine(".reload.addr")); 1097 TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(A); 1098 if (!DIs.empty()) 1099 DIBuilder(*A->getModule(), 1100 /*AllowUnresolved*/ false) 1101 .insertDeclare(G, DIs.front()->getVariable(), 1102 DIs.front()->getExpression(), 1103 DIs.front()->getDebugLoc(), DIs.front()); 1104 for (auto *DI : FindDbgDeclareUses(A)) 1105 DI->eraseFromParent(); 1106 replaceDbgUsesWithUndef(A); 1107 1108 for (Instruction *I : UsersToUpdate) 1109 I->replaceUsesOfWith(A, G); 1110 } 1111 } 1112 // If we discovered such uses not dominated by CoroBegin, see if any of them 1113 // preceed coro begin and have instructions that can modify the 1114 // value of the alloca and therefore would require a copying the value into 1115 // the spill slot in the coroutine frame. 1116 if (MightNeedToCopy) { 1117 Builder.SetInsertPoint(FramePtr->getNextNode()); 1118 1119 for (AllocaInst *A : FrameData.Allocas) { 1120 AllocaUseVisitor Visitor(A->getModule()->getDataLayout(), DT, *CB); 1121 auto PtrI = Visitor.visitPtr(*A); 1122 assert(!PtrI.isAborted()); 1123 if (PtrI.isEscaped()) { 1124 // isEscaped really means potentially modified before CoroBegin. 1125 if (A->isArrayAllocation()) 1126 report_fatal_error( 1127 "Coroutines cannot handle copying of array allocas yet"); 1128 1129 auto *G = GetFramePointer(A); 1130 auto *Value = Builder.CreateLoad(A->getAllocatedType(), A); 1131 Builder.CreateStore(Value, G); 1132 } 1133 // For each alias to Alloca created before CoroBegin but used after 1134 // CoroBegin, we recreate them after CoroBegin by appplying the offset 1135 // to the pointer in the frame. 1136 for (const auto &Alias : Visitor.getAliases()) { 1137 auto *FramePtr = GetFramePointer(A); 1138 auto *FramePtrRaw = 1139 Builder.CreateBitCast(FramePtr, Type::getInt8PtrTy(C)); 1140 auto *AliasPtr = Builder.CreateGEP( 1141 FramePtrRaw, ConstantInt::get(Type::getInt64Ty(C), Alias.second)); 1142 auto *AliasPtrTyped = 1143 Builder.CreateBitCast(AliasPtr, Alias.first->getType()); 1144 Alias.first->replaceUsesWithIf( 1145 AliasPtrTyped, [&](Use &U) { return DT.dominates(CB, U); }); 1146 } 1147 } 1148 } 1149 return FramePtr; 1150 } 1151 1152 // Sets the unwind edge of an instruction to a particular successor. 1153 static void setUnwindEdgeTo(Instruction *TI, BasicBlock *Succ) { 1154 if (auto *II = dyn_cast<InvokeInst>(TI)) 1155 II->setUnwindDest(Succ); 1156 else if (auto *CS = dyn_cast<CatchSwitchInst>(TI)) 1157 CS->setUnwindDest(Succ); 1158 else if (auto *CR = dyn_cast<CleanupReturnInst>(TI)) 1159 CR->setUnwindDest(Succ); 1160 else 1161 llvm_unreachable("unexpected terminator instruction"); 1162 } 1163 1164 // Replaces all uses of OldPred with the NewPred block in all PHINodes in a 1165 // block. 
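// For example (illustrative):
//
//   %v = phi i32 [ %x, %OldPred ], [ %y, %Other ]
//
// becomes
//
//   %v = phi i32 [ %x, %NewPred ], [ %y, %Other ]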
1166 static void updatePhiNodes(BasicBlock *DestBB, BasicBlock *OldPred, 1167 BasicBlock *NewPred, PHINode *Until = nullptr) { 1168 unsigned BBIdx = 0; 1169 for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) { 1170 PHINode *PN = cast<PHINode>(I); 1171 1172 // We manually update the LandingPadReplacement PHINode and it is the last 1173 // PHI Node. So, if we find it, we are done. 1174 if (Until == PN) 1175 break; 1176 1177 // Reuse the previous value of BBIdx if it lines up. In cases where we 1178 // have multiple phi nodes with *lots* of predecessors, this is a speed 1179 // win because we don't have to scan the PHI looking for TIBB. This 1180 // happens because the BB list of PHI nodes are usually in the same 1181 // order. 1182 if (PN->getIncomingBlock(BBIdx) != OldPred) 1183 BBIdx = PN->getBasicBlockIndex(OldPred); 1184 1185 assert(BBIdx != (unsigned)-1 && "Invalid PHI Index!"); 1186 PN->setIncomingBlock(BBIdx, NewPred); 1187 } 1188 } 1189 1190 // Uses SplitEdge unless the successor block is an EHPad, in which case do EH 1191 // specific handling. 1192 static BasicBlock *ehAwareSplitEdge(BasicBlock *BB, BasicBlock *Succ, 1193 LandingPadInst *OriginalPad, 1194 PHINode *LandingPadReplacement) { 1195 auto *PadInst = Succ->getFirstNonPHI(); 1196 if (!LandingPadReplacement && !PadInst->isEHPad()) 1197 return SplitEdge(BB, Succ); 1198 1199 auto *NewBB = BasicBlock::Create(BB->getContext(), "", BB->getParent(), Succ); 1200 setUnwindEdgeTo(BB->getTerminator(), NewBB); 1201 updatePhiNodes(Succ, BB, NewBB, LandingPadReplacement); 1202 1203 if (LandingPadReplacement) { 1204 auto *NewLP = OriginalPad->clone(); 1205 auto *Terminator = BranchInst::Create(Succ, NewBB); 1206 NewLP->insertBefore(Terminator); 1207 LandingPadReplacement->addIncoming(NewLP, NewBB); 1208 return NewBB; 1209 } 1210 Value *ParentPad = nullptr; 1211 if (auto *FuncletPad = dyn_cast<FuncletPadInst>(PadInst)) 1212 ParentPad = FuncletPad->getParentPad(); 1213 else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(PadInst)) 1214 ParentPad = CatchSwitch->getParentPad(); 1215 else 1216 llvm_unreachable("handling for other EHPads not implemented yet"); 1217 1218 auto *NewCleanupPad = CleanupPadInst::Create(ParentPad, {}, "", NewBB); 1219 CleanupReturnInst::Create(NewCleanupPad, Succ, NewBB); 1220 return NewBB; 1221 } 1222 1223 // Moves the values in the PHIs in SuccBB that correspong to PredBB into a new 1224 // PHI in InsertedBB. 1225 static void movePHIValuesToInsertedBlock(BasicBlock *SuccBB, 1226 BasicBlock *InsertedBB, 1227 BasicBlock *PredBB, 1228 PHINode *UntilPHI = nullptr) { 1229 auto *PN = cast<PHINode>(&SuccBB->front()); 1230 do { 1231 int Index = PN->getBasicBlockIndex(InsertedBB); 1232 Value *V = PN->getIncomingValue(Index); 1233 PHINode *InputV = PHINode::Create( 1234 V->getType(), 1, V->getName() + Twine(".") + SuccBB->getName(), 1235 &InsertedBB->front()); 1236 InputV->addIncoming(V, PredBB); 1237 PN->setIncomingValue(Index, InputV); 1238 PN = dyn_cast<PHINode>(PN->getNextNode()); 1239 } while (PN != UntilPHI); 1240 } 1241 1242 // Rewrites the PHI Nodes in a cleanuppad. 1243 static void rewritePHIsForCleanupPad(BasicBlock *CleanupPadBB, 1244 CleanupPadInst *CleanupPad) { 1245 // For every incoming edge to a CleanupPad we will create a new block holding 1246 // all incoming values in single-value PHI nodes. We will then create another 1247 // block to act as a dispather (as all unwind edges for related EH blocks 1248 // must be the same). 
1249 // 1250 // cleanuppad: 1251 // %2 = phi i32[%0, %catchswitch], [%1, %catch.1] 1252 // %3 = cleanuppad within none [] 1253 // 1254 // It will create: 1255 // 1256 // cleanuppad.corodispatch 1257 // %2 = phi i8[0, %catchswitch], [1, %catch.1] 1258 // %3 = cleanuppad within none [] 1259 // switch i8 % 2, label %unreachable 1260 // [i8 0, label %cleanuppad.from.catchswitch 1261 // i8 1, label %cleanuppad.from.catch.1] 1262 // cleanuppad.from.catchswitch: 1263 // %4 = phi i32 [%0, %catchswitch] 1264 // br %label cleanuppad 1265 // cleanuppad.from.catch.1: 1266 // %6 = phi i32 [%1, %catch.1] 1267 // br %label cleanuppad 1268 // cleanuppad: 1269 // %8 = phi i32 [%4, %cleanuppad.from.catchswitch], 1270 // [%6, %cleanuppad.from.catch.1] 1271 1272 // Unreachable BB, in case switching on an invalid value in the dispatcher. 1273 auto *UnreachBB = BasicBlock::Create( 1274 CleanupPadBB->getContext(), "unreachable", CleanupPadBB->getParent()); 1275 IRBuilder<> Builder(UnreachBB); 1276 Builder.CreateUnreachable(); 1277 1278 // Create a new cleanuppad which will be the dispatcher. 1279 auto *NewCleanupPadBB = 1280 BasicBlock::Create(CleanupPadBB->getContext(), 1281 CleanupPadBB->getName() + Twine(".corodispatch"), 1282 CleanupPadBB->getParent(), CleanupPadBB); 1283 Builder.SetInsertPoint(NewCleanupPadBB); 1284 auto *SwitchType = Builder.getInt8Ty(); 1285 auto *SetDispatchValuePN = 1286 Builder.CreatePHI(SwitchType, pred_size(CleanupPadBB)); 1287 CleanupPad->removeFromParent(); 1288 CleanupPad->insertAfter(SetDispatchValuePN); 1289 auto *SwitchOnDispatch = Builder.CreateSwitch(SetDispatchValuePN, UnreachBB, 1290 pred_size(CleanupPadBB)); 1291 1292 int SwitchIndex = 0; 1293 SmallVector<BasicBlock *, 8> Preds(pred_begin(CleanupPadBB), 1294 pred_end(CleanupPadBB)); 1295 for (BasicBlock *Pred : Preds) { 1296 // Create a new cleanuppad and move the PHI values to there. 1297 auto *CaseBB = BasicBlock::Create(CleanupPadBB->getContext(), 1298 CleanupPadBB->getName() + 1299 Twine(".from.") + Pred->getName(), 1300 CleanupPadBB->getParent(), CleanupPadBB); 1301 updatePhiNodes(CleanupPadBB, Pred, CaseBB); 1302 CaseBB->setName(CleanupPadBB->getName() + Twine(".from.") + 1303 Pred->getName()); 1304 Builder.SetInsertPoint(CaseBB); 1305 Builder.CreateBr(CleanupPadBB); 1306 movePHIValuesToInsertedBlock(CleanupPadBB, CaseBB, NewCleanupPadBB); 1307 1308 // Update this Pred to the new unwind point. 1309 setUnwindEdgeTo(Pred->getTerminator(), NewCleanupPadBB); 1310 1311 // Setup the switch in the dispatcher. 1312 auto *SwitchConstant = ConstantInt::get(SwitchType, SwitchIndex); 1313 SetDispatchValuePN->addIncoming(SwitchConstant, Pred); 1314 SwitchOnDispatch->addCase(SwitchConstant, CaseBB); 1315 SwitchIndex++; 1316 } 1317 } 1318 1319 static void rewritePHIs(BasicBlock &BB) { 1320 // For every incoming edge we will create a block holding all 1321 // incoming values in a single PHI nodes. 1322 // 1323 // loop: 1324 // %n.val = phi i32[%n, %entry], [%inc, %loop] 1325 // 1326 // It will create: 1327 // 1328 // loop.from.entry: 1329 // %n.loop.pre = phi i32 [%n, %entry] 1330 // br %label loop 1331 // loop.from.loop: 1332 // %inc.loop.pre = phi i32 [%inc, %loop] 1333 // br %label loop 1334 // 1335 // After this rewrite, further analysis will ignore any phi nodes with more 1336 // than one incoming edge. 1337 1338 // TODO: Simplify PHINodes in the basic block to remove duplicate 1339 // predecessors. 
1340 1341 // Special case for CleanupPad: all EH blocks must have the same unwind edge 1342 // so we need to create an additional "dispatcher" block. 1343 if (auto *CleanupPad = 1344 dyn_cast_or_null<CleanupPadInst>(BB.getFirstNonPHI())) { 1345 SmallVector<BasicBlock *, 8> Preds(pred_begin(&BB), pred_end(&BB)); 1346 for (BasicBlock *Pred : Preds) { 1347 if (CatchSwitchInst *CS = 1348 dyn_cast<CatchSwitchInst>(Pred->getTerminator())) { 1349 // CleanupPad with a CatchSwitch predecessor: therefore this is an 1350 // unwind destination that needs to be handle specially. 1351 assert(CS->getUnwindDest() == &BB); 1352 rewritePHIsForCleanupPad(&BB, CleanupPad); 1353 return; 1354 } 1355 } 1356 } 1357 1358 LandingPadInst *LandingPad = nullptr; 1359 PHINode *ReplPHI = nullptr; 1360 if ((LandingPad = dyn_cast_or_null<LandingPadInst>(BB.getFirstNonPHI()))) { 1361 // ehAwareSplitEdge will clone the LandingPad in all the edge blocks. 1362 // We replace the original landing pad with a PHINode that will collect the 1363 // results from all of them. 1364 ReplPHI = PHINode::Create(LandingPad->getType(), 1, "", LandingPad); 1365 ReplPHI->takeName(LandingPad); 1366 LandingPad->replaceAllUsesWith(ReplPHI); 1367 // We will erase the original landing pad at the end of this function after 1368 // ehAwareSplitEdge cloned it in the transition blocks. 1369 } 1370 1371 SmallVector<BasicBlock *, 8> Preds(pred_begin(&BB), pred_end(&BB)); 1372 for (BasicBlock *Pred : Preds) { 1373 auto *IncomingBB = ehAwareSplitEdge(Pred, &BB, LandingPad, ReplPHI); 1374 IncomingBB->setName(BB.getName() + Twine(".from.") + Pred->getName()); 1375 1376 // Stop the moving of values at ReplPHI, as this is either null or the PHI 1377 // that replaced the landing pad. 1378 movePHIValuesToInsertedBlock(&BB, IncomingBB, Pred, ReplPHI); 1379 } 1380 1381 if (LandingPad) { 1382 // Calls to ehAwareSplitEdge function cloned the original lading pad. 1383 // No longer need it. 1384 LandingPad->eraseFromParent(); 1385 } 1386 } 1387 1388 static void rewritePHIs(Function &F) { 1389 SmallVector<BasicBlock *, 8> WorkList; 1390 1391 for (BasicBlock &BB : F) 1392 if (auto *PN = dyn_cast<PHINode>(&BB.front())) 1393 if (PN->getNumIncomingValues() > 1) 1394 WorkList.push_back(&BB); 1395 1396 for (BasicBlock *BB : WorkList) 1397 rewritePHIs(*BB); 1398 } 1399 1400 // Check for instructions that we can recreate on resume as opposed to spill 1401 // the result into a coroutine frame. 1402 static bool materializable(Instruction &V) { 1403 return isa<CastInst>(&V) || isa<GetElementPtrInst>(&V) || 1404 isa<BinaryOperator>(&V) || isa<CmpInst>(&V) || isa<SelectInst>(&V); 1405 } 1406 1407 // Check for structural coroutine intrinsics that should not be spilled into 1408 // the coroutine frame. 1409 static bool isCoroutineStructureIntrinsic(Instruction &I) { 1410 return isa<CoroIdInst>(&I) || isa<CoroSaveInst>(&I) || 1411 isa<CoroSuspendInst>(&I); 1412 } 1413 1414 // For every use of the value that is across suspend point, recreate that value 1415 // after a suspend point. 1416 static void rewriteMaterializableInstructions(IRBuilder<> &IRB, 1417 const SpillInfo &Spills) { 1418 for (const auto &E : Spills) { 1419 Value *Def = E.first; 1420 BasicBlock *CurrentBlock = nullptr; 1421 Instruction *CurrentMaterialization = nullptr; 1422 for (Instruction *U : E.second) { 1423 // If we have not seen this block, materialize the value. 
1424 if (CurrentBlock != U->getParent()) { 1425 CurrentBlock = U->getParent(); 1426 CurrentMaterialization = cast<Instruction>(Def)->clone(); 1427 CurrentMaterialization->setName(Def->getName()); 1428 CurrentMaterialization->insertBefore( 1429 &*CurrentBlock->getFirstInsertionPt()); 1430 } 1431 if (auto *PN = dyn_cast<PHINode>(U)) { 1432 assert(PN->getNumIncomingValues() == 1 && 1433 "unexpected number of incoming " 1434 "values in the PHINode"); 1435 PN->replaceAllUsesWith(CurrentMaterialization); 1436 PN->eraseFromParent(); 1437 continue; 1438 } 1439 // Replace all uses of Def in the current instruction with the 1440 // CurrentMaterialization for the block. 1441 U->replaceUsesOfWith(Def, CurrentMaterialization); 1442 } 1443 } 1444 } 1445 1446 // Splits the block at a particular instruction unless it is the first 1447 // instruction in the block with a single predecessor. 1448 static BasicBlock *splitBlockIfNotFirst(Instruction *I, const Twine &Name) { 1449 auto *BB = I->getParent(); 1450 if (&BB->front() == I) { 1451 if (BB->getSinglePredecessor()) { 1452 BB->setName(Name); 1453 return BB; 1454 } 1455 } 1456 return BB->splitBasicBlock(I, Name); 1457 } 1458 1459 // Split above and below a particular instruction so that it 1460 // will be all alone by itself in a block. 1461 static void splitAround(Instruction *I, const Twine &Name) { 1462 splitBlockIfNotFirst(I, Name); 1463 splitBlockIfNotFirst(I->getNextNode(), "After" + Name); 1464 } 1465 1466 static bool isSuspendBlock(BasicBlock *BB) { 1467 return isa<AnyCoroSuspendInst>(BB->front()); 1468 } 1469 1470 typedef SmallPtrSet<BasicBlock*, 8> VisitedBlocksSet; 1471 1472 /// Does control flow starting at the given block ever reach a suspend 1473 /// instruction before reaching a block in VisitedOrFreeBBs? 1474 static bool isSuspendReachableFrom(BasicBlock *From, 1475 VisitedBlocksSet &VisitedOrFreeBBs) { 1476 // Eagerly try to add this block to the visited set. If it's already 1477 // there, stop recursing; this path doesn't reach a suspend before 1478 // either looping or reaching a freeing block. 1479 if (!VisitedOrFreeBBs.insert(From).second) 1480 return false; 1481 1482 // We assume that we'll already have split suspends into their own blocks. 1483 if (isSuspendBlock(From)) 1484 return true; 1485 1486 // Recurse on the successors. 1487 for (auto Succ : successors(From)) { 1488 if (isSuspendReachableFrom(Succ, VisitedOrFreeBBs)) 1489 return true; 1490 } 1491 1492 return false; 1493 } 1494 1495 /// Is the given alloca "local", i.e. bounded in lifetime to not cross a 1496 /// suspend point? 1497 static bool isLocalAlloca(CoroAllocaAllocInst *AI) { 1498 // Seed the visited set with all the basic blocks containing a free 1499 // so that we won't pass them up. 1500 VisitedBlocksSet VisitedOrFreeBBs; 1501 for (auto User : AI->users()) { 1502 if (auto FI = dyn_cast<CoroAllocaFreeInst>(User)) 1503 VisitedOrFreeBBs.insert(FI->getParent()); 1504 } 1505 1506 return !isSuspendReachableFrom(AI->getParent(), VisitedOrFreeBBs); 1507 } 1508 1509 /// After we split the coroutine, will the given basic block be along 1510 /// an obvious exit path for the resumption function? 1511 static bool willLeaveFunctionImmediatelyAfter(BasicBlock *BB, 1512 unsigned depth = 3) { 1513 // If we've bottomed out our depth count, stop searching and assume 1514 // that the path might loop back. 1515 if (depth == 0) return false; 1516 1517 // If this is a suspend block, we're about to exit the resumption function. 
1518 if (isSuspendBlock(BB)) return true; 1519 1520 // Recurse into the successors. 1521 for (auto Succ : successors(BB)) { 1522 if (!willLeaveFunctionImmediatelyAfter(Succ, depth - 1)) 1523 return false; 1524 } 1525 1526 // If none of the successors leads back in a loop, we're on an exit/abort. 1527 return true; 1528 } 1529 1530 static bool localAllocaNeedsStackSave(CoroAllocaAllocInst *AI) { 1531 // Look for a free that isn't sufficiently obviously followed by 1532 // either a suspend or a termination, i.e. something that will leave 1533 // the coro resumption frame. 1534 for (auto U : AI->users()) { 1535 auto FI = dyn_cast<CoroAllocaFreeInst>(U); 1536 if (!FI) continue; 1537 1538 if (!willLeaveFunctionImmediatelyAfter(FI->getParent())) 1539 return true; 1540 } 1541 1542 // If we never found one, we don't need a stack save. 1543 return false; 1544 } 1545 1546 /// Turn each of the given local allocas into a normal (dynamic) alloca 1547 /// instruction. 1548 static void lowerLocalAllocas(ArrayRef<CoroAllocaAllocInst*> LocalAllocas, 1549 SmallVectorImpl<Instruction*> &DeadInsts) { 1550 for (auto AI : LocalAllocas) { 1551 auto M = AI->getModule(); 1552 IRBuilder<> Builder(AI); 1553 1554 // Save the stack depth. Try to avoid doing this if the stackrestore 1555 // is going to immediately precede a return or something. 1556 Value *StackSave = nullptr; 1557 if (localAllocaNeedsStackSave(AI)) 1558 StackSave = Builder.CreateCall( 1559 Intrinsic::getDeclaration(M, Intrinsic::stacksave)); 1560 1561 // Allocate memory. 1562 auto Alloca = Builder.CreateAlloca(Builder.getInt8Ty(), AI->getSize()); 1563 Alloca->setAlignment(Align(AI->getAlignment())); 1564 1565 for (auto U : AI->users()) { 1566 // Replace gets with the allocation. 1567 if (isa<CoroAllocaGetInst>(U)) { 1568 U->replaceAllUsesWith(Alloca); 1569 1570 // Replace frees with stackrestores. This is safe because 1571 // alloca.alloc is required to obey a stack discipline, although we 1572 // don't enforce that structurally. 1573 } else { 1574 auto FI = cast<CoroAllocaFreeInst>(U); 1575 if (StackSave) { 1576 Builder.SetInsertPoint(FI); 1577 Builder.CreateCall( 1578 Intrinsic::getDeclaration(M, Intrinsic::stackrestore), 1579 StackSave); 1580 } 1581 } 1582 DeadInsts.push_back(cast<Instruction>(U)); 1583 } 1584 1585 DeadInsts.push_back(AI); 1586 } 1587 } 1588 1589 /// Turn the given coro.alloca.alloc call into a dynamic allocation. 1590 /// This happens during the all-instructions iteration, so it must not 1591 /// delete the call. 1592 static Instruction *lowerNonLocalAlloca(CoroAllocaAllocInst *AI, 1593 coro::Shape &Shape, 1594 SmallVectorImpl<Instruction*> &DeadInsts) { 1595 IRBuilder<> Builder(AI); 1596 auto Alloc = Shape.emitAlloc(Builder, AI->getSize(), nullptr); 1597 1598 for (User *U : AI->users()) { 1599 if (isa<CoroAllocaGetInst>(U)) { 1600 U->replaceAllUsesWith(Alloc); 1601 } else { 1602 auto FI = cast<CoroAllocaFreeInst>(U); 1603 Builder.SetInsertPoint(FI); 1604 Shape.emitDealloc(Builder, Alloc, nullptr); 1605 } 1606 DeadInsts.push_back(cast<Instruction>(U)); 1607 } 1608 1609 // Push this on last so that it gets deleted after all the others. 1610 DeadInsts.push_back(AI); 1611 1612 // Return the new allocation value so that we can check for needed spills. 1613 return cast<Instruction>(Alloc); 1614 } 1615 1616 /// Get the current swifterror value. 1617 static Value *emitGetSwiftErrorValue(IRBuilder<> &Builder, Type *ValueTy, 1618 coro::Shape &Shape) { 1619 // Make a fake function pointer as a sort of intrinsic. 
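  // (The emitted IR is roughly `%v = call ValueTy null()`: a call through a
  // null function pointer, recorded in Shape.SwiftErrorOps, that stands in
  // for the real swifterror access until the coroutine is split.)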
  auto FnTy = FunctionType::get(ValueTy, {}, false);
  auto Fn = ConstantPointerNull::get(FnTy->getPointerTo());

  auto Call = Builder.CreateCall(FnTy, Fn, {});
  Shape.SwiftErrorOps.push_back(Call);

  return Call;
}

/// Set the given value as the current swifterror value.
///
/// Returns a slot that can be used as a swifterror slot.
static Value *emitSetSwiftErrorValue(IRBuilder<> &Builder, Value *V,
                                     coro::Shape &Shape) {
  // Make a fake function pointer as a sort of intrinsic.
  auto FnTy = FunctionType::get(V->getType()->getPointerTo(),
                                {V->getType()}, false);
  auto Fn = ConstantPointerNull::get(FnTy->getPointerTo());

  auto Call = Builder.CreateCall(FnTy, Fn, { V });
  Shape.SwiftErrorOps.push_back(Call);

  return Call;
}

/// Set the swifterror value from the given alloca before a call,
/// then put it back in the alloca afterwards.
///
/// Returns an address that will stand in for the swifterror slot
/// until splitting.
static Value *emitSetAndGetSwiftErrorValueAround(Instruction *Call,
                                                 AllocaInst *Alloca,
                                                 coro::Shape &Shape) {
  auto ValueTy = Alloca->getAllocatedType();
  IRBuilder<> Builder(Call);

  // Load the current value from the alloca and set it as the
  // swifterror value.
  auto ValueBeforeCall = Builder.CreateLoad(ValueTy, Alloca);
  auto Addr = emitSetSwiftErrorValue(Builder, ValueBeforeCall, Shape);

  // Move to after the call. Since swifterror only has a guaranteed
  // value on normal exits, we can ignore implicit and explicit unwind
  // edges.
  if (isa<CallInst>(Call)) {
    Builder.SetInsertPoint(Call->getNextNode());
  } else {
    auto Invoke = cast<InvokeInst>(Call);
    Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstNonPHIOrDbg());
  }

  // Get the current swifterror value and store it to the alloca.
  auto ValueAfterCall = emitGetSwiftErrorValue(Builder, ValueTy, Shape);
  Builder.CreateStore(ValueAfterCall, Alloca);

  return Addr;
}

/// Eliminate a formerly-swifterror alloca by inserting the get/set
/// intrinsics and attempting to MemToReg the alloca away.
static void eliminateSwiftErrorAlloca(Function &F, AllocaInst *Alloca,
                                      coro::Shape &Shape) {
  for (auto UI = Alloca->use_begin(), UE = Alloca->use_end(); UI != UE; ) {
    // We're likely changing the use list, so use a mutation-safe
    // iteration pattern.
    auto &Use = *UI;
    ++UI;

    // swifterror values can only be used in very specific ways.
    // We take advantage of that here.
    auto User = Use.getUser();
    if (isa<LoadInst>(User) || isa<StoreInst>(User))
      continue;

    assert(isa<CallInst>(User) || isa<InvokeInst>(User));
    auto Call = cast<Instruction>(User);

    auto Addr = emitSetAndGetSwiftErrorValueAround(Call, Alloca, Shape);

    // Use the returned slot address as the call argument.
    Use.set(Addr);
  }

  // All the uses should be loads and stores now.
  assert(isAllocaPromotable(Alloca));
}

/// "Eliminate" a swifterror argument by reducing it to the alloca case
/// and then loading and storing in the prologue and epilogue.
///
/// The argument keeps the swifterror flag.
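///
/// A rough sketch of the result: the entry block gets an alloca initialized
/// to null, every suspend gets a save/restore pair around it, and every
/// coro.end reloads the final value and re-publishes it via
/// emitSetSwiftErrorValue.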
static void eliminateSwiftErrorArgument(Function &F, Argument &Arg,
                                        coro::Shape &Shape,
                                        SmallVectorImpl<AllocaInst*> &AllocasToPromote) {
  IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());

  auto ArgTy = cast<PointerType>(Arg.getType());
  auto ValueTy = ArgTy->getElementType();

  // Reduce to the alloca case:

  // Create an alloca and replace all uses of the arg with it.
  auto Alloca = Builder.CreateAlloca(ValueTy, ArgTy->getAddressSpace());
  Arg.replaceAllUsesWith(Alloca);

  // Set an initial value in the alloca. swifterror is always null on entry.
  auto InitialValue = Constant::getNullValue(ValueTy);
  Builder.CreateStore(InitialValue, Alloca);

  // Find all the suspends in the function and save and restore around them.
  for (auto Suspend : Shape.CoroSuspends) {
    (void) emitSetAndGetSwiftErrorValueAround(Suspend, Alloca, Shape);
  }

  // Find all the coro.ends in the function and restore the error value.
  for (auto End : Shape.CoroEnds) {
    Builder.SetInsertPoint(End);
    auto FinalValue = Builder.CreateLoad(ValueTy, Alloca);
    (void) emitSetSwiftErrorValue(Builder, FinalValue, Shape);
  }

  // Now we can use the alloca logic.
  AllocasToPromote.push_back(Alloca);
  eliminateSwiftErrorAlloca(F, Alloca, Shape);
}

/// Eliminate all problematic uses of swifterror arguments and allocas
/// from the function. We'll fix them up later when splitting the function.
static void eliminateSwiftError(Function &F, coro::Shape &Shape) {
  SmallVector<AllocaInst*, 4> AllocasToPromote;

  // Look for a swifterror argument.
  for (auto &Arg : F.args()) {
    if (!Arg.hasSwiftErrorAttr()) continue;

    eliminateSwiftErrorArgument(F, Arg, Shape, AllocasToPromote);
    break;
  }

  // Look for swifterror allocas.
  for (auto &Inst : F.getEntryBlock()) {
    auto Alloca = dyn_cast<AllocaInst>(&Inst);
    if (!Alloca || !Alloca->isSwiftError()) continue;

    // Clear the swifterror flag.
    Alloca->setSwiftError(false);

    AllocasToPromote.push_back(Alloca);
    eliminateSwiftErrorAlloca(F, Alloca, Shape);
  }

  // If we have any allocas to promote, compute a dominator tree and
  // promote them en masse.
  if (!AllocasToPromote.empty()) {
    DominatorTree DT(F);
    PromoteMemToReg(AllocasToPromote, DT);
  }
}

/// retcon and retcon.once conventions assume that all spill uses can be sunk
/// after the coro.begin intrinsic.
static void sinkSpillUsesAfterCoroBegin(Function &F,
                                        const FrameDataInfo &FrameData,
                                        CoroBeginInst *CoroBegin) {
  DominatorTree Dom(F);

  SmallSetVector<Instruction *, 32> ToMove;
  SmallVector<Instruction *, 32> Worklist;

  // Collect all users that precede coro.begin.
  for (auto *Def : FrameData.getAllDefs()) {
    for (User *U : Def->users()) {
      auto Inst = cast<Instruction>(U);
      if (Inst->getParent() != CoroBegin->getParent() ||
          Dom.dominates(CoroBegin, Inst))
        continue;
      if (ToMove.insert(Inst))
        Worklist.push_back(Inst);
    }
  }
  // Recursively collect users before coro.begin.
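  // Any user of an instruction that is being moved and that is not itself
  // dominated by coro.begin must be moved as well, so that whole use chains
  // end up after coro.begin.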
  while (!Worklist.empty()) {
    auto *Def = Worklist.back();
    Worklist.pop_back();
    for (User *U : Def->users()) {
      auto Inst = cast<Instruction>(U);
      if (Dom.dominates(CoroBegin, Inst))
        continue;
      if (ToMove.insert(Inst))
        Worklist.push_back(Inst);
    }
  }

  // Sort by dominance.
  SmallVector<Instruction *, 64> InsertionList(ToMove.begin(), ToMove.end());
  std::sort(InsertionList.begin(), InsertionList.end(),
            [&Dom](Instruction *A, Instruction *B) -> bool {
              // If A dominates B, it should precede (<) B.
              return Dom.dominates(A, B);
            });

  Instruction *InsertPt = CoroBegin->getNextNode();
  for (Instruction *Inst : InsertionList)
    Inst->moveBefore(InsertPt);
}

/// For each local variable whose uses all lie inside a single suspended
/// region, sink its lifetime.start marker to just after the suspend block.
/// Doing so minimizes the lifetime of each variable, hence minimizing the
/// amount of data we end up putting on the frame.
static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape,
                                     SuspendCrossingInfo &Checker) {
  DominatorTree DT(F);

  // Collect all possible basic blocks which may dominate all uses of allocas.
  SmallPtrSet<BasicBlock *, 4> DomSet;
  DomSet.insert(&F.getEntryBlock());
  for (auto *CSI : Shape.CoroSuspends) {
    BasicBlock *SuspendBlock = CSI->getParent();
    assert(isSuspendBlock(SuspendBlock) && SuspendBlock->getSingleSuccessor() &&
           "should have split coro.suspend into its own block");
    DomSet.insert(SuspendBlock->getSingleSuccessor());
  }

  for (Instruction &I : instructions(F)) {
    AllocaInst *AI = dyn_cast<AllocaInst>(&I);
    if (!AI)
      continue;

    for (BasicBlock *DomBB : DomSet) {
      bool Valid = true;
      SmallVector<Instruction *, 1> Lifetimes;

      auto isLifetimeStart = [](Instruction *I) {
        if (auto *II = dyn_cast<IntrinsicInst>(I))
          return II->getIntrinsicID() == Intrinsic::lifetime_start;
        return false;
      };

      auto collectLifetimeStart = [&](Instruction *U, AllocaInst *AI) {
        if (isLifetimeStart(U)) {
          Lifetimes.push_back(U);
          return true;
        }
        if (!U->hasOneUse() || U->stripPointerCasts() != AI)
          return false;
        if (isLifetimeStart(U->user_back())) {
          Lifetimes.push_back(U->user_back());
          return true;
        }
        return false;
      };

      for (User *U : AI->users()) {
        Instruction *UI = cast<Instruction>(U);
        // For every user other than a lifetime.start marker: if it is
        // dominated by this candidate block and is not separated from it by
        // a suspend point, the alloca does not need to be spilled on its
        // account.
        if (!DT.dominates(DomBB, UI->getParent()) ||
            Checker.isDefinitionAcrossSuspend(DomBB, UI)) {
          // Skip lifetime.start markers, as well as GEPs and bitcasts that
          // are only used by lifetime.start markers.
          if (collectLifetimeStart(UI, AI))
            continue;
          Valid = false;
          break;
        }
      }
      // Sink the lifetime.start markers to the candidate dominating block
      // when all other uses of the alloca are dominated by it and stay within
      // its suspended region.
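      // For example, an alloca whose lifetime.start sits in the entry block
      // but whose uses all appear after a suspend can have its marker
      // re-emitted in that suspend block's successor instead.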
      if (Valid && !Lifetimes.empty()) {
        // This may be AI itself, when AI already has type i8*.
        auto *NewBitCast = [&](AllocaInst *AI) -> Value* {
          if (isa<AllocaInst>(Lifetimes[0]->getOperand(1)))
            return AI;
          auto *Int8PtrTy = Type::getInt8PtrTy(F.getContext());
          return CastInst::Create(Instruction::BitCast, AI, Int8PtrTy, "",
                                  DomBB->getTerminator());
        }(AI);

        auto *NewLifetime = Lifetimes[0]->clone();
        NewLifetime->replaceUsesOfWith(NewLifetime->getOperand(1), NewBitCast);
        NewLifetime->insertBefore(DomBB->getTerminator());

        // All the lifetime.start markers outside the region are no longer
        // necessary.
        for (Instruction *S : Lifetimes)
          S->eraseFromParent();

        break;
      }
    }
  }
}

static void collectFrameAllocas(Function &F, coro::Shape &Shape,
                                SuspendCrossingInfo &Checker,
                                SmallVectorImpl<AllocaInst *> &Allocas) {
  // Collect lifetime.start info for each alloca.
  using LifetimeStart = SmallPtrSet<Instruction *, 2>;
  llvm::DenseMap<AllocaInst *, std::unique_ptr<LifetimeStart>> LifetimeMap;
  for (Instruction &I : instructions(F)) {
    auto *II = dyn_cast<IntrinsicInst>(&I);
    if (!II || II->getIntrinsicID() != Intrinsic::lifetime_start)
      continue;

    if (auto *OpInst = dyn_cast<Instruction>(II->getOperand(1))) {
      if (auto *AI = dyn_cast<AllocaInst>(OpInst->stripPointerCasts())) {
        if (LifetimeMap.find(AI) == LifetimeMap.end())
          LifetimeMap[AI] = std::make_unique<LifetimeStart>();
        LifetimeMap[AI]->insert(isa<AllocaInst>(OpInst) ? II : OpInst);
      }
    }
  }

  for (Instruction &I : instructions(F)) {
    auto *AI = dyn_cast<AllocaInst>(&I);
    if (!AI)
      continue;
    // The PromiseAlloca will be specially handled since it needs to be in a
    // fixed position in the frame.
    if (AI == Shape.SwitchLowering.PromiseAlloca) {
      continue;
    }
    auto Iter = LifetimeMap.find(AI);
    for (User *U : I.users()) {
      bool ShouldLiveOnFrame = false;

      // Check against lifetime.start if the instruction has the info.
      if (Iter != LifetimeMap.end())
        for (auto *S : *Iter->second) {
          if ((ShouldLiveOnFrame = Checker.isDefinitionAcrossSuspend(*S, U)))
            break;
        }
      else
        ShouldLiveOnFrame = Checker.isDefinitionAcrossSuspend(I, U);

      if (ShouldLiveOnFrame) {
        Allocas.push_back(AI);
        break;
      }
    }
  }
}

void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
  eliminateSwiftError(F, Shape);

  if (Shape.ABI == coro::ABI::Switch &&
      Shape.SwitchLowering.PromiseAlloca) {
    Shape.getSwitchCoroId()->clearPromise();
  }

  // Make sure that all coro.save, coro.suspend and the fallthrough coro.end
  // intrinsics are in their own blocks to simplify the logic of building up
  // SuspendCrossing data.
  for (auto *CSI : Shape.CoroSuspends) {
    if (auto *Save = CSI->getCoroSave())
      splitAround(Save, "CoroSave");
    splitAround(CSI, "CoroSuspend");
  }

  // Put CoroEnds into their own blocks.
  for (CoroEndInst *CE : Shape.CoroEnds)
    splitAround(CE, "CoroEnd");

  // Rewrite multi-edge PHI nodes so that any value feeding into a PHI never
  // has its definition separated from the PHI by a suspend point.
  rewritePHIs(F);

  // Build suspend crossing info.
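  // Every spill decision below is driven by this checker: a value is placed
  // on the frame only if one of its uses is separated from its definition by
  // a suspend point.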
  SuspendCrossingInfo Checker(F, Shape);

  IRBuilder<> Builder(F.getContext());
  FrameDataInfo FrameData;
  SmallVector<CoroAllocaAllocInst*, 4> LocalAllocas;
  SmallVector<Instruction*, 4> DeadInstructions;

  {
    SpillInfo Spills;
    for (int Repeat = 0; Repeat < 4; ++Repeat) {
      // See if there are materializable instructions across suspend points.
      for (Instruction &I : instructions(F))
        if (materializable(I))
          for (User *U : I.users())
            if (Checker.isDefinitionAcrossSuspend(I, U))
              Spills[&I].push_back(cast<Instruction>(U));

      if (Spills.empty())
        break;

      // Rewrite materializable instructions to be materialized at the use
      // point.
      LLVM_DEBUG(dumpSpills("Materializations", Spills));
      rewriteMaterializableInstructions(Builder, Spills);
      Spills.clear();
    }
  }

  sinkLifetimeStartMarkers(F, Shape, Checker);
  collectFrameAllocas(F, Shape, Checker, FrameData.Allocas);
  LLVM_DEBUG(dumpAllocas(FrameData.Allocas));

  // Collect the spills for arguments and other not-materializable values.
  for (Argument &A : F.args())
    for (User *U : A.users())
      if (Checker.isDefinitionAcrossSuspend(A, U))
        FrameData.Spills[&A].push_back(cast<Instruction>(U));

  for (Instruction &I : instructions(F)) {
    // Values returned from coroutine structure intrinsics should not be part
    // of the Coroutine Frame.
    if (isCoroutineStructureIntrinsic(I) || &I == Shape.CoroBegin)
      continue;

    // The coroutine promise is always included in the coroutine frame; no
    // need to check for suspend crossing.
    if (Shape.ABI == coro::ABI::Switch &&
        Shape.SwitchLowering.PromiseAlloca == &I)
      continue;

    // Handle alloca.alloc specially here.
    if (auto AI = dyn_cast<CoroAllocaAllocInst>(&I)) {
      // Check whether the alloca's lifetime is bounded by suspend points.
      if (isLocalAlloca(AI)) {
        LocalAllocas.push_back(AI);
        continue;
      }

      // If not, do a quick rewrite of the alloca and then add spills of
      // the rewritten value. The rewrite doesn't invalidate anything in
      // Spills because the other alloca intrinsics have no other operands
      // besides AI, and it doesn't invalidate the iteration because we delay
      // erasing AI.
      auto Alloc = lowerNonLocalAlloca(AI, Shape, DeadInstructions);

      for (User *U : Alloc->users()) {
        if (Checker.isDefinitionAcrossSuspend(*Alloc, U))
          FrameData.Spills[Alloc].push_back(cast<Instruction>(U));
      }
      continue;
    }

    // Ignore alloca.get; we process this as part of coro.alloca.alloc.
    if (isa<CoroAllocaGetInst>(I))
      continue;

    if (isa<AllocaInst>(I))
      continue;

    for (User *U : I.users())
      if (Checker.isDefinitionAcrossSuspend(I, U)) {
        // We cannot spill a token.
        if (I.getType()->isTokenTy())
          report_fatal_error(
              "token definition is separated from the use by a suspend point");
        FrameData.Spills[&I].push_back(cast<Instruction>(U));
      }
  }
  LLVM_DEBUG(dumpSpills("Spills", FrameData.Spills));
  if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce)
    sinkSpillUsesAfterCoroBegin(F, FrameData, Shape.CoroBegin);
  Shape.FrameTy = buildFrameType(F, Shape, FrameData);
  // Add PromiseAlloca to Allocas list so that it is processed in insertSpills.
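  // (It was deliberately skipped by the suspend-crossing checks above; under
  // the switch ABI it always lives at a fixed position in the frame.)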
  if (Shape.ABI == coro::ABI::Switch && Shape.SwitchLowering.PromiseAlloca)
    FrameData.Allocas.push_back(Shape.SwitchLowering.PromiseAlloca);
  Shape.FramePtr = insertSpills(FrameData, Shape);
  lowerLocalAllocas(LocalAllocas, DeadInstructions);

  for (auto I : DeadInstructions)
    I->eraseFromParent();
}