1 //===- IROutliner.cpp -- Outline Similar Regions ----------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 // Implementation for the IROutliner which is used by the IROutliner Pass. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Transforms/IPO/IROutliner.h" 15 #include "llvm/Analysis/IRSimilarityIdentifier.h" 16 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 17 #include "llvm/Analysis/TargetTransformInfo.h" 18 #include "llvm/IR/Attributes.h" 19 #include "llvm/IR/DebugInfoMetadata.h" 20 #include "llvm/IR/DIBuilder.h" 21 #include "llvm/IR/Dominators.h" 22 #include "llvm/IR/Mangler.h" 23 #include "llvm/IR/PassManager.h" 24 #include "llvm/InitializePasses.h" 25 #include "llvm/Pass.h" 26 #include "llvm/Support/CommandLine.h" 27 #include "llvm/Transforms/IPO.h" 28 #include <map> 29 #include <set> 30 #include <vector> 31 32 #define DEBUG_TYPE "iroutliner" 33 34 using namespace llvm; 35 using namespace IRSimilarity; 36 37 // A command flag to be used for debugging to exclude branches from similarity 38 // matching and outlining. 39 namespace llvm { 40 extern cl::opt<bool> DisableBranches; 41 42 // A command flag to be used for debugging to indirect calls from similarity 43 // matching and outlining. 44 extern cl::opt<bool> DisableIndirectCalls; 45 } // namespace llvm 46 47 // Set to true if the user wants the ir outliner to run on linkonceodr linkage 48 // functions. This is false by default because the linker can dedupe linkonceodr 49 // functions. Since the outliner is confined to a single module (modulo LTO), 50 // this is off by default. It should, however, be the default behavior in 51 // LTO. 52 static cl::opt<bool> EnableLinkOnceODRIROutlining( 53 "enable-linkonceodr-ir-outlining", cl::Hidden, 54 cl::desc("Enable the IR outliner on linkonceodr functions"), 55 cl::init(false)); 56 57 // This is a debug option to test small pieces of code to ensure that outlining 58 // works correctly. 59 static cl::opt<bool> NoCostModel( 60 "ir-outlining-no-cost", cl::init(false), cl::ReallyHidden, 61 cl::desc("Debug option to outline greedily, without restriction that " 62 "calculated benefit outweighs cost")); 63 64 /// The OutlinableGroup holds all the overarching information for outlining 65 /// a set of regions that are structurally similar to one another, such as the 66 /// types of the overall function, the output blocks, the sets of stores needed 67 /// and a list of the different regions. This information is used in the 68 /// deduplication of extracted regions with the same structure. 69 struct OutlinableGroup { 70 /// The sections that could be outlined 71 std::vector<OutlinableRegion *> Regions; 72 73 /// The argument types for the function created as the overall function to 74 /// replace the extracted function for each region. 75 std::vector<Type *> ArgumentTypes; 76 /// The FunctionType for the overall function. 77 FunctionType *OutlinedFunctionType = nullptr; 78 /// The Function for the collective overall function. 79 Function *OutlinedFunction = nullptr; 80 81 /// Flag for whether we should not consider this group of OutlinableRegions 82 /// for extraction. 83 bool IgnoreGroup = false; 84 85 /// The return blocks for the overall function. 86 DenseMap<Value *, BasicBlock *> EndBBs; 87 88 /// The PHIBlocks with their corresponding return block based on the return 89 /// value as the key. 90 DenseMap<Value *, BasicBlock *> PHIBlocks; 91 92 /// A set containing the different GVN store sets needed. Each array contains 93 /// a sorted list of the different values that need to be stored into output 94 /// registers. 95 DenseSet<ArrayRef<unsigned>> OutputGVNCombinations; 96 97 /// Flag for whether the \ref ArgumentTypes have been defined after the 98 /// extraction of the first region. 99 bool InputTypesSet = false; 100 101 /// The number of input values in \ref ArgumentTypes. Anything after this 102 /// index in ArgumentTypes is an output argument. 103 unsigned NumAggregateInputs = 0; 104 105 /// The mapping of the canonical numbering of the values in outlined sections 106 /// to specific arguments. 107 DenseMap<unsigned, unsigned> CanonicalNumberToAggArg; 108 109 /// The number of branches in the region target a basic block that is outside 110 /// of the region. 111 unsigned BranchesToOutside = 0; 112 113 /// Tracker counting backwards from the highest unsigned value possible to 114 /// avoid conflicting with the GVNs of assigned values. We start at -3 since 115 /// -2 and -1 are assigned by the DenseMap. 116 unsigned PHINodeGVNTracker = -3; 117 118 DenseMap<unsigned, 119 std::pair<std::pair<unsigned, unsigned>, SmallVector<unsigned, 2>>> 120 PHINodeGVNToGVNs; 121 DenseMap<hash_code, unsigned> GVNsToPHINodeGVN; 122 123 /// The number of instructions that will be outlined by extracting \ref 124 /// Regions. 125 InstructionCost Benefit = 0; 126 /// The number of added instructions needed for the outlining of the \ref 127 /// Regions. 128 InstructionCost Cost = 0; 129 130 /// The argument that needs to be marked with the swifterr attribute. If not 131 /// needed, there is no value. 132 Optional<unsigned> SwiftErrorArgument; 133 134 /// For the \ref Regions, we look at every Value. If it is a constant, 135 /// we check whether it is the same in Region. 136 /// 137 /// \param [in,out] NotSame contains the global value numbers where the 138 /// constant is not always the same, and must be passed in as an argument. 139 void findSameConstants(DenseSet<unsigned> &NotSame); 140 141 /// For the regions, look at each set of GVN stores needed and account for 142 /// each combination. Add an argument to the argument types if there is 143 /// more than one combination. 144 /// 145 /// \param [in] M - The module we are outlining from. 146 void collectGVNStoreSets(Module &M); 147 }; 148 149 /// Move the contents of \p SourceBB to before the last instruction of \p 150 /// TargetBB. 151 /// \param SourceBB - the BasicBlock to pull Instructions from. 152 /// \param TargetBB - the BasicBlock to put Instruction into. 153 static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB) { 154 for (Instruction &I : llvm::make_early_inc_range(SourceBB)) 155 I.moveBefore(TargetBB, TargetBB.end()); 156 } 157 158 /// A function to sort the keys of \p Map, which must be a mapping of constant 159 /// values to basic blocks and return it in \p SortedKeys 160 /// 161 /// \param SortedKeys - The vector the keys will be return in and sorted. 162 /// \param Map - The DenseMap containing keys to sort. 163 static void getSortedConstantKeys(std::vector<Value *> &SortedKeys, 164 DenseMap<Value *, BasicBlock *> &Map) { 165 for (auto &VtoBB : Map) 166 SortedKeys.push_back(VtoBB.first); 167 168 stable_sort(SortedKeys, [](const Value *LHS, const Value *RHS) { 169 const ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS); 170 const ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS); 171 assert(RHSC && "Not a constant integer in return value?"); 172 assert(LHSC && "Not a constant integer in return value?"); 173 174 return LHSC->getLimitedValue() < RHSC->getLimitedValue(); 175 }); 176 } 177 178 Value *OutlinableRegion::findCorrespondingValueIn(const OutlinableRegion &Other, 179 Value *V) { 180 Optional<unsigned> GVN = Candidate->getGVN(V); 181 assert(GVN.hasValue() && "No GVN for incoming value"); 182 Optional<unsigned> CanonNum = Candidate->getCanonicalNum(*GVN); 183 Optional<unsigned> FirstGVN = Other.Candidate->fromCanonicalNum(*CanonNum); 184 Optional<Value *> FoundValueOpt = Other.Candidate->fromGVN(*FirstGVN); 185 return FoundValueOpt.getValueOr(nullptr); 186 } 187 188 /// Rewrite the BranchInsts in the incoming blocks to \p PHIBlock that are found 189 /// in \p Included to branch to BasicBlock \p Replace if they currently branch 190 /// to the BasicBlock \p Find. This is used to fix up the incoming basic blocks 191 /// when PHINodes are included in outlined regions. 192 /// 193 /// \param PHIBlock - The BasicBlock containing the PHINodes that need to be 194 /// checked. 195 /// \param Find - The successor block to be replaced. 196 /// \param Replace - The new succesor block to branch to. 197 /// \param Included - The set of blocks about to be outlined. 198 static void replaceTargetsFromPHINode(BasicBlock *PHIBlock, BasicBlock *Find, 199 BasicBlock *Replace, 200 DenseSet<BasicBlock *> &Included) { 201 for (PHINode &PN : PHIBlock->phis()) { 202 for (unsigned Idx = 0, PNEnd = PN.getNumIncomingValues(); Idx != PNEnd; 203 ++Idx) { 204 // Check if the incoming block is included in the set of blocks being 205 // outlined. 206 BasicBlock *Incoming = PN.getIncomingBlock(Idx); 207 if (!Included.contains(Incoming)) 208 continue; 209 210 BranchInst *BI = dyn_cast<BranchInst>(Incoming->getTerminator()); 211 assert(BI && "Not a branch instruction?"); 212 // Look over the branching instructions into this block to see if we 213 // used to branch to Find in this outlined block. 214 for (unsigned Succ = 0, End = BI->getNumSuccessors(); Succ != End; 215 Succ++) { 216 // If we have found the block to replace, we do so here. 217 if (BI->getSuccessor(Succ) != Find) 218 continue; 219 BI->setSuccessor(Succ, Replace); 220 } 221 } 222 } 223 } 224 225 226 void OutlinableRegion::splitCandidate() { 227 assert(!CandidateSplit && "Candidate already split!"); 228 229 Instruction *BackInst = Candidate->backInstruction(); 230 231 Instruction *EndInst = nullptr; 232 // Check whether the last instruction is a terminator, if it is, we do 233 // not split on the following instruction. We leave the block as it is. We 234 // also check that this is not the last instruction in the Module, otherwise 235 // the check for whether the current following instruction matches the 236 // previously recorded instruction will be incorrect. 237 if (!BackInst->isTerminator() || 238 BackInst->getParent() != &BackInst->getFunction()->back()) { 239 EndInst = Candidate->end()->Inst; 240 assert(EndInst && "Expected an end instruction?"); 241 } 242 243 // We check if the current instruction following the last instruction in the 244 // region is the same as the recorded instruction following the last 245 // instruction. If they do not match, there could be problems in rewriting 246 // the program after outlining, so we ignore it. 247 if (!BackInst->isTerminator() && 248 EndInst != BackInst->getNextNonDebugInstruction()) 249 return; 250 251 Instruction *StartInst = (*Candidate->begin()).Inst; 252 assert(StartInst && "Expected a start instruction?"); 253 StartBB = StartInst->getParent(); 254 PrevBB = StartBB; 255 256 DenseSet<BasicBlock *> BBSet; 257 Candidate->getBasicBlocks(BBSet); 258 259 // We iterate over the instructions in the region, if we find a PHINode, we 260 // check if there are predecessors outside of the region, if there are, 261 // we ignore this region since we are unable to handle the severing of the 262 // phi node right now. 263 BasicBlock::iterator It = StartInst->getIterator(); 264 while (PHINode *PN = dyn_cast<PHINode>(&*It)) { 265 unsigned NumPredsOutsideRegion = 0; 266 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 267 if (!BBSet.contains(PN->getIncomingBlock(i))) 268 ++NumPredsOutsideRegion; 269 270 if (NumPredsOutsideRegion > 1) 271 return; 272 273 It++; 274 } 275 276 // If the region starts with a PHINode, but is not the initial instruction of 277 // the BasicBlock, we ignore this region for now. 278 if (isa<PHINode>(StartInst) && StartInst != &*StartBB->begin()) 279 return; 280 281 // If the region ends with a PHINode, but does not contain all of the phi node 282 // instructions of the region, we ignore it for now. 283 if (isa<PHINode>(BackInst)) { 284 EndBB = BackInst->getParent(); 285 if (BackInst != &*std::prev(EndBB->getFirstInsertionPt())) 286 return; 287 } 288 289 // The basic block gets split like so: 290 // block: block: 291 // inst1 inst1 292 // inst2 inst2 293 // region1 br block_to_outline 294 // region2 block_to_outline: 295 // region3 -> region1 296 // region4 region2 297 // inst3 region3 298 // inst4 region4 299 // br block_after_outline 300 // block_after_outline: 301 // inst3 302 // inst4 303 304 std::string OriginalName = PrevBB->getName().str(); 305 306 StartBB = PrevBB->splitBasicBlock(StartInst, OriginalName + "_to_outline"); 307 PrevBB->replaceSuccessorsPhiUsesWith(PrevBB, StartBB); 308 309 CandidateSplit = true; 310 if (!BackInst->isTerminator()) { 311 EndBB = EndInst->getParent(); 312 FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline"); 313 EndBB->replaceSuccessorsPhiUsesWith(EndBB, FollowBB); 314 FollowBB->replaceSuccessorsPhiUsesWith(PrevBB, FollowBB); 315 } else { 316 EndBB = BackInst->getParent(); 317 EndsInBranch = true; 318 FollowBB = nullptr; 319 } 320 321 // Refind the basic block set. 322 BBSet.clear(); 323 Candidate->getBasicBlocks(BBSet); 324 // For the phi nodes in the new starting basic block of the region, we 325 // reassign the targets of the basic blocks branching instructions. 326 replaceTargetsFromPHINode(StartBB, PrevBB, StartBB, BBSet); 327 if (FollowBB) 328 replaceTargetsFromPHINode(FollowBB, EndBB, FollowBB, BBSet); 329 } 330 331 void OutlinableRegion::reattachCandidate() { 332 assert(CandidateSplit && "Candidate is not split!"); 333 334 // The basic block gets reattached like so: 335 // block: block: 336 // inst1 inst1 337 // inst2 inst2 338 // br block_to_outline region1 339 // block_to_outline: -> region2 340 // region1 region3 341 // region2 region4 342 // region3 inst3 343 // region4 inst4 344 // br block_after_outline 345 // block_after_outline: 346 // inst3 347 // inst4 348 assert(StartBB != nullptr && "StartBB for Candidate is not defined!"); 349 350 assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!"); 351 PrevBB->getTerminator()->eraseFromParent(); 352 353 // If we reattaching after outlining, we iterate over the phi nodes to 354 // the initial block, and reassign the branch instructions of the incoming 355 // blocks to the block we are remerging into. 356 if (!ExtractedFunction) { 357 DenseSet<BasicBlock *> BBSet; 358 Candidate->getBasicBlocks(BBSet); 359 360 replaceTargetsFromPHINode(StartBB, StartBB, PrevBB, BBSet); 361 if (!EndsInBranch) 362 replaceTargetsFromPHINode(FollowBB, FollowBB, EndBB, BBSet); 363 } 364 365 moveBBContents(*StartBB, *PrevBB); 366 367 BasicBlock *PlacementBB = PrevBB; 368 if (StartBB != EndBB) 369 PlacementBB = EndBB; 370 if (!EndsInBranch && PlacementBB->getUniqueSuccessor() != nullptr) { 371 assert(FollowBB != nullptr && "FollowBB for Candidate is not defined!"); 372 assert(PlacementBB->getTerminator() && "Terminator removed from EndBB!"); 373 PlacementBB->getTerminator()->eraseFromParent(); 374 moveBBContents(*FollowBB, *PlacementBB); 375 PlacementBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB); 376 FollowBB->eraseFromParent(); 377 } 378 379 PrevBB->replaceSuccessorsPhiUsesWith(StartBB, PrevBB); 380 StartBB->eraseFromParent(); 381 382 // Make sure to save changes back to the StartBB. 383 StartBB = PrevBB; 384 EndBB = nullptr; 385 PrevBB = nullptr; 386 FollowBB = nullptr; 387 388 CandidateSplit = false; 389 } 390 391 /// Find whether \p V matches the Constants previously found for the \p GVN. 392 /// 393 /// \param V - The value to check for consistency. 394 /// \param GVN - The global value number assigned to \p V. 395 /// \param GVNToConstant - The mapping of global value number to Constants. 396 /// \returns true if the Value matches the Constant mapped to by V and false if 397 /// it \p V is a Constant but does not match. 398 /// \returns None if \p V is not a Constant. 399 static Optional<bool> 400 constantMatches(Value *V, unsigned GVN, 401 DenseMap<unsigned, Constant *> &GVNToConstant) { 402 // See if we have a constants 403 Constant *CST = dyn_cast<Constant>(V); 404 if (!CST) 405 return None; 406 407 // Holds a mapping from a global value number to a Constant. 408 DenseMap<unsigned, Constant *>::iterator GVNToConstantIt; 409 bool Inserted; 410 411 412 // If we have a constant, try to make a new entry in the GVNToConstant. 413 std::tie(GVNToConstantIt, Inserted) = 414 GVNToConstant.insert(std::make_pair(GVN, CST)); 415 // If it was found and is not equal, it is not the same. We do not 416 // handle this case yet, and exit early. 417 if (Inserted || (GVNToConstantIt->second == CST)) 418 return true; 419 420 return false; 421 } 422 423 InstructionCost OutlinableRegion::getBenefit(TargetTransformInfo &TTI) { 424 InstructionCost Benefit = 0; 425 426 // Estimate the benefit of outlining a specific sections of the program. We 427 // delegate mostly this task to the TargetTransformInfo so that if the target 428 // has specific changes, we can have a more accurate estimate. 429 430 // However, getInstructionCost delegates the code size calculation for 431 // arithmetic instructions to getArithmeticInstrCost in 432 // include/Analysis/TargetTransformImpl.h, where it always estimates that the 433 // code size for a division and remainder instruction to be equal to 4, and 434 // everything else to 1. This is not an accurate representation of the 435 // division instruction for targets that have a native division instruction. 436 // To be overly conservative, we only add 1 to the number of instructions for 437 // each division instruction. 438 for (IRInstructionData &ID : *Candidate) { 439 Instruction *I = ID.Inst; 440 switch (I->getOpcode()) { 441 case Instruction::FDiv: 442 case Instruction::FRem: 443 case Instruction::SDiv: 444 case Instruction::SRem: 445 case Instruction::UDiv: 446 case Instruction::URem: 447 Benefit += 1; 448 break; 449 default: 450 Benefit += TTI.getInstructionCost(I, TargetTransformInfo::TCK_CodeSize); 451 break; 452 } 453 } 454 455 return Benefit; 456 } 457 458 /// Check the \p OutputMappings structure for value \p Input, if it exists 459 /// it has been used as an output for outlining, and has been renamed, and we 460 /// return the new value, otherwise, we return the same value. 461 /// 462 /// \param OutputMappings [in] - The mapping of values to their renamed value 463 /// after being used as an output for an outlined region. 464 /// \param Input [in] - The value to find the remapped value of, if it exists. 465 /// \return The remapped value if it has been renamed, and the same value if has 466 /// not. 467 static Value *findOutputMapping(const DenseMap<Value *, Value *> OutputMappings, 468 Value *Input) { 469 DenseMap<Value *, Value *>::const_iterator OutputMapping = 470 OutputMappings.find(Input); 471 if (OutputMapping != OutputMappings.end()) 472 return OutputMapping->second; 473 return Input; 474 } 475 476 /// Find whether \p Region matches the global value numbering to Constant 477 /// mapping found so far. 478 /// 479 /// \param Region - The OutlinableRegion we are checking for constants 480 /// \param GVNToConstant - The mapping of global value number to Constants. 481 /// \param NotSame - The set of global value numbers that do not have the same 482 /// constant in each region. 483 /// \returns true if all Constants are the same in every use of a Constant in \p 484 /// Region and false if not 485 static bool 486 collectRegionsConstants(OutlinableRegion &Region, 487 DenseMap<unsigned, Constant *> &GVNToConstant, 488 DenseSet<unsigned> &NotSame) { 489 bool ConstantsTheSame = true; 490 491 IRSimilarityCandidate &C = *Region.Candidate; 492 for (IRInstructionData &ID : C) { 493 494 // Iterate over the operands in an instruction. If the global value number, 495 // assigned by the IRSimilarityCandidate, has been seen before, we check if 496 // the the number has been found to be not the same value in each instance. 497 for (Value *V : ID.OperVals) { 498 Optional<unsigned> GVNOpt = C.getGVN(V); 499 assert(GVNOpt.hasValue() && "Expected a GVN for operand?"); 500 unsigned GVN = GVNOpt.getValue(); 501 502 // Check if this global value has been found to not be the same already. 503 if (NotSame.contains(GVN)) { 504 if (isa<Constant>(V)) 505 ConstantsTheSame = false; 506 continue; 507 } 508 509 // If it has been the same so far, we check the value for if the 510 // associated Constant value match the previous instances of the same 511 // global value number. If the global value does not map to a Constant, 512 // it is considered to not be the same value. 513 Optional<bool> ConstantMatches = constantMatches(V, GVN, GVNToConstant); 514 if (ConstantMatches.hasValue()) { 515 if (ConstantMatches.getValue()) 516 continue; 517 else 518 ConstantsTheSame = false; 519 } 520 521 // While this value is a register, it might not have been previously, 522 // make sure we don't already have a constant mapped to this global value 523 // number. 524 if (GVNToConstant.find(GVN) != GVNToConstant.end()) 525 ConstantsTheSame = false; 526 527 NotSame.insert(GVN); 528 } 529 } 530 531 return ConstantsTheSame; 532 } 533 534 void OutlinableGroup::findSameConstants(DenseSet<unsigned> &NotSame) { 535 DenseMap<unsigned, Constant *> GVNToConstant; 536 537 for (OutlinableRegion *Region : Regions) 538 collectRegionsConstants(*Region, GVNToConstant, NotSame); 539 } 540 541 void OutlinableGroup::collectGVNStoreSets(Module &M) { 542 for (OutlinableRegion *OS : Regions) 543 OutputGVNCombinations.insert(OS->GVNStores); 544 545 // We are adding an extracted argument to decide between which output path 546 // to use in the basic block. It is used in a switch statement and only 547 // needs to be an integer. 548 if (OutputGVNCombinations.size() > 1) 549 ArgumentTypes.push_back(Type::getInt32Ty(M.getContext())); 550 } 551 552 /// Get the subprogram if it exists for one of the outlined regions. 553 /// 554 /// \param [in] Group - The set of regions to find a subprogram for. 555 /// \returns the subprogram if it exists, or nullptr. 556 static DISubprogram *getSubprogramOrNull(OutlinableGroup &Group) { 557 for (OutlinableRegion *OS : Group.Regions) 558 if (Function *F = OS->Call->getFunction()) 559 if (DISubprogram *SP = F->getSubprogram()) 560 return SP; 561 562 return nullptr; 563 } 564 565 Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group, 566 unsigned FunctionNameSuffix) { 567 assert(!Group.OutlinedFunction && "Function is already defined!"); 568 569 Type *RetTy = Type::getVoidTy(M.getContext()); 570 // All extracted functions _should_ have the same return type at this point 571 // since the similarity identifier ensures that all branches outside of the 572 // region occur in the same place. 573 574 // NOTE: Should we ever move to the model that uses a switch at every point 575 // needed, meaning that we could branch within the region or out, it is 576 // possible that we will need to switch to using the most general case all of 577 // the time. 578 for (OutlinableRegion *R : Group.Regions) { 579 Type *ExtractedFuncType = R->ExtractedFunction->getReturnType(); 580 if ((RetTy->isVoidTy() && !ExtractedFuncType->isVoidTy()) || 581 (RetTy->isIntegerTy(1) && ExtractedFuncType->isIntegerTy(16))) 582 RetTy = ExtractedFuncType; 583 } 584 585 Group.OutlinedFunctionType = FunctionType::get( 586 RetTy, Group.ArgumentTypes, false); 587 588 // These functions will only be called from within the same module, so 589 // we can set an internal linkage. 590 Group.OutlinedFunction = Function::Create( 591 Group.OutlinedFunctionType, GlobalValue::InternalLinkage, 592 "outlined_ir_func_" + std::to_string(FunctionNameSuffix), M); 593 594 // Transfer the swifterr attribute to the correct function parameter. 595 if (Group.SwiftErrorArgument.hasValue()) 596 Group.OutlinedFunction->addParamAttr(Group.SwiftErrorArgument.getValue(), 597 Attribute::SwiftError); 598 599 Group.OutlinedFunction->addFnAttr(Attribute::OptimizeForSize); 600 Group.OutlinedFunction->addFnAttr(Attribute::MinSize); 601 602 // If there's a DISubprogram associated with this outlined function, then 603 // emit debug info for the outlined function. 604 if (DISubprogram *SP = getSubprogramOrNull(Group)) { 605 Function *F = Group.OutlinedFunction; 606 // We have a DISubprogram. Get its DICompileUnit. 607 DICompileUnit *CU = SP->getUnit(); 608 DIBuilder DB(M, true, CU); 609 DIFile *Unit = SP->getFile(); 610 Mangler Mg; 611 // Get the mangled name of the function for the linkage name. 612 std::string Dummy; 613 llvm::raw_string_ostream MangledNameStream(Dummy); 614 Mg.getNameWithPrefix(MangledNameStream, F, false); 615 616 DISubprogram *OutlinedSP = DB.createFunction( 617 Unit /* Context */, F->getName(), MangledNameStream.str(), 618 Unit /* File */, 619 0 /* Line 0 is reserved for compiler-generated code. */, 620 DB.createSubroutineType(DB.getOrCreateTypeArray(None)), /* void type */ 621 0, /* Line 0 is reserved for compiler-generated code. */ 622 DINode::DIFlags::FlagArtificial /* Compiler-generated code. */, 623 /* Outlined code is optimized code by definition. */ 624 DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized); 625 626 // Don't add any new variables to the subprogram. 627 DB.finalizeSubprogram(OutlinedSP); 628 629 // Attach subprogram to the function. 630 F->setSubprogram(OutlinedSP); 631 // We're done with the DIBuilder. 632 DB.finalize(); 633 } 634 635 return Group.OutlinedFunction; 636 } 637 638 /// Move each BasicBlock in \p Old to \p New. 639 /// 640 /// \param [in] Old - The function to move the basic blocks from. 641 /// \param [in] New - The function to move the basic blocks to. 642 /// \param [out] NewEnds - The return blocks of the new overall function. 643 static void moveFunctionData(Function &Old, Function &New, 644 DenseMap<Value *, BasicBlock *> &NewEnds) { 645 for (BasicBlock &CurrBB : llvm::make_early_inc_range(Old)) { 646 CurrBB.removeFromParent(); 647 CurrBB.insertInto(&New); 648 Instruction *I = CurrBB.getTerminator(); 649 650 // For each block we find a return instruction is, it is a potential exit 651 // path for the function. We keep track of each block based on the return 652 // value here. 653 if (ReturnInst *RI = dyn_cast<ReturnInst>(I)) 654 NewEnds.insert(std::make_pair(RI->getReturnValue(), &CurrBB)); 655 656 std::vector<Instruction *> DebugInsts; 657 658 for (Instruction &Val : CurrBB) { 659 // We must handle the scoping of called functions differently than 660 // other outlined instructions. 661 if (!isa<CallInst>(&Val)) { 662 // Remove the debug information for outlined functions. 663 Val.setDebugLoc(DebugLoc()); 664 continue; 665 } 666 667 // From this point we are only handling call instructions. 668 CallInst *CI = cast<CallInst>(&Val); 669 670 // We add any debug statements here, to be removed after. Since the 671 // instructions originate from many different locations in the program, 672 // it will cause incorrect reporting from a debugger if we keep the 673 // same debug instructions. 674 if (isa<DbgInfoIntrinsic>(CI)) { 675 DebugInsts.push_back(&Val); 676 continue; 677 } 678 679 // Edit the scope of called functions inside of outlined functions. 680 if (DISubprogram *SP = New.getSubprogram()) { 681 DILocation *DI = DILocation::get(New.getContext(), 0, 0, SP); 682 Val.setDebugLoc(DI); 683 } 684 } 685 686 for (Instruction *I : DebugInsts) 687 I->eraseFromParent(); 688 } 689 690 assert(NewEnds.size() > 0 && "No return instruction for new function?"); 691 } 692 693 /// Find the the constants that will need to be lifted into arguments 694 /// as they are not the same in each instance of the region. 695 /// 696 /// \param [in] C - The IRSimilarityCandidate containing the region we are 697 /// analyzing. 698 /// \param [in] NotSame - The set of global value numbers that do not have a 699 /// single Constant across all OutlinableRegions similar to \p C. 700 /// \param [out] Inputs - The list containing the global value numbers of the 701 /// arguments needed for the region of code. 702 static void findConstants(IRSimilarityCandidate &C, DenseSet<unsigned> &NotSame, 703 std::vector<unsigned> &Inputs) { 704 DenseSet<unsigned> Seen; 705 // Iterate over the instructions, and find what constants will need to be 706 // extracted into arguments. 707 for (IRInstructionDataList::iterator IDIt = C.begin(), EndIDIt = C.end(); 708 IDIt != EndIDIt; IDIt++) { 709 for (Value *V : (*IDIt).OperVals) { 710 // Since these are stored before any outlining, they will be in the 711 // global value numbering. 712 unsigned GVN = C.getGVN(V).getValue(); 713 if (isa<Constant>(V)) 714 if (NotSame.contains(GVN) && !Seen.contains(GVN)) { 715 Inputs.push_back(GVN); 716 Seen.insert(GVN); 717 } 718 } 719 } 720 } 721 722 /// Find the GVN for the inputs that have been found by the CodeExtractor. 723 /// 724 /// \param [in] C - The IRSimilarityCandidate containing the region we are 725 /// analyzing. 726 /// \param [in] CurrentInputs - The set of inputs found by the 727 /// CodeExtractor. 728 /// \param [in] OutputMappings - The mapping of values that have been replaced 729 /// by a new output value. 730 /// \param [out] EndInputNumbers - The global value numbers for the extracted 731 /// arguments. 732 static void mapInputsToGVNs(IRSimilarityCandidate &C, 733 SetVector<Value *> &CurrentInputs, 734 const DenseMap<Value *, Value *> &OutputMappings, 735 std::vector<unsigned> &EndInputNumbers) { 736 // Get the Global Value Number for each input. We check if the Value has been 737 // replaced by a different value at output, and use the original value before 738 // replacement. 739 for (Value *Input : CurrentInputs) { 740 assert(Input && "Have a nullptr as an input"); 741 if (OutputMappings.find(Input) != OutputMappings.end()) 742 Input = OutputMappings.find(Input)->second; 743 assert(C.getGVN(Input).hasValue() && 744 "Could not find a numbering for the given input"); 745 EndInputNumbers.push_back(C.getGVN(Input).getValue()); 746 } 747 } 748 749 /// Find the original value for the \p ArgInput values if any one of them was 750 /// replaced during a previous extraction. 751 /// 752 /// \param [in] ArgInputs - The inputs to be extracted by the code extractor. 753 /// \param [in] OutputMappings - The mapping of values that have been replaced 754 /// by a new output value. 755 /// \param [out] RemappedArgInputs - The remapped values according to 756 /// \p OutputMappings that will be extracted. 757 static void 758 remapExtractedInputs(const ArrayRef<Value *> ArgInputs, 759 const DenseMap<Value *, Value *> &OutputMappings, 760 SetVector<Value *> &RemappedArgInputs) { 761 // Get the global value number for each input that will be extracted as an 762 // argument by the code extractor, remapping if needed for reloaded values. 763 for (Value *Input : ArgInputs) { 764 if (OutputMappings.find(Input) != OutputMappings.end()) 765 Input = OutputMappings.find(Input)->second; 766 RemappedArgInputs.insert(Input); 767 } 768 } 769 770 /// Find the input GVNs and the output values for a region of Instructions. 771 /// Using the code extractor, we collect the inputs to the extracted function. 772 /// 773 /// The \p Region can be identified as needing to be ignored in this function. 774 /// It should be checked whether it should be ignored after a call to this 775 /// function. 776 /// 777 /// \param [in,out] Region - The region of code to be analyzed. 778 /// \param [out] InputGVNs - The global value numbers for the extracted 779 /// arguments. 780 /// \param [in] NotSame - The global value numbers in the region that do not 781 /// have the same constant value in the regions structurally similar to 782 /// \p Region. 783 /// \param [in] OutputMappings - The mapping of values that have been replaced 784 /// by a new output value after extraction. 785 /// \param [out] ArgInputs - The values of the inputs to the extracted function. 786 /// \param [out] Outputs - The set of values extracted by the CodeExtractor 787 /// as outputs. 788 static void getCodeExtractorArguments( 789 OutlinableRegion &Region, std::vector<unsigned> &InputGVNs, 790 DenseSet<unsigned> &NotSame, DenseMap<Value *, Value *> &OutputMappings, 791 SetVector<Value *> &ArgInputs, SetVector<Value *> &Outputs) { 792 IRSimilarityCandidate &C = *Region.Candidate; 793 794 // OverallInputs are the inputs to the region found by the CodeExtractor, 795 // SinkCands and HoistCands are used by the CodeExtractor to find sunken 796 // allocas of values whose lifetimes are contained completely within the 797 // outlined region. PremappedInputs are the arguments found by the 798 // CodeExtractor, removing conditions such as sunken allocas, but that 799 // may need to be remapped due to the extracted output values replacing 800 // the original values. We use DummyOutputs for this first run of finding 801 // inputs and outputs since the outputs could change during findAllocas, 802 // the correct set of extracted outputs will be in the final Outputs ValueSet. 803 SetVector<Value *> OverallInputs, PremappedInputs, SinkCands, HoistCands, 804 DummyOutputs; 805 806 // Use the code extractor to get the inputs and outputs, without sunken 807 // allocas or removing llvm.assumes. 808 CodeExtractor *CE = Region.CE; 809 CE->findInputsOutputs(OverallInputs, DummyOutputs, SinkCands); 810 assert(Region.StartBB && "Region must have a start BasicBlock!"); 811 Function *OrigF = Region.StartBB->getParent(); 812 CodeExtractorAnalysisCache CEAC(*OrigF); 813 BasicBlock *Dummy = nullptr; 814 815 // The region may be ineligible due to VarArgs in the parent function. In this 816 // case we ignore the region. 817 if (!CE->isEligible()) { 818 Region.IgnoreRegion = true; 819 return; 820 } 821 822 // Find if any values are going to be sunk into the function when extracted 823 CE->findAllocas(CEAC, SinkCands, HoistCands, Dummy); 824 CE->findInputsOutputs(PremappedInputs, Outputs, SinkCands); 825 826 // TODO: Support regions with sunken allocas: values whose lifetimes are 827 // contained completely within the outlined region. These are not guaranteed 828 // to be the same in every region, so we must elevate them all to arguments 829 // when they appear. If these values are not equal, it means there is some 830 // Input in OverallInputs that was removed for ArgInputs. 831 if (OverallInputs.size() != PremappedInputs.size()) { 832 Region.IgnoreRegion = true; 833 return; 834 } 835 836 findConstants(C, NotSame, InputGVNs); 837 838 mapInputsToGVNs(C, OverallInputs, OutputMappings, InputGVNs); 839 840 remapExtractedInputs(PremappedInputs.getArrayRef(), OutputMappings, 841 ArgInputs); 842 843 // Sort the GVNs, since we now have constants included in the \ref InputGVNs 844 // we need to make sure they are in a deterministic order. 845 stable_sort(InputGVNs); 846 } 847 848 /// Look over the inputs and map each input argument to an argument in the 849 /// overall function for the OutlinableRegions. This creates a way to replace 850 /// the arguments of the extracted function with the arguments of the new 851 /// overall function. 852 /// 853 /// \param [in,out] Region - The region of code to be analyzed. 854 /// \param [in] InputGVNs - The global value numbering of the input values 855 /// collected. 856 /// \param [in] ArgInputs - The values of the arguments to the extracted 857 /// function. 858 static void 859 findExtractedInputToOverallInputMapping(OutlinableRegion &Region, 860 std::vector<unsigned> &InputGVNs, 861 SetVector<Value *> &ArgInputs) { 862 863 IRSimilarityCandidate &C = *Region.Candidate; 864 OutlinableGroup &Group = *Region.Parent; 865 866 // This counts the argument number in the overall function. 867 unsigned TypeIndex = 0; 868 869 // This counts the argument number in the extracted function. 870 unsigned OriginalIndex = 0; 871 872 // Find the mapping of the extracted arguments to the arguments for the 873 // overall function. Since there may be extra arguments in the overall 874 // function to account for the extracted constants, we have two different 875 // counters as we find extracted arguments, and as we come across overall 876 // arguments. 877 878 // Additionally, in our first pass, for the first extracted function, 879 // we find argument locations for the canonical value numbering. This 880 // numbering overrides any discovered location for the extracted code. 881 for (unsigned InputVal : InputGVNs) { 882 Optional<unsigned> CanonicalNumberOpt = C.getCanonicalNum(InputVal); 883 assert(CanonicalNumberOpt.hasValue() && "Canonical number not found?"); 884 unsigned CanonicalNumber = CanonicalNumberOpt.getValue(); 885 886 Optional<Value *> InputOpt = C.fromGVN(InputVal); 887 assert(InputOpt.hasValue() && "Global value number not found?"); 888 Value *Input = InputOpt.getValue(); 889 890 DenseMap<unsigned, unsigned>::iterator AggArgIt = 891 Group.CanonicalNumberToAggArg.find(CanonicalNumber); 892 893 if (!Group.InputTypesSet) { 894 Group.ArgumentTypes.push_back(Input->getType()); 895 // If the input value has a swifterr attribute, make sure to mark the 896 // argument in the overall function. 897 if (Input->isSwiftError()) { 898 assert( 899 !Group.SwiftErrorArgument.hasValue() && 900 "Argument already marked with swifterr for this OutlinableGroup!"); 901 Group.SwiftErrorArgument = TypeIndex; 902 } 903 } 904 905 // Check if we have a constant. If we do add it to the overall argument 906 // number to Constant map for the region, and continue to the next input. 907 if (Constant *CST = dyn_cast<Constant>(Input)) { 908 if (AggArgIt != Group.CanonicalNumberToAggArg.end()) 909 Region.AggArgToConstant.insert(std::make_pair(AggArgIt->second, CST)); 910 else { 911 Group.CanonicalNumberToAggArg.insert( 912 std::make_pair(CanonicalNumber, TypeIndex)); 913 Region.AggArgToConstant.insert(std::make_pair(TypeIndex, CST)); 914 } 915 TypeIndex++; 916 continue; 917 } 918 919 // It is not a constant, we create the mapping from extracted argument list 920 // to the overall argument list, using the canonical location, if it exists. 921 assert(ArgInputs.count(Input) && "Input cannot be found!"); 922 923 if (AggArgIt != Group.CanonicalNumberToAggArg.end()) { 924 if (OriginalIndex != AggArgIt->second) 925 Region.ChangedArgOrder = true; 926 Region.ExtractedArgToAgg.insert( 927 std::make_pair(OriginalIndex, AggArgIt->second)); 928 Region.AggArgToExtracted.insert( 929 std::make_pair(AggArgIt->second, OriginalIndex)); 930 } else { 931 Group.CanonicalNumberToAggArg.insert( 932 std::make_pair(CanonicalNumber, TypeIndex)); 933 Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, TypeIndex)); 934 Region.AggArgToExtracted.insert(std::make_pair(TypeIndex, OriginalIndex)); 935 } 936 OriginalIndex++; 937 TypeIndex++; 938 } 939 940 // If the function type definitions for the OutlinableGroup holding the region 941 // have not been set, set the length of the inputs here. We should have the 942 // same inputs for all of the different regions contained in the 943 // OutlinableGroup since they are all structurally similar to one another. 944 if (!Group.InputTypesSet) { 945 Group.NumAggregateInputs = TypeIndex; 946 Group.InputTypesSet = true; 947 } 948 949 Region.NumExtractedInputs = OriginalIndex; 950 } 951 952 /// Check if the \p V has any uses outside of the region other than \p PN. 953 /// 954 /// \param V [in] - The value to check. 955 /// \param PHILoc [in] - The location in the PHINode of \p V. 956 /// \param PN [in] - The PHINode using \p V. 957 /// \param Exits [in] - The potential blocks we exit to from the outlined 958 /// region. 959 /// \param BlocksInRegion [in] - The basic blocks contained in the region. 960 /// \returns true if \p V has any use soutside its region other than \p PN. 961 static bool outputHasNonPHI(Value *V, unsigned PHILoc, PHINode &PN, 962 SmallPtrSet<BasicBlock *, 1> &Exits, 963 DenseSet<BasicBlock *> &BlocksInRegion) { 964 // We check to see if the value is used by the PHINode from some other 965 // predecessor not included in the region. If it is, we make sure 966 // to keep it as an output. 967 SmallVector<unsigned, 2> IncomingNumbers(PN.getNumIncomingValues()); 968 std::iota(IncomingNumbers.begin(), IncomingNumbers.end(), 0); 969 if (any_of(IncomingNumbers, [PHILoc, &PN, V, &BlocksInRegion](unsigned Idx) { 970 return (Idx != PHILoc && V == PN.getIncomingValue(Idx) && 971 !BlocksInRegion.contains(PN.getIncomingBlock(Idx))); 972 })) 973 return true; 974 975 // Check if the value is used by any other instructions outside the region. 976 return any_of(V->users(), [&Exits, &BlocksInRegion](User *U) { 977 Instruction *I = dyn_cast<Instruction>(U); 978 if (!I) 979 return false; 980 981 // If the use of the item is inside the region, we skip it. Uses 982 // inside the region give us useful information about how the item could be 983 // used as an output. 984 BasicBlock *Parent = I->getParent(); 985 if (BlocksInRegion.contains(Parent)) 986 return false; 987 988 // If it's not a PHINode then we definitely know the use matters. This 989 // output value will not completely combined with another item in a PHINode 990 // as it is directly reference by another non-phi instruction 991 if (!isa<PHINode>(I)) 992 return true; 993 994 // If we have a PHINode outside one of the exit locations, then it 995 // can be considered an outside use as well. If there is a PHINode 996 // contained in the Exit where this values use matters, it will be 997 // caught when we analyze that PHINode. 998 if (!Exits.contains(Parent)) 999 return true; 1000 1001 return false; 1002 }); 1003 } 1004 1005 /// Test whether \p CurrentExitFromRegion contains any PhiNodes that should be 1006 /// considered outputs. A PHINodes is an output when more than one incoming 1007 /// value has been marked by the CodeExtractor as an output. 1008 /// 1009 /// \param CurrentExitFromRegion [in] - The block to analyze. 1010 /// \param PotentialExitsFromRegion [in] - The potential exit blocks from the 1011 /// region. 1012 /// \param RegionBlocks [in] - The basic blocks in the region. 1013 /// \param Outputs [in, out] - The existing outputs for the region, we may add 1014 /// PHINodes to this as we find that they replace output values. 1015 /// \param OutputsReplacedByPHINode [out] - A set containing outputs that are 1016 /// totally replaced by a PHINode. 1017 /// \param OutputsWithNonPhiUses [out] - A set containing outputs that are used 1018 /// in PHINodes, but have other uses, and should still be considered outputs. 1019 static void analyzeExitPHIsForOutputUses( 1020 BasicBlock *CurrentExitFromRegion, 1021 SmallPtrSet<BasicBlock *, 1> &PotentialExitsFromRegion, 1022 DenseSet<BasicBlock *> &RegionBlocks, SetVector<Value *> &Outputs, 1023 DenseSet<Value *> &OutputsReplacedByPHINode, 1024 DenseSet<Value *> &OutputsWithNonPhiUses) { 1025 for (PHINode &PN : CurrentExitFromRegion->phis()) { 1026 // Find all incoming values from the outlining region. 1027 SmallVector<unsigned, 2> IncomingVals; 1028 for (unsigned I = 0, E = PN.getNumIncomingValues(); I < E; ++I) 1029 if (RegionBlocks.contains(PN.getIncomingBlock(I))) 1030 IncomingVals.push_back(I); 1031 1032 // Do not process PHI if there are no predecessors from region. 1033 unsigned NumIncomingVals = IncomingVals.size(); 1034 if (NumIncomingVals == 0) 1035 continue; 1036 1037 // If there is one predecessor, we mark it as a value that needs to be kept 1038 // as an output. 1039 if (NumIncomingVals == 1) { 1040 Value *V = PN.getIncomingValue(*IncomingVals.begin()); 1041 OutputsWithNonPhiUses.insert(V); 1042 OutputsReplacedByPHINode.erase(V); 1043 continue; 1044 } 1045 1046 // This PHINode will be used as an output value, so we add it to our list. 1047 Outputs.insert(&PN); 1048 1049 // Not all of the incoming values should be ignored as other inputs and 1050 // outputs may have uses in outlined region. If they have other uses 1051 // outside of the single PHINode we should not skip over it. 1052 for (unsigned Idx : IncomingVals) { 1053 Value *V = PN.getIncomingValue(Idx); 1054 if (outputHasNonPHI(V, Idx, PN, PotentialExitsFromRegion, RegionBlocks)) { 1055 OutputsWithNonPhiUses.insert(V); 1056 OutputsReplacedByPHINode.erase(V); 1057 continue; 1058 } 1059 if (!OutputsWithNonPhiUses.contains(V)) 1060 OutputsReplacedByPHINode.insert(V); 1061 } 1062 } 1063 } 1064 1065 // Represents the type for the unsigned number denoting the output number for 1066 // phi node, along with the canonical number for the exit block. 1067 using ArgLocWithBBCanon = std::pair<unsigned, unsigned>; 1068 // The list of canonical numbers for the incoming values to a PHINode. 1069 using CanonList = SmallVector<unsigned, 2>; 1070 // The pair type representing the set of canonical values being combined in the 1071 // PHINode, along with the location data for the PHINode. 1072 using PHINodeData = std::pair<ArgLocWithBBCanon, CanonList>; 1073 1074 /// Encode \p PND as an integer for easy lookup based on the argument location, 1075 /// the parent BasicBlock canonical numbering, and the canonical numbering of 1076 /// the values stored in the PHINode. 1077 /// 1078 /// \param PND - The data to hash. 1079 /// \returns The hash code of \p PND. 1080 static hash_code encodePHINodeData(PHINodeData &PND) { 1081 return llvm::hash_combine( 1082 llvm::hash_value(PND.first.first), llvm::hash_value(PND.first.second), 1083 llvm::hash_combine_range(PND.second.begin(), PND.second.end())); 1084 } 1085 1086 /// Create a special GVN for PHINodes that will be used outside of 1087 /// the region. We create a hash code based on the Canonical number of the 1088 /// parent BasicBlock, the canonical numbering of the values stored in the 1089 /// PHINode and the aggregate argument location. This is used to find whether 1090 /// this PHINode type has been given a canonical numbering already. If not, we 1091 /// assign it a value and store it for later use. The value is returned to 1092 /// identify different output schemes for the set of regions. 1093 /// 1094 /// \param Region - The region that \p PN is an output for. 1095 /// \param PN - The PHINode we are analyzing. 1096 /// \param AggArgIdx - The argument \p PN will be stored into. 1097 /// \returns An optional holding the assigned canonical number, or None if 1098 /// there is some attribute of the PHINode blocking it from being used. 1099 static Optional<unsigned> getGVNForPHINode(OutlinableRegion &Region, 1100 PHINode *PN, unsigned AggArgIdx) { 1101 OutlinableGroup &Group = *Region.Parent; 1102 IRSimilarityCandidate &Cand = *Region.Candidate; 1103 BasicBlock *PHIBB = PN->getParent(); 1104 CanonList PHIGVNs; 1105 for (Value *Incoming : PN->incoming_values()) { 1106 // If we cannot find a GVN, this means that the input to the PHINode is 1107 // not included in the region we are trying to analyze, meaning, that if 1108 // it was outlined, we would be adding an extra input. We ignore this 1109 // case for now, and so ignore the region. 1110 Optional<unsigned> OGVN = Cand.getGVN(Incoming); 1111 if (!OGVN.hasValue()) { 1112 Region.IgnoreRegion = true; 1113 return None; 1114 } 1115 1116 // Collect the canonical numbers of the values in the PHINode. 1117 unsigned GVN = OGVN.getValue(); 1118 OGVN = Cand.getCanonicalNum(GVN); 1119 assert(OGVN.hasValue() && "No GVN found for incoming value?"); 1120 PHIGVNs.push_back(*OGVN); 1121 } 1122 1123 // Now that we have the GVNs for the incoming values, we are going to combine 1124 // them with the GVN of the incoming bock, and the output location of the 1125 // PHINode to generate a hash value representing this instance of the PHINode. 1126 DenseMap<hash_code, unsigned>::iterator GVNToPHIIt; 1127 DenseMap<unsigned, PHINodeData>::iterator PHIToGVNIt; 1128 Optional<unsigned> BBGVN = Cand.getGVN(PHIBB); 1129 assert(BBGVN.hasValue() && "Could not find GVN for the incoming block!"); 1130 1131 BBGVN = Cand.getCanonicalNum(BBGVN.getValue()); 1132 assert(BBGVN.hasValue() && 1133 "Could not find canonical number for the incoming block!"); 1134 // Create a pair of the exit block canonical value, and the aggregate 1135 // argument location, connected to the canonical numbers stored in the 1136 // PHINode. 1137 PHINodeData TemporaryPair = 1138 std::make_pair(std::make_pair(BBGVN.getValue(), AggArgIdx), PHIGVNs); 1139 hash_code PHINodeDataHash = encodePHINodeData(TemporaryPair); 1140 1141 // Look for and create a new entry in our connection between canonical 1142 // numbers for PHINodes, and the set of objects we just created. 1143 GVNToPHIIt = Group.GVNsToPHINodeGVN.find(PHINodeDataHash); 1144 if (GVNToPHIIt == Group.GVNsToPHINodeGVN.end()) { 1145 bool Inserted = false; 1146 std::tie(PHIToGVNIt, Inserted) = Group.PHINodeGVNToGVNs.insert( 1147 std::make_pair(Group.PHINodeGVNTracker, TemporaryPair)); 1148 std::tie(GVNToPHIIt, Inserted) = Group.GVNsToPHINodeGVN.insert( 1149 std::make_pair(PHINodeDataHash, Group.PHINodeGVNTracker--)); 1150 } 1151 1152 return GVNToPHIIt->second; 1153 } 1154 1155 /// Create a mapping of the output arguments for the \p Region to the output 1156 /// arguments of the overall outlined function. 1157 /// 1158 /// \param [in,out] Region - The region of code to be analyzed. 1159 /// \param [in] Outputs - The values found by the code extractor. 1160 static void 1161 findExtractedOutputToOverallOutputMapping(OutlinableRegion &Region, 1162 SetVector<Value *> &Outputs) { 1163 OutlinableGroup &Group = *Region.Parent; 1164 IRSimilarityCandidate &C = *Region.Candidate; 1165 1166 SmallVector<BasicBlock *> BE; 1167 DenseSet<BasicBlock *> BlocksInRegion; 1168 C.getBasicBlocks(BlocksInRegion, BE); 1169 1170 // Find the exits to the region. 1171 SmallPtrSet<BasicBlock *, 1> Exits; 1172 for (BasicBlock *Block : BE) 1173 for (BasicBlock *Succ : successors(Block)) 1174 if (!BlocksInRegion.contains(Succ)) 1175 Exits.insert(Succ); 1176 1177 // After determining which blocks exit to PHINodes, we add these PHINodes to 1178 // the set of outputs to be processed. We also check the incoming values of 1179 // the PHINodes for whether they should no longer be considered outputs. 1180 DenseSet<Value *> OutputsReplacedByPHINode; 1181 DenseSet<Value *> OutputsWithNonPhiUses; 1182 for (BasicBlock *ExitBB : Exits) 1183 analyzeExitPHIsForOutputUses(ExitBB, Exits, BlocksInRegion, Outputs, 1184 OutputsReplacedByPHINode, 1185 OutputsWithNonPhiUses); 1186 1187 // This counts the argument number in the extracted function. 1188 unsigned OriginalIndex = Region.NumExtractedInputs; 1189 1190 // This counts the argument number in the overall function. 1191 unsigned TypeIndex = Group.NumAggregateInputs; 1192 bool TypeFound; 1193 DenseSet<unsigned> AggArgsUsed; 1194 1195 // Iterate over the output types and identify if there is an aggregate pointer 1196 // type whose base type matches the current output type. If there is, we mark 1197 // that we will use this output register for this value. If not we add another 1198 // type to the overall argument type list. We also store the GVNs used for 1199 // stores to identify which values will need to be moved into an special 1200 // block that holds the stores to the output registers. 1201 for (Value *Output : Outputs) { 1202 TypeFound = false; 1203 // We can do this since it is a result value, and will have a number 1204 // that is necessarily the same. BUT if in the future, the instructions 1205 // do not have to be in same order, but are functionally the same, we will 1206 // have to use a different scheme, as one-to-one correspondence is not 1207 // guaranteed. 1208 unsigned ArgumentSize = Group.ArgumentTypes.size(); 1209 1210 // If the output is combined in a PHINode, we make sure to skip over it. 1211 if (OutputsReplacedByPHINode.contains(Output)) 1212 continue; 1213 1214 unsigned AggArgIdx = 0; 1215 for (unsigned Jdx = TypeIndex; Jdx < ArgumentSize; Jdx++) { 1216 if (Group.ArgumentTypes[Jdx] != PointerType::getUnqual(Output->getType())) 1217 continue; 1218 1219 if (AggArgsUsed.contains(Jdx)) 1220 continue; 1221 1222 TypeFound = true; 1223 AggArgsUsed.insert(Jdx); 1224 Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, Jdx)); 1225 Region.AggArgToExtracted.insert(std::make_pair(Jdx, OriginalIndex)); 1226 AggArgIdx = Jdx; 1227 break; 1228 } 1229 1230 // We were unable to find an unused type in the output type set that matches 1231 // the output, so we add a pointer type to the argument types of the overall 1232 // function to handle this output and create a mapping to it. 1233 if (!TypeFound) { 1234 Group.ArgumentTypes.push_back(PointerType::getUnqual(Output->getType())); 1235 // Mark the new pointer type as the last value in the aggregate argument 1236 // list. 1237 unsigned ArgTypeIdx = Group.ArgumentTypes.size() - 1; 1238 AggArgsUsed.insert(ArgTypeIdx); 1239 Region.ExtractedArgToAgg.insert( 1240 std::make_pair(OriginalIndex, ArgTypeIdx)); 1241 Region.AggArgToExtracted.insert( 1242 std::make_pair(ArgTypeIdx, OriginalIndex)); 1243 AggArgIdx = ArgTypeIdx; 1244 } 1245 1246 // TODO: Adapt to the extra input from the PHINode. 1247 PHINode *PN = dyn_cast<PHINode>(Output); 1248 1249 Optional<unsigned> GVN; 1250 if (PN && !BlocksInRegion.contains(PN->getParent())) { 1251 // Values outside the region can be combined into PHINode when we 1252 // have multiple exits. We collect both of these into a list to identify 1253 // which values are being used in the PHINode. Each list identifies a 1254 // different PHINode, and a different output. We store the PHINode as it's 1255 // own canonical value. These canonical values are also dependent on the 1256 // output argument it is saved to. 1257 1258 // If two PHINodes have the same canonical values, but different aggregate 1259 // argument locations, then they will have distinct Canonical Values. 1260 GVN = getGVNForPHINode(Region, PN, AggArgIdx); 1261 if (!GVN.hasValue()) 1262 return; 1263 } else { 1264 // If we do not have a PHINode we use the global value numbering for the 1265 // output value, to find the canonical number to add to the set of stored 1266 // values. 1267 GVN = C.getGVN(Output); 1268 GVN = C.getCanonicalNum(*GVN); 1269 } 1270 1271 // Each region has a potentially unique set of outputs. We save which 1272 // values are output in a list of canonical values so we can differentiate 1273 // among the different store schemes. 1274 Region.GVNStores.push_back(*GVN); 1275 1276 OriginalIndex++; 1277 TypeIndex++; 1278 } 1279 1280 // We sort the stored values to make sure that we are not affected by analysis 1281 // order when determining what combination of items were stored. 1282 stable_sort(Region.GVNStores); 1283 } 1284 1285 void IROutliner::findAddInputsOutputs(Module &M, OutlinableRegion &Region, 1286 DenseSet<unsigned> &NotSame) { 1287 std::vector<unsigned> Inputs; 1288 SetVector<Value *> ArgInputs, Outputs; 1289 1290 getCodeExtractorArguments(Region, Inputs, NotSame, OutputMappings, ArgInputs, 1291 Outputs); 1292 1293 if (Region.IgnoreRegion) 1294 return; 1295 1296 // Map the inputs found by the CodeExtractor to the arguments found for 1297 // the overall function. 1298 findExtractedInputToOverallInputMapping(Region, Inputs, ArgInputs); 1299 1300 // Map the outputs found by the CodeExtractor to the arguments found for 1301 // the overall function. 1302 findExtractedOutputToOverallOutputMapping(Region, Outputs); 1303 } 1304 1305 /// Replace the extracted function in the Region with a call to the overall 1306 /// function constructed from the deduplicated similar regions, replacing and 1307 /// remapping the values passed to the extracted function as arguments to the 1308 /// new arguments of the overall function. 1309 /// 1310 /// \param [in] M - The module to outline from. 1311 /// \param [in] Region - The regions of extracted code to be replaced with a new 1312 /// function. 1313 /// \returns a call instruction with the replaced function. 1314 CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) { 1315 std::vector<Value *> NewCallArgs; 1316 DenseMap<unsigned, unsigned>::iterator ArgPair; 1317 1318 OutlinableGroup &Group = *Region.Parent; 1319 CallInst *Call = Region.Call; 1320 assert(Call && "Call to replace is nullptr?"); 1321 Function *AggFunc = Group.OutlinedFunction; 1322 assert(AggFunc && "Function to replace with is nullptr?"); 1323 1324 // If the arguments are the same size, there are not values that need to be 1325 // made into an argument, the argument ordering has not been change, or 1326 // different output registers to handle. We can simply replace the called 1327 // function in this case. 1328 if (!Region.ChangedArgOrder && AggFunc->arg_size() == Call->arg_size()) { 1329 LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to " 1330 << *AggFunc << " with same number of arguments\n"); 1331 Call->setCalledFunction(AggFunc); 1332 return Call; 1333 } 1334 1335 // We have a different number of arguments than the new function, so 1336 // we need to use our previously mappings off extracted argument to overall 1337 // function argument, and constants to overall function argument to create the 1338 // new argument list. 1339 for (unsigned AggArgIdx = 0; AggArgIdx < AggFunc->arg_size(); AggArgIdx++) { 1340 1341 if (AggArgIdx == AggFunc->arg_size() - 1 && 1342 Group.OutputGVNCombinations.size() > 1) { 1343 // If we are on the last argument, and we need to differentiate between 1344 // output blocks, add an integer to the argument list to determine 1345 // what block to take 1346 LLVM_DEBUG(dbgs() << "Set switch block argument to " 1347 << Region.OutputBlockNum << "\n"); 1348 NewCallArgs.push_back(ConstantInt::get(Type::getInt32Ty(M.getContext()), 1349 Region.OutputBlockNum)); 1350 continue; 1351 } 1352 1353 ArgPair = Region.AggArgToExtracted.find(AggArgIdx); 1354 if (ArgPair != Region.AggArgToExtracted.end()) { 1355 Value *ArgumentValue = Call->getArgOperand(ArgPair->second); 1356 // If we found the mapping from the extracted function to the overall 1357 // function, we simply add it to the argument list. We use the same 1358 // value, it just needs to honor the new order of arguments. 1359 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value " 1360 << *ArgumentValue << "\n"); 1361 NewCallArgs.push_back(ArgumentValue); 1362 continue; 1363 } 1364 1365 // If it is a constant, we simply add it to the argument list as a value. 1366 if (Region.AggArgToConstant.find(AggArgIdx) != 1367 Region.AggArgToConstant.end()) { 1368 Constant *CST = Region.AggArgToConstant.find(AggArgIdx)->second; 1369 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value " 1370 << *CST << "\n"); 1371 NewCallArgs.push_back(CST); 1372 continue; 1373 } 1374 1375 // Add a nullptr value if the argument is not found in the extracted 1376 // function. If we cannot find a value, it means it is not in use 1377 // for the region, so we should not pass anything to it. 1378 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to nullptr\n"); 1379 NewCallArgs.push_back(ConstantPointerNull::get( 1380 static_cast<PointerType *>(AggFunc->getArg(AggArgIdx)->getType()))); 1381 } 1382 1383 LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to " 1384 << *AggFunc << " with new set of arguments\n"); 1385 // Create the new call instruction and erase the old one. 1386 Call = CallInst::Create(AggFunc->getFunctionType(), AggFunc, NewCallArgs, "", 1387 Call); 1388 1389 // It is possible that the call to the outlined function is either the first 1390 // instruction is in the new block, the last instruction, or both. If either 1391 // of these is the case, we need to make sure that we replace the instruction 1392 // in the IRInstructionData struct with the new call. 1393 CallInst *OldCall = Region.Call; 1394 if (Region.NewFront->Inst == OldCall) 1395 Region.NewFront->Inst = Call; 1396 if (Region.NewBack->Inst == OldCall) 1397 Region.NewBack->Inst = Call; 1398 1399 // Transfer any debug information. 1400 Call->setDebugLoc(Region.Call->getDebugLoc()); 1401 // Since our output may determine which branch we go to, we make sure to 1402 // propogate this new call value through the module. 1403 OldCall->replaceAllUsesWith(Call); 1404 1405 // Remove the old instruction. 1406 OldCall->eraseFromParent(); 1407 Region.Call = Call; 1408 1409 // Make sure that the argument in the new function has the SwiftError 1410 // argument. 1411 if (Group.SwiftErrorArgument.hasValue()) 1412 Call->addParamAttr(Group.SwiftErrorArgument.getValue(), 1413 Attribute::SwiftError); 1414 1415 return Call; 1416 } 1417 1418 /// Find or create a BasicBlock in the outlined function containing PhiBlocks 1419 /// for \p RetVal. 1420 /// 1421 /// \param Group - The OutlinableGroup containing the information about the 1422 /// overall outlined function. 1423 /// \param RetVal - The return value or exit option that we are currently 1424 /// evaluating. 1425 /// \returns The found or newly created BasicBlock to contain the needed 1426 /// PHINodes to be used as outputs. 1427 static BasicBlock *findOrCreatePHIBlock(OutlinableGroup &Group, Value *RetVal) { 1428 DenseMap<Value *, BasicBlock *>::iterator PhiBlockForRetVal, 1429 ReturnBlockForRetVal; 1430 PhiBlockForRetVal = Group.PHIBlocks.find(RetVal); 1431 ReturnBlockForRetVal = Group.EndBBs.find(RetVal); 1432 assert(ReturnBlockForRetVal != Group.EndBBs.end() && 1433 "Could not find output value!"); 1434 BasicBlock *ReturnBB = ReturnBlockForRetVal->second; 1435 1436 // Find if a PHIBlock exists for this return value already. If it is 1437 // the first time we are analyzing this, we will not, so we record it. 1438 PhiBlockForRetVal = Group.PHIBlocks.find(RetVal); 1439 if (PhiBlockForRetVal != Group.PHIBlocks.end()) 1440 return PhiBlockForRetVal->second; 1441 1442 // If we did not find a block, we create one, and insert it into the 1443 // overall function and record it. 1444 bool Inserted = false; 1445 BasicBlock *PHIBlock = BasicBlock::Create(ReturnBB->getContext(), "phi_block", 1446 ReturnBB->getParent()); 1447 std::tie(PhiBlockForRetVal, Inserted) = 1448 Group.PHIBlocks.insert(std::make_pair(RetVal, PHIBlock)); 1449 1450 // We find the predecessors of the return block in the newly created outlined 1451 // function in order to point them to the new PHIBlock rather than the already 1452 // existing return block. 1453 SmallVector<BranchInst *, 2> BranchesToChange; 1454 for (BasicBlock *Pred : predecessors(ReturnBB)) 1455 BranchesToChange.push_back(cast<BranchInst>(Pred->getTerminator())); 1456 1457 // Now we mark the branch instructions found, and change the references of the 1458 // return block to the newly created PHIBlock. 1459 for (BranchInst *BI : BranchesToChange) 1460 for (unsigned Succ = 0, End = BI->getNumSuccessors(); Succ < End; Succ++) { 1461 if (BI->getSuccessor(Succ) != ReturnBB) 1462 continue; 1463 BI->setSuccessor(Succ, PHIBlock); 1464 } 1465 1466 BranchInst::Create(ReturnBB, PHIBlock); 1467 1468 return PhiBlockForRetVal->second; 1469 } 1470 1471 /// For the function call now representing the \p Region, find the passed value 1472 /// to that call that represents Argument \p A at the call location if the 1473 /// call has already been replaced with a call to the overall, aggregate 1474 /// function. 1475 /// 1476 /// \param A - The Argument to get the passed value for. 1477 /// \param Region - The extracted Region corresponding to the outlined function. 1478 /// \returns The Value representing \p A at the call site. 1479 static Value * 1480 getPassedArgumentInAlreadyOutlinedFunction(const Argument *A, 1481 const OutlinableRegion &Region) { 1482 // If we don't need to adjust the argument number at all (since the call 1483 // has already been replaced by a call to the overall outlined function) 1484 // we can just get the specified argument. 1485 return Region.Call->getArgOperand(A->getArgNo()); 1486 } 1487 1488 /// For the function call now representing the \p Region, find the passed value 1489 /// to that call that represents Argument \p A at the call location if the 1490 /// call has only been replaced by the call to the aggregate function. 1491 /// 1492 /// \param A - The Argument to get the passed value for. 1493 /// \param Region - The extracted Region corresponding to the outlined function. 1494 /// \returns The Value representing \p A at the call site. 1495 static Value * 1496 getPassedArgumentAndAdjustArgumentLocation(const Argument *A, 1497 const OutlinableRegion &Region) { 1498 unsigned ArgNum = A->getArgNo(); 1499 1500 // If it is a constant, we can look at our mapping from when we created 1501 // the outputs to figure out what the constant value is. 1502 if (Region.AggArgToConstant.count(ArgNum)) 1503 return Region.AggArgToConstant.find(ArgNum)->second; 1504 1505 // If it is not a constant, and we are not looking at the overall function, we 1506 // need to adjust which argument we are looking at. 1507 ArgNum = Region.AggArgToExtracted.find(ArgNum)->second; 1508 return Region.Call->getArgOperand(ArgNum); 1509 } 1510 1511 /// Find the canonical numbering for the incoming Values into the PHINode \p PN. 1512 /// 1513 /// \param PN [in] - The PHINode that we are finding the canonical numbers for. 1514 /// \param Region [in] - The OutlinableRegion containing \p PN. 1515 /// \param OutputMappings [in] - The mapping of output values from outlined 1516 /// region to their original values. 1517 /// \param CanonNums [out] - The canonical numbering for the incoming values to 1518 /// \p PN. 1519 /// \param ReplacedWithOutlinedCall - A flag to use the extracted function call 1520 /// of \p Region rather than the overall function's call. 1521 static void 1522 findCanonNumsForPHI(PHINode *PN, OutlinableRegion &Region, 1523 const DenseMap<Value *, Value *> &OutputMappings, 1524 DenseSet<unsigned> &CanonNums, 1525 bool ReplacedWithOutlinedCall = true) { 1526 // Iterate over the incoming values. 1527 for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) { 1528 Value *IVal = PN->getIncomingValue(Idx); 1529 // If we have an argument as incoming value, we need to grab the passed 1530 // value from the call itself. 1531 if (Argument *A = dyn_cast<Argument>(IVal)) { 1532 if (ReplacedWithOutlinedCall) 1533 IVal = getPassedArgumentInAlreadyOutlinedFunction(A, Region); 1534 else 1535 IVal = getPassedArgumentAndAdjustArgumentLocation(A, Region); 1536 } 1537 1538 // Get the original value if it has been replaced by an output value. 1539 IVal = findOutputMapping(OutputMappings, IVal); 1540 1541 // Find and add the canonical number for the incoming value. 1542 Optional<unsigned> GVN = Region.Candidate->getGVN(IVal); 1543 assert(GVN.hasValue() && "No GVN for incoming value"); 1544 Optional<unsigned> CanonNum = Region.Candidate->getCanonicalNum(*GVN); 1545 assert(CanonNum.hasValue() && "No Canonical Number for GVN"); 1546 CanonNums.insert(*CanonNum); 1547 } 1548 } 1549 1550 /// Find, or add PHINode \p PN to the combined PHINode Block \p OverallPHIBlock 1551 /// in order to condense the number of instructions added to the outlined 1552 /// function. 1553 /// 1554 /// \param PN [in] - The PHINode that we are finding the canonical numbers for. 1555 /// \param Region [in] - The OutlinableRegion containing \p PN. 1556 /// \param OverallPhiBlock [in] - The overall PHIBlock we are trying to find 1557 /// \p PN in. 1558 /// \param OutputMappings [in] - The mapping of output values from outlined 1559 /// region to their original values. 1560 /// \return the newly found or created PHINode in \p OverallPhiBlock. 1561 static PHINode* 1562 findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region, 1563 BasicBlock *OverallPhiBlock, 1564 const DenseMap<Value *, Value *> &OutputMappings) { 1565 OutlinableGroup &Group = *Region.Parent; 1566 1567 DenseSet<unsigned> PNCanonNums; 1568 // We have to use the extracted function since we have merged this region into 1569 // the overall function yet. We make sure to reassign the argument numbering 1570 // since it is possible that the argument ordering is different between the 1571 // functions. 1572 findCanonNumsForPHI(&PN, Region, OutputMappings, PNCanonNums, 1573 /* ReplacedWithOutlinedCall = */ false); 1574 1575 OutlinableRegion *FirstRegion = Group.Regions[0]; 1576 DenseSet<unsigned> CurrentCanonNums; 1577 // Find the Canonical Numbering for each PHINode, if it matches, we replace 1578 // the uses of the PHINode we are searching for, with the found PHINode. 1579 for (PHINode &CurrPN : OverallPhiBlock->phis()) { 1580 CurrentCanonNums.clear(); 1581 findCanonNumsForPHI(&CurrPN, *FirstRegion, OutputMappings, CurrentCanonNums, 1582 /* ReplacedWithOutlinedCall = */ true); 1583 1584 if (all_of(PNCanonNums, [&CurrentCanonNums](unsigned CanonNum) { 1585 return CurrentCanonNums.contains(CanonNum); 1586 })) 1587 return &CurrPN; 1588 } 1589 1590 // If we've made it here, it means we weren't able to replace the PHINode, so 1591 // we must insert it ourselves. 1592 PHINode *NewPN = cast<PHINode>(PN.clone()); 1593 NewPN->insertBefore(&*OverallPhiBlock->begin()); 1594 for (unsigned Idx = 0, Edx = NewPN->getNumIncomingValues(); Idx < Edx; 1595 Idx++) { 1596 Value *IncomingVal = NewPN->getIncomingValue(Idx); 1597 BasicBlock *IncomingBlock = NewPN->getIncomingBlock(Idx); 1598 1599 // Find corresponding basic block in the overall function for the incoming 1600 // block. 1601 Instruction *FirstNonPHI = IncomingBlock->getFirstNonPHI(); 1602 assert(FirstNonPHI && "Incoming block is empty?"); 1603 Value *CorrespondingVal = 1604 Region.findCorrespondingValueIn(*FirstRegion, FirstNonPHI); 1605 assert(CorrespondingVal && "Value is nullptr?"); 1606 BasicBlock *BlockToUse = cast<Instruction>(CorrespondingVal)->getParent(); 1607 NewPN->setIncomingBlock(Idx, BlockToUse); 1608 1609 // If we have an argument we make sure we replace using the argument from 1610 // the correct function. 1611 if (Argument *A = dyn_cast<Argument>(IncomingVal)) { 1612 Value *Val = Group.OutlinedFunction->getArg(A->getArgNo()); 1613 NewPN->setIncomingValue(Idx, Val); 1614 continue; 1615 } 1616 1617 // Find the corresponding value in the overall function. 1618 IncomingVal = findOutputMapping(OutputMappings, IncomingVal); 1619 Value *Val = Region.findCorrespondingValueIn(*FirstRegion, IncomingVal); 1620 assert(Val && "Value is nullptr?"); 1621 NewPN->setIncomingValue(Idx, Val); 1622 } 1623 return NewPN; 1624 } 1625 1626 // Within an extracted function, replace the argument uses of the extracted 1627 // region with the arguments of the function for an OutlinableGroup. 1628 // 1629 /// \param [in] Region - The region of extracted code to be changed. 1630 /// \param [in,out] OutputBBs - The BasicBlock for the output stores for this 1631 /// region. 1632 /// \param [in] FirstFunction - A flag to indicate whether we are using this 1633 /// function to define the overall outlined function for all the regions, or 1634 /// if we are operating on one of the following regions. 1635 static void 1636 replaceArgumentUses(OutlinableRegion &Region, 1637 DenseMap<Value *, BasicBlock *> &OutputBBs, 1638 const DenseMap<Value *, Value *> &OutputMappings, 1639 bool FirstFunction = false) { 1640 OutlinableGroup &Group = *Region.Parent; 1641 assert(Region.ExtractedFunction && "Region has no extracted function?"); 1642 1643 Function *DominatingFunction = Region.ExtractedFunction; 1644 if (FirstFunction) 1645 DominatingFunction = Group.OutlinedFunction; 1646 DominatorTree DT(*DominatingFunction); 1647 1648 for (unsigned ArgIdx = 0; ArgIdx < Region.ExtractedFunction->arg_size(); 1649 ArgIdx++) { 1650 assert(Region.ExtractedArgToAgg.find(ArgIdx) != 1651 Region.ExtractedArgToAgg.end() && 1652 "No mapping from extracted to outlined?"); 1653 unsigned AggArgIdx = Region.ExtractedArgToAgg.find(ArgIdx)->second; 1654 Argument *AggArg = Group.OutlinedFunction->getArg(AggArgIdx); 1655 Argument *Arg = Region.ExtractedFunction->getArg(ArgIdx); 1656 // The argument is an input, so we can simply replace it with the overall 1657 // argument value 1658 if (ArgIdx < Region.NumExtractedInputs) { 1659 LLVM_DEBUG(dbgs() << "Replacing uses of input " << *Arg << " in function " 1660 << *Region.ExtractedFunction << " with " << *AggArg 1661 << " in function " << *Group.OutlinedFunction << "\n"); 1662 Arg->replaceAllUsesWith(AggArg); 1663 continue; 1664 } 1665 1666 // If we are replacing an output, we place the store value in its own 1667 // block inside the overall function before replacing the use of the output 1668 // in the function. 1669 assert(Arg->hasOneUse() && "Output argument can only have one use"); 1670 User *InstAsUser = Arg->user_back(); 1671 assert(InstAsUser && "User is nullptr!"); 1672 1673 Instruction *I = cast<Instruction>(InstAsUser); 1674 BasicBlock *BB = I->getParent(); 1675 SmallVector<BasicBlock *, 4> Descendants; 1676 DT.getDescendants(BB, Descendants); 1677 bool EdgeAdded = false; 1678 if (Descendants.size() == 0) { 1679 EdgeAdded = true; 1680 DT.insertEdge(&DominatingFunction->getEntryBlock(), BB); 1681 DT.getDescendants(BB, Descendants); 1682 } 1683 1684 // Iterate over the following blocks, looking for return instructions, 1685 // if we find one, find the corresponding output block for the return value 1686 // and move our store instruction there. 1687 for (BasicBlock *DescendBB : Descendants) { 1688 ReturnInst *RI = dyn_cast<ReturnInst>(DescendBB->getTerminator()); 1689 if (!RI) 1690 continue; 1691 Value *RetVal = RI->getReturnValue(); 1692 auto VBBIt = OutputBBs.find(RetVal); 1693 assert(VBBIt != OutputBBs.end() && "Could not find output value!"); 1694 1695 // If this is storing a PHINode, we must make sure it is included in the 1696 // overall function. 1697 StoreInst *SI = cast<StoreInst>(I); 1698 1699 Value *ValueOperand = SI->getValueOperand(); 1700 1701 StoreInst *NewI = cast<StoreInst>(I->clone()); 1702 NewI->setDebugLoc(DebugLoc()); 1703 BasicBlock *OutputBB = VBBIt->second; 1704 OutputBB->getInstList().push_back(NewI); 1705 LLVM_DEBUG(dbgs() << "Move store for instruction " << *I << " to " 1706 << *OutputBB << "\n"); 1707 1708 // If this is storing a PHINode, we must make sure it is included in the 1709 // overall function. 1710 if (!isa<PHINode>(ValueOperand) || 1711 Region.Candidate->getGVN(ValueOperand).hasValue()) { 1712 if (FirstFunction) 1713 continue; 1714 Value *CorrVal = 1715 Region.findCorrespondingValueIn(*Group.Regions[0], ValueOperand); 1716 assert(CorrVal && "Value is nullptr?"); 1717 NewI->setOperand(0, CorrVal); 1718 continue; 1719 } 1720 PHINode *PN = cast<PHINode>(SI->getValueOperand()); 1721 // If it has a value, it was not split by the code extractor, which 1722 // is what we are looking for. 1723 if (Region.Candidate->getGVN(PN).hasValue()) 1724 continue; 1725 1726 // We record the parent block for the PHINode in the Region so that 1727 // we can exclude it from checks later on. 1728 Region.PHIBlocks.insert(std::make_pair(RetVal, PN->getParent())); 1729 1730 // If this is the first function, we do not need to worry about mergiing 1731 // this with any other block in the overall outlined function, so we can 1732 // just continue. 1733 if (FirstFunction) { 1734 BasicBlock *PHIBlock = PN->getParent(); 1735 Group.PHIBlocks.insert(std::make_pair(RetVal, PHIBlock)); 1736 continue; 1737 } 1738 1739 // We look for the aggregate block that contains the PHINodes leading into 1740 // this exit path. If we can't find one, we create one. 1741 BasicBlock *OverallPhiBlock = findOrCreatePHIBlock(Group, RetVal); 1742 1743 // For our PHINode, we find the combined canonical numbering, and 1744 // attempt to find a matching PHINode in the overall PHIBlock. If we 1745 // cannot, we copy the PHINode and move it into this new block. 1746 PHINode *NewPN = 1747 findOrCreatePHIInBlock(*PN, Region, OverallPhiBlock, OutputMappings); 1748 NewI->setOperand(0, NewPN); 1749 } 1750 1751 // If we added an edge for basic blocks without a predecessor, we remove it 1752 // here. 1753 if (EdgeAdded) 1754 DT.deleteEdge(&DominatingFunction->getEntryBlock(), BB); 1755 I->eraseFromParent(); 1756 1757 LLVM_DEBUG(dbgs() << "Replacing uses of output " << *Arg << " in function " 1758 << *Region.ExtractedFunction << " with " << *AggArg 1759 << " in function " << *Group.OutlinedFunction << "\n"); 1760 Arg->replaceAllUsesWith(AggArg); 1761 } 1762 } 1763 1764 /// Within an extracted function, replace the constants that need to be lifted 1765 /// into arguments with the actual argument. 1766 /// 1767 /// \param Region [in] - The region of extracted code to be changed. 1768 void replaceConstants(OutlinableRegion &Region) { 1769 OutlinableGroup &Group = *Region.Parent; 1770 // Iterate over the constants that need to be elevated into arguments 1771 for (std::pair<unsigned, Constant *> &Const : Region.AggArgToConstant) { 1772 unsigned AggArgIdx = Const.first; 1773 Function *OutlinedFunction = Group.OutlinedFunction; 1774 assert(OutlinedFunction && "Overall Function is not defined?"); 1775 Constant *CST = Const.second; 1776 Argument *Arg = Group.OutlinedFunction->getArg(AggArgIdx); 1777 // Identify the argument it will be elevated to, and replace instances of 1778 // that constant in the function. 1779 1780 // TODO: If in the future constants do not have one global value number, 1781 // i.e. a constant 1 could be mapped to several values, this check will 1782 // have to be more strict. It cannot be using only replaceUsesWithIf. 1783 1784 LLVM_DEBUG(dbgs() << "Replacing uses of constant " << *CST 1785 << " in function " << *OutlinedFunction << " with " 1786 << *Arg << "\n"); 1787 CST->replaceUsesWithIf(Arg, [OutlinedFunction](Use &U) { 1788 if (Instruction *I = dyn_cast<Instruction>(U.getUser())) 1789 return I->getFunction() == OutlinedFunction; 1790 return false; 1791 }); 1792 } 1793 } 1794 1795 /// It is possible that there is a basic block that already performs the same 1796 /// stores. This returns a duplicate block, if it exists 1797 /// 1798 /// \param OutputBBs [in] the blocks we are looking for a duplicate of. 1799 /// \param OutputStoreBBs [in] The existing output blocks. 1800 /// \returns an optional value with the number output block if there is a match. 1801 Optional<unsigned> findDuplicateOutputBlock( 1802 DenseMap<Value *, BasicBlock *> &OutputBBs, 1803 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) { 1804 1805 bool Mismatch = false; 1806 unsigned MatchingNum = 0; 1807 // We compare the new set output blocks to the other sets of output blocks. 1808 // If they are the same number, and have identical instructions, they are 1809 // considered to be the same. 1810 for (DenseMap<Value *, BasicBlock *> &CompBBs : OutputStoreBBs) { 1811 Mismatch = false; 1812 for (std::pair<Value *, BasicBlock *> &VToB : CompBBs) { 1813 DenseMap<Value *, BasicBlock *>::iterator OutputBBIt = 1814 OutputBBs.find(VToB.first); 1815 if (OutputBBIt == OutputBBs.end()) { 1816 Mismatch = true; 1817 break; 1818 } 1819 1820 BasicBlock *CompBB = VToB.second; 1821 BasicBlock *OutputBB = OutputBBIt->second; 1822 if (CompBB->size() - 1 != OutputBB->size()) { 1823 Mismatch = true; 1824 break; 1825 } 1826 1827 BasicBlock::iterator NIt = OutputBB->begin(); 1828 for (Instruction &I : *CompBB) { 1829 if (isa<BranchInst>(&I)) 1830 continue; 1831 1832 if (!I.isIdenticalTo(&(*NIt))) { 1833 Mismatch = true; 1834 break; 1835 } 1836 1837 NIt++; 1838 } 1839 } 1840 1841 if (!Mismatch) 1842 return MatchingNum; 1843 1844 MatchingNum++; 1845 } 1846 1847 return None; 1848 } 1849 1850 /// Remove empty output blocks from the outlined region. 1851 /// 1852 /// \param BlocksToPrune - Mapping of return values output blocks for the \p 1853 /// Region. 1854 /// \param Region - The OutlinableRegion we are analyzing. 1855 static bool 1856 analyzeAndPruneOutputBlocks(DenseMap<Value *, BasicBlock *> &BlocksToPrune, 1857 OutlinableRegion &Region) { 1858 bool AllRemoved = true; 1859 Value *RetValueForBB; 1860 BasicBlock *NewBB; 1861 SmallVector<Value *, 4> ToRemove; 1862 // Iterate over the output blocks created in the outlined section. 1863 for (std::pair<Value *, BasicBlock *> &VtoBB : BlocksToPrune) { 1864 RetValueForBB = VtoBB.first; 1865 NewBB = VtoBB.second; 1866 1867 // If there are no instructions, we remove it from the module, and also 1868 // mark the value for removal from the return value to output block mapping. 1869 if (NewBB->size() == 0) { 1870 NewBB->eraseFromParent(); 1871 ToRemove.push_back(RetValueForBB); 1872 continue; 1873 } 1874 1875 // Mark that we could not remove all the blocks since they were not all 1876 // empty. 1877 AllRemoved = false; 1878 } 1879 1880 // Remove the return value from the mapping. 1881 for (Value *V : ToRemove) 1882 BlocksToPrune.erase(V); 1883 1884 // Mark the region as having the no output scheme. 1885 if (AllRemoved) 1886 Region.OutputBlockNum = -1; 1887 1888 return AllRemoved; 1889 } 1890 1891 /// For the outlined section, move needed the StoreInsts for the output 1892 /// registers into their own block. Then, determine if there is a duplicate 1893 /// output block already created. 1894 /// 1895 /// \param [in] OG - The OutlinableGroup of regions to be outlined. 1896 /// \param [in] Region - The OutlinableRegion that is being analyzed. 1897 /// \param [in,out] OutputBBs - the blocks that stores for this region will be 1898 /// placed in. 1899 /// \param [in] EndBBs - the final blocks of the extracted function. 1900 /// \param [in] OutputMappings - OutputMappings the mapping of values that have 1901 /// been replaced by a new output value. 1902 /// \param [in,out] OutputStoreBBs - The existing output blocks. 1903 static void alignOutputBlockWithAggFunc( 1904 OutlinableGroup &OG, OutlinableRegion &Region, 1905 DenseMap<Value *, BasicBlock *> &OutputBBs, 1906 DenseMap<Value *, BasicBlock *> &EndBBs, 1907 const DenseMap<Value *, Value *> &OutputMappings, 1908 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) { 1909 // If none of the output blocks have any instructions, this means that we do 1910 // not have to determine if it matches any of the other output schemes, and we 1911 // don't have to do anything else. 1912 if (analyzeAndPruneOutputBlocks(OutputBBs, Region)) 1913 return; 1914 1915 // Determine is there is a duplicate set of blocks. 1916 Optional<unsigned> MatchingBB = 1917 findDuplicateOutputBlock(OutputBBs, OutputStoreBBs); 1918 1919 // If there is, we remove the new output blocks. If it does not, 1920 // we add it to our list of sets of output blocks. 1921 if (MatchingBB.hasValue()) { 1922 LLVM_DEBUG(dbgs() << "Set output block for region in function" 1923 << Region.ExtractedFunction << " to " 1924 << MatchingBB.getValue()); 1925 1926 Region.OutputBlockNum = MatchingBB.getValue(); 1927 for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs) 1928 VtoBB.second->eraseFromParent(); 1929 return; 1930 } 1931 1932 Region.OutputBlockNum = OutputStoreBBs.size(); 1933 1934 Value *RetValueForBB; 1935 BasicBlock *NewBB; 1936 OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>()); 1937 for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs) { 1938 RetValueForBB = VtoBB.first; 1939 NewBB = VtoBB.second; 1940 DenseMap<Value *, BasicBlock *>::iterator VBBIt = 1941 EndBBs.find(RetValueForBB); 1942 LLVM_DEBUG(dbgs() << "Create output block for region in" 1943 << Region.ExtractedFunction << " to " 1944 << *NewBB); 1945 BranchInst::Create(VBBIt->second, NewBB); 1946 OutputStoreBBs.back().insert(std::make_pair(RetValueForBB, NewBB)); 1947 } 1948 } 1949 1950 /// Takes in a mapping, \p OldMap of ConstantValues to BasicBlocks, sorts keys, 1951 /// before creating a basic block for each \p NewMap, and inserting into the new 1952 /// block. Each BasicBlock is named with the scheme "<basename>_<key_idx>". 1953 /// 1954 /// \param OldMap [in] - The mapping to base the new mapping off of. 1955 /// \param NewMap [out] - The output mapping using the keys of \p OldMap. 1956 /// \param ParentFunc [in] - The function to put the new basic block in. 1957 /// \param BaseName [in] - The start of the BasicBlock names to be appended to 1958 /// by an index value. 1959 static void createAndInsertBasicBlocks(DenseMap<Value *, BasicBlock *> &OldMap, 1960 DenseMap<Value *, BasicBlock *> &NewMap, 1961 Function *ParentFunc, Twine BaseName) { 1962 unsigned Idx = 0; 1963 std::vector<Value *> SortedKeys; 1964 1965 getSortedConstantKeys(SortedKeys, OldMap); 1966 1967 for (Value *RetVal : SortedKeys) { 1968 BasicBlock *NewBB = BasicBlock::Create( 1969 ParentFunc->getContext(), 1970 Twine(BaseName) + Twine("_") + Twine(static_cast<unsigned>(Idx++)), 1971 ParentFunc); 1972 NewMap.insert(std::make_pair(RetVal, NewBB)); 1973 } 1974 } 1975 1976 /// Create the switch statement for outlined function to differentiate between 1977 /// all the output blocks. 1978 /// 1979 /// For the outlined section, determine if an outlined block already exists that 1980 /// matches the needed stores for the extracted section. 1981 /// \param [in] M - The module we are outlining from. 1982 /// \param [in] OG - The group of regions to be outlined. 1983 /// \param [in] EndBBs - The final blocks of the extracted function. 1984 /// \param [in,out] OutputStoreBBs - The existing output blocks. 1985 void createSwitchStatement( 1986 Module &M, OutlinableGroup &OG, DenseMap<Value *, BasicBlock *> &EndBBs, 1987 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) { 1988 // We only need the switch statement if there is more than one store 1989 // combination, or there is more than one set of output blocks. The first 1990 // will occur when we store different sets of values for two different 1991 // regions. The second will occur when we have two outputs that are combined 1992 // in a PHINode outside of the region in one outlined instance, and are used 1993 // seaparately in another. This will create the same set of OutputGVNs, but 1994 // will generate two different output schemes. 1995 if (OG.OutputGVNCombinations.size() > 1) { 1996 Function *AggFunc = OG.OutlinedFunction; 1997 // Create a final block for each different return block. 1998 DenseMap<Value *, BasicBlock *> ReturnBBs; 1999 createAndInsertBasicBlocks(OG.EndBBs, ReturnBBs, AggFunc, "final_block"); 2000 2001 for (std::pair<Value *, BasicBlock *> &RetBlockPair : ReturnBBs) { 2002 std::pair<Value *, BasicBlock *> &OutputBlock = 2003 *OG.EndBBs.find(RetBlockPair.first); 2004 BasicBlock *ReturnBlock = RetBlockPair.second; 2005 BasicBlock *EndBB = OutputBlock.second; 2006 Instruction *Term = EndBB->getTerminator(); 2007 // Move the return value to the final block instead of the original exit 2008 // stub. 2009 Term->moveBefore(*ReturnBlock, ReturnBlock->end()); 2010 // Put the switch statement in the old end basic block for the function 2011 // with a fall through to the new return block. 2012 LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for " 2013 << OutputStoreBBs.size() << "\n"); 2014 SwitchInst *SwitchI = 2015 SwitchInst::Create(AggFunc->getArg(AggFunc->arg_size() - 1), 2016 ReturnBlock, OutputStoreBBs.size(), EndBB); 2017 2018 unsigned Idx = 0; 2019 for (DenseMap<Value *, BasicBlock *> &OutputStoreBB : OutputStoreBBs) { 2020 DenseMap<Value *, BasicBlock *>::iterator OSBBIt = 2021 OutputStoreBB.find(OutputBlock.first); 2022 2023 if (OSBBIt == OutputStoreBB.end()) 2024 continue; 2025 2026 BasicBlock *BB = OSBBIt->second; 2027 SwitchI->addCase( 2028 ConstantInt::get(Type::getInt32Ty(M.getContext()), Idx), BB); 2029 Term = BB->getTerminator(); 2030 Term->setSuccessor(0, ReturnBlock); 2031 Idx++; 2032 } 2033 } 2034 return; 2035 } 2036 2037 assert(OutputStoreBBs.size() < 2 && "Different store sets not handled!"); 2038 2039 // If there needs to be stores, move them from the output blocks to their 2040 // corresponding ending block. We do not check that the OutputGVNCombinations 2041 // is equal to 1 here since that could just been the case where there are 0 2042 // outputs. Instead, we check whether there is more than one set of output 2043 // blocks since this is the only case where we would have to move the 2044 // stores, and erase the extraneous blocks. 2045 if (OutputStoreBBs.size() == 1) { 2046 LLVM_DEBUG(dbgs() << "Move store instructions to the end block in " 2047 << *OG.OutlinedFunction << "\n"); 2048 DenseMap<Value *, BasicBlock *> OutputBlocks = OutputStoreBBs[0]; 2049 for (std::pair<Value *, BasicBlock *> &VBPair : OutputBlocks) { 2050 DenseMap<Value *, BasicBlock *>::iterator EndBBIt = 2051 EndBBs.find(VBPair.first); 2052 assert(EndBBIt != EndBBs.end() && "Could not find end block"); 2053 BasicBlock *EndBB = EndBBIt->second; 2054 BasicBlock *OutputBB = VBPair.second; 2055 Instruction *Term = OutputBB->getTerminator(); 2056 Term->eraseFromParent(); 2057 Term = EndBB->getTerminator(); 2058 moveBBContents(*OutputBB, *EndBB); 2059 Term->moveBefore(*EndBB, EndBB->end()); 2060 OutputBB->eraseFromParent(); 2061 } 2062 } 2063 } 2064 2065 /// Fill the new function that will serve as the replacement function for all of 2066 /// the extracted regions of a certain structure from the first region in the 2067 /// list of regions. Replace this first region's extracted function with the 2068 /// new overall function. 2069 /// 2070 /// \param [in] M - The module we are outlining from. 2071 /// \param [in] CurrentGroup - The group of regions to be outlined. 2072 /// \param [in,out] OutputStoreBBs - The output blocks for each different 2073 /// set of stores needed for the different functions. 2074 /// \param [in,out] FuncsToRemove - Extracted functions to erase from module 2075 /// once outlining is complete. 2076 /// \param [in] OutputMappings - Extracted functions to erase from module 2077 /// once outlining is complete. 2078 static void fillOverallFunction( 2079 Module &M, OutlinableGroup &CurrentGroup, 2080 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs, 2081 std::vector<Function *> &FuncsToRemove, 2082 const DenseMap<Value *, Value *> &OutputMappings) { 2083 OutlinableRegion *CurrentOS = CurrentGroup.Regions[0]; 2084 2085 // Move first extracted function's instructions into new function. 2086 LLVM_DEBUG(dbgs() << "Move instructions from " 2087 << *CurrentOS->ExtractedFunction << " to instruction " 2088 << *CurrentGroup.OutlinedFunction << "\n"); 2089 moveFunctionData(*CurrentOS->ExtractedFunction, 2090 *CurrentGroup.OutlinedFunction, CurrentGroup.EndBBs); 2091 2092 // Transfer the attributes from the function to the new function. 2093 for (Attribute A : CurrentOS->ExtractedFunction->getAttributes().getFnAttrs()) 2094 CurrentGroup.OutlinedFunction->addFnAttr(A); 2095 2096 // Create a new set of output blocks for the first extracted function. 2097 DenseMap<Value *, BasicBlock *> NewBBs; 2098 createAndInsertBasicBlocks(CurrentGroup.EndBBs, NewBBs, 2099 CurrentGroup.OutlinedFunction, "output_block_0"); 2100 CurrentOS->OutputBlockNum = 0; 2101 2102 replaceArgumentUses(*CurrentOS, NewBBs, OutputMappings, true); 2103 replaceConstants(*CurrentOS); 2104 2105 // We first identify if any output blocks are empty, if they are we remove 2106 // them. We then create a branch instruction to the basic block to the return 2107 // block for the function for each non empty output block. 2108 if (!analyzeAndPruneOutputBlocks(NewBBs, *CurrentOS)) { 2109 OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>()); 2110 for (std::pair<Value *, BasicBlock *> &VToBB : NewBBs) { 2111 DenseMap<Value *, BasicBlock *>::iterator VBBIt = 2112 CurrentGroup.EndBBs.find(VToBB.first); 2113 BasicBlock *EndBB = VBBIt->second; 2114 BranchInst::Create(EndBB, VToBB.second); 2115 OutputStoreBBs.back().insert(VToBB); 2116 } 2117 } 2118 2119 // Replace the call to the extracted function with the outlined function. 2120 CurrentOS->Call = replaceCalledFunction(M, *CurrentOS); 2121 2122 // We only delete the extracted functions at the end since we may need to 2123 // reference instructions contained in them for mapping purposes. 2124 FuncsToRemove.push_back(CurrentOS->ExtractedFunction); 2125 } 2126 2127 void IROutliner::deduplicateExtractedSections( 2128 Module &M, OutlinableGroup &CurrentGroup, 2129 std::vector<Function *> &FuncsToRemove, unsigned &OutlinedFunctionNum) { 2130 createFunction(M, CurrentGroup, OutlinedFunctionNum); 2131 2132 std::vector<DenseMap<Value *, BasicBlock *>> OutputStoreBBs; 2133 2134 OutlinableRegion *CurrentOS; 2135 2136 fillOverallFunction(M, CurrentGroup, OutputStoreBBs, FuncsToRemove, 2137 OutputMappings); 2138 2139 std::vector<Value *> SortedKeys; 2140 for (unsigned Idx = 1; Idx < CurrentGroup.Regions.size(); Idx++) { 2141 CurrentOS = CurrentGroup.Regions[Idx]; 2142 AttributeFuncs::mergeAttributesForOutlining(*CurrentGroup.OutlinedFunction, 2143 *CurrentOS->ExtractedFunction); 2144 2145 // Create a set of BasicBlocks, one for each return block, to hold the 2146 // needed store instructions. 2147 DenseMap<Value *, BasicBlock *> NewBBs; 2148 createAndInsertBasicBlocks( 2149 CurrentGroup.EndBBs, NewBBs, CurrentGroup.OutlinedFunction, 2150 "output_block_" + Twine(static_cast<unsigned>(Idx))); 2151 replaceArgumentUses(*CurrentOS, NewBBs, OutputMappings); 2152 alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBBs, 2153 CurrentGroup.EndBBs, OutputMappings, 2154 OutputStoreBBs); 2155 2156 CurrentOS->Call = replaceCalledFunction(M, *CurrentOS); 2157 FuncsToRemove.push_back(CurrentOS->ExtractedFunction); 2158 } 2159 2160 // Create a switch statement to handle the different output schemes. 2161 createSwitchStatement(M, CurrentGroup, CurrentGroup.EndBBs, OutputStoreBBs); 2162 2163 OutlinedFunctionNum++; 2164 } 2165 2166 /// Checks that the next instruction in the InstructionDataList matches the 2167 /// next instruction in the module. If they do not, there could be the 2168 /// possibility that extra code has been inserted, and we must ignore it. 2169 /// 2170 /// \param ID - The IRInstructionData to check the next instruction of. 2171 /// \returns true if the InstructionDataList and actual instruction match. 2172 static bool nextIRInstructionDataMatchesNextInst(IRInstructionData &ID) { 2173 // We check if there is a discrepancy between the InstructionDataList 2174 // and the actual next instruction in the module. If there is, it means 2175 // that an extra instruction was added, likely by the CodeExtractor. 2176 2177 // Since we do not have any similarity data about this particular 2178 // instruction, we cannot confidently outline it, and must discard this 2179 // candidate. 2180 IRInstructionDataList::iterator NextIDIt = std::next(ID.getIterator()); 2181 Instruction *NextIDLInst = NextIDIt->Inst; 2182 Instruction *NextModuleInst = nullptr; 2183 if (!ID.Inst->isTerminator()) 2184 NextModuleInst = ID.Inst->getNextNonDebugInstruction(); 2185 else if (NextIDLInst != nullptr) 2186 NextModuleInst = 2187 &*NextIDIt->Inst->getParent()->instructionsWithoutDebug().begin(); 2188 2189 if (NextIDLInst && NextIDLInst != NextModuleInst) 2190 return false; 2191 2192 return true; 2193 } 2194 2195 bool IROutliner::isCompatibleWithAlreadyOutlinedCode( 2196 const OutlinableRegion &Region) { 2197 IRSimilarityCandidate *IRSC = Region.Candidate; 2198 unsigned StartIdx = IRSC->getStartIdx(); 2199 unsigned EndIdx = IRSC->getEndIdx(); 2200 2201 // A check to make sure that we are not about to attempt to outline something 2202 // that has already been outlined. 2203 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++) 2204 if (Outlined.contains(Idx)) 2205 return false; 2206 2207 // We check if the recorded instruction matches the actual next instruction, 2208 // if it does not, we fix it in the InstructionDataList. 2209 if (!Region.Candidate->backInstruction()->isTerminator()) { 2210 Instruction *NewEndInst = 2211 Region.Candidate->backInstruction()->getNextNonDebugInstruction(); 2212 assert(NewEndInst && "Next instruction is a nullptr?"); 2213 if (Region.Candidate->end()->Inst != NewEndInst) { 2214 IRInstructionDataList *IDL = Region.Candidate->front()->IDL; 2215 IRInstructionData *NewEndIRID = new (InstDataAllocator.Allocate()) 2216 IRInstructionData(*NewEndInst, 2217 InstructionClassifier.visit(*NewEndInst), *IDL); 2218 2219 // Insert the first IRInstructionData of the new region after the 2220 // last IRInstructionData of the IRSimilarityCandidate. 2221 IDL->insert(Region.Candidate->end(), *NewEndIRID); 2222 } 2223 } 2224 2225 return none_of(*IRSC, [this](IRInstructionData &ID) { 2226 if (!nextIRInstructionDataMatchesNextInst(ID)) 2227 return true; 2228 2229 return !this->InstructionClassifier.visit(ID.Inst); 2230 }); 2231 } 2232 2233 void IROutliner::pruneIncompatibleRegions( 2234 std::vector<IRSimilarityCandidate> &CandidateVec, 2235 OutlinableGroup &CurrentGroup) { 2236 bool PreviouslyOutlined; 2237 2238 // Sort from beginning to end, so the IRSimilarityCandidates are in order. 2239 stable_sort(CandidateVec, [](const IRSimilarityCandidate &LHS, 2240 const IRSimilarityCandidate &RHS) { 2241 return LHS.getStartIdx() < RHS.getStartIdx(); 2242 }); 2243 2244 IRSimilarityCandidate &FirstCandidate = CandidateVec[0]; 2245 // Since outlining a call and a branch instruction will be the same as only 2246 // outlinining a call instruction, we ignore it as a space saving. 2247 if (FirstCandidate.getLength() == 2) { 2248 if (isa<CallInst>(FirstCandidate.front()->Inst) && 2249 isa<BranchInst>(FirstCandidate.back()->Inst)) 2250 return; 2251 } 2252 2253 unsigned CurrentEndIdx = 0; 2254 for (IRSimilarityCandidate &IRSC : CandidateVec) { 2255 PreviouslyOutlined = false; 2256 unsigned StartIdx = IRSC.getStartIdx(); 2257 unsigned EndIdx = IRSC.getEndIdx(); 2258 2259 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++) 2260 if (Outlined.contains(Idx)) { 2261 PreviouslyOutlined = true; 2262 break; 2263 } 2264 2265 if (PreviouslyOutlined) 2266 continue; 2267 2268 // Check over the instructions, and if the basic block has its address 2269 // taken for use somewhere else, we do not outline that block. 2270 bool BBHasAddressTaken = any_of(IRSC, [](IRInstructionData &ID){ 2271 return ID.Inst->getParent()->hasAddressTaken(); 2272 }); 2273 2274 if (BBHasAddressTaken) 2275 continue; 2276 2277 if (IRSC.front()->Inst->getFunction()->hasLinkOnceODRLinkage() && 2278 !OutlineFromLinkODRs) 2279 continue; 2280 2281 // Greedily prune out any regions that will overlap with already chosen 2282 // regions. 2283 if (CurrentEndIdx != 0 && StartIdx <= CurrentEndIdx) 2284 continue; 2285 2286 bool BadInst = any_of(IRSC, [this](IRInstructionData &ID) { 2287 if (!nextIRInstructionDataMatchesNextInst(ID)) 2288 return true; 2289 2290 return !this->InstructionClassifier.visit(ID.Inst); 2291 }); 2292 2293 if (BadInst) 2294 continue; 2295 2296 OutlinableRegion *OS = new (RegionAllocator.Allocate()) 2297 OutlinableRegion(IRSC, CurrentGroup); 2298 CurrentGroup.Regions.push_back(OS); 2299 2300 CurrentEndIdx = EndIdx; 2301 } 2302 } 2303 2304 InstructionCost 2305 IROutliner::findBenefitFromAllRegions(OutlinableGroup &CurrentGroup) { 2306 InstructionCost RegionBenefit = 0; 2307 for (OutlinableRegion *Region : CurrentGroup.Regions) { 2308 TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent()); 2309 // We add the number of instructions in the region to the benefit as an 2310 // estimate as to how much will be removed. 2311 RegionBenefit += Region->getBenefit(TTI); 2312 LLVM_DEBUG(dbgs() << "Adding: " << RegionBenefit 2313 << " saved instructions to overfall benefit.\n"); 2314 } 2315 2316 return RegionBenefit; 2317 } 2318 2319 /// For the \p OutputCanon number passed in find the value represented by this 2320 /// canonical number. If it is from a PHINode, we pick the first incoming 2321 /// value and return that Value instead. 2322 /// 2323 /// \param Region - The OutlinableRegion to get the Value from. 2324 /// \param OutputCanon - The canonical number to find the Value from. 2325 /// \returns The Value represented by a canonical number \p OutputCanon in \p 2326 /// Region. 2327 static Value *findOutputValueInRegion(OutlinableRegion &Region, 2328 unsigned OutputCanon) { 2329 OutlinableGroup &CurrentGroup = *Region.Parent; 2330 // If the value is greater than the value in the tracker, we have a 2331 // PHINode and will instead use one of the incoming values to find the 2332 // type. 2333 if (OutputCanon > CurrentGroup.PHINodeGVNTracker) { 2334 auto It = CurrentGroup.PHINodeGVNToGVNs.find(OutputCanon); 2335 assert(It != CurrentGroup.PHINodeGVNToGVNs.end() && 2336 "Could not find GVN set for PHINode number!"); 2337 assert(It->second.second.size() > 0 && "PHINode does not have any values!"); 2338 OutputCanon = *It->second.second.begin(); 2339 } 2340 Optional<unsigned> OGVN = Region.Candidate->fromCanonicalNum(OutputCanon); 2341 assert(OGVN.hasValue() && "Could not find GVN for Canonical Number?"); 2342 Optional<Value *> OV = Region.Candidate->fromGVN(*OGVN); 2343 assert(OV.hasValue() && "Could not find value for GVN?"); 2344 return *OV; 2345 } 2346 2347 InstructionCost 2348 IROutliner::findCostOutputReloads(OutlinableGroup &CurrentGroup) { 2349 InstructionCost OverallCost = 0; 2350 for (OutlinableRegion *Region : CurrentGroup.Regions) { 2351 TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent()); 2352 2353 // Each output incurs a load after the call, so we add that to the cost. 2354 for (unsigned OutputCanon : Region->GVNStores) { 2355 Value *V = findOutputValueInRegion(*Region, OutputCanon); 2356 InstructionCost LoadCost = 2357 TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0, 2358 TargetTransformInfo::TCK_CodeSize); 2359 2360 LLVM_DEBUG(dbgs() << "Adding: " << LoadCost 2361 << " instructions to cost for output of type " 2362 << *V->getType() << "\n"); 2363 OverallCost += LoadCost; 2364 } 2365 } 2366 2367 return OverallCost; 2368 } 2369 2370 /// Find the extra instructions needed to handle any output values for the 2371 /// region. 2372 /// 2373 /// \param [in] M - The Module to outline from. 2374 /// \param [in] CurrentGroup - The collection of OutlinableRegions to analyze. 2375 /// \param [in] TTI - The TargetTransformInfo used to collect information for 2376 /// new instruction costs. 2377 /// \returns the additional cost to handle the outputs. 2378 static InstructionCost findCostForOutputBlocks(Module &M, 2379 OutlinableGroup &CurrentGroup, 2380 TargetTransformInfo &TTI) { 2381 InstructionCost OutputCost = 0; 2382 unsigned NumOutputBranches = 0; 2383 2384 OutlinableRegion &FirstRegion = *CurrentGroup.Regions[0]; 2385 IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate; 2386 DenseSet<BasicBlock *> CandidateBlocks; 2387 Candidate.getBasicBlocks(CandidateBlocks); 2388 2389 // Count the number of different output branches that point to blocks outside 2390 // of the region. 2391 DenseSet<BasicBlock *> FoundBlocks; 2392 for (IRInstructionData &ID : Candidate) { 2393 if (!isa<BranchInst>(ID.Inst)) 2394 continue; 2395 2396 for (Value *V : ID.OperVals) { 2397 BasicBlock *BB = static_cast<BasicBlock *>(V); 2398 DenseSet<BasicBlock *>::iterator CBIt = CandidateBlocks.find(BB); 2399 if (CBIt != CandidateBlocks.end() || FoundBlocks.contains(BB)) 2400 continue; 2401 FoundBlocks.insert(BB); 2402 NumOutputBranches++; 2403 } 2404 } 2405 2406 CurrentGroup.BranchesToOutside = NumOutputBranches; 2407 2408 for (const ArrayRef<unsigned> &OutputUse : 2409 CurrentGroup.OutputGVNCombinations) { 2410 for (unsigned OutputCanon : OutputUse) { 2411 Value *V = findOutputValueInRegion(FirstRegion, OutputCanon); 2412 InstructionCost StoreCost = 2413 TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0, 2414 TargetTransformInfo::TCK_CodeSize); 2415 2416 // An instruction cost is added for each store set that needs to occur for 2417 // various output combinations inside the function, plus a branch to 2418 // return to the exit block. 2419 LLVM_DEBUG(dbgs() << "Adding: " << StoreCost 2420 << " instructions to cost for output of type " 2421 << *V->getType() << "\n"); 2422 OutputCost += StoreCost * NumOutputBranches; 2423 } 2424 2425 InstructionCost BranchCost = 2426 TTI.getCFInstrCost(Instruction::Br, TargetTransformInfo::TCK_CodeSize); 2427 LLVM_DEBUG(dbgs() << "Adding " << BranchCost << " to the current cost for" 2428 << " a branch instruction\n"); 2429 OutputCost += BranchCost * NumOutputBranches; 2430 } 2431 2432 // If there is more than one output scheme, we must have a comparison and 2433 // branch for each different item in the switch statement. 2434 if (CurrentGroup.OutputGVNCombinations.size() > 1) { 2435 InstructionCost ComparisonCost = TTI.getCmpSelInstrCost( 2436 Instruction::ICmp, Type::getInt32Ty(M.getContext()), 2437 Type::getInt32Ty(M.getContext()), CmpInst::BAD_ICMP_PREDICATE, 2438 TargetTransformInfo::TCK_CodeSize); 2439 InstructionCost BranchCost = 2440 TTI.getCFInstrCost(Instruction::Br, TargetTransformInfo::TCK_CodeSize); 2441 2442 unsigned DifferentBlocks = CurrentGroup.OutputGVNCombinations.size(); 2443 InstructionCost TotalCost = ComparisonCost * BranchCost * DifferentBlocks; 2444 2445 LLVM_DEBUG(dbgs() << "Adding: " << TotalCost 2446 << " instructions for each switch case for each different" 2447 << " output path in a function\n"); 2448 OutputCost += TotalCost * NumOutputBranches; 2449 } 2450 2451 return OutputCost; 2452 } 2453 2454 void IROutliner::findCostBenefit(Module &M, OutlinableGroup &CurrentGroup) { 2455 InstructionCost RegionBenefit = findBenefitFromAllRegions(CurrentGroup); 2456 CurrentGroup.Benefit += RegionBenefit; 2457 LLVM_DEBUG(dbgs() << "Current Benefit: " << CurrentGroup.Benefit << "\n"); 2458 2459 InstructionCost OutputReloadCost = findCostOutputReloads(CurrentGroup); 2460 CurrentGroup.Cost += OutputReloadCost; 2461 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n"); 2462 2463 InstructionCost AverageRegionBenefit = 2464 RegionBenefit / CurrentGroup.Regions.size(); 2465 unsigned OverallArgumentNum = CurrentGroup.ArgumentTypes.size(); 2466 unsigned NumRegions = CurrentGroup.Regions.size(); 2467 TargetTransformInfo &TTI = 2468 getTTI(*CurrentGroup.Regions[0]->Candidate->getFunction()); 2469 2470 // We add one region to the cost once, to account for the instructions added 2471 // inside of the newly created function. 2472 LLVM_DEBUG(dbgs() << "Adding: " << AverageRegionBenefit 2473 << " instructions to cost for body of new function.\n"); 2474 CurrentGroup.Cost += AverageRegionBenefit; 2475 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n"); 2476 2477 // For each argument, we must add an instruction for loading the argument 2478 // out of the register and into a value inside of the newly outlined function. 2479 LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum 2480 << " instructions to cost for each argument in the new" 2481 << " function.\n"); 2482 CurrentGroup.Cost += 2483 OverallArgumentNum * TargetTransformInfo::TCC_Basic; 2484 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n"); 2485 2486 // Each argument needs to either be loaded into a register or onto the stack. 2487 // Some arguments will only be loaded into the stack once the argument 2488 // registers are filled. 2489 LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum 2490 << " instructions to cost for each argument in the new" 2491 << " function " << NumRegions << " times for the " 2492 << "needed argument handling at the call site.\n"); 2493 CurrentGroup.Cost += 2494 2 * OverallArgumentNum * TargetTransformInfo::TCC_Basic * NumRegions; 2495 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n"); 2496 2497 CurrentGroup.Cost += findCostForOutputBlocks(M, CurrentGroup, TTI); 2498 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n"); 2499 } 2500 2501 void IROutliner::updateOutputMapping(OutlinableRegion &Region, 2502 ArrayRef<Value *> Outputs, 2503 LoadInst *LI) { 2504 // For and load instructions following the call 2505 Value *Operand = LI->getPointerOperand(); 2506 Optional<unsigned> OutputIdx = None; 2507 // Find if the operand it is an output register. 2508 for (unsigned ArgIdx = Region.NumExtractedInputs; 2509 ArgIdx < Region.Call->arg_size(); ArgIdx++) { 2510 if (Operand == Region.Call->getArgOperand(ArgIdx)) { 2511 OutputIdx = ArgIdx - Region.NumExtractedInputs; 2512 break; 2513 } 2514 } 2515 2516 // If we found an output register, place a mapping of the new value 2517 // to the original in the mapping. 2518 if (!OutputIdx.hasValue()) 2519 return; 2520 2521 if (OutputMappings.find(Outputs[OutputIdx.getValue()]) == 2522 OutputMappings.end()) { 2523 LLVM_DEBUG(dbgs() << "Mapping extracted output " << *LI << " to " 2524 << *Outputs[OutputIdx.getValue()] << "\n"); 2525 OutputMappings.insert(std::make_pair(LI, Outputs[OutputIdx.getValue()])); 2526 } else { 2527 Value *Orig = OutputMappings.find(Outputs[OutputIdx.getValue()])->second; 2528 LLVM_DEBUG(dbgs() << "Mapping extracted output " << *Orig << " to " 2529 << *Outputs[OutputIdx.getValue()] << "\n"); 2530 OutputMappings.insert(std::make_pair(LI, Orig)); 2531 } 2532 } 2533 2534 bool IROutliner::extractSection(OutlinableRegion &Region) { 2535 SetVector<Value *> ArgInputs, Outputs, SinkCands; 2536 assert(Region.StartBB && "StartBB for the OutlinableRegion is nullptr!"); 2537 BasicBlock *InitialStart = Region.StartBB; 2538 Function *OrigF = Region.StartBB->getParent(); 2539 CodeExtractorAnalysisCache CEAC(*OrigF); 2540 Region.ExtractedFunction = 2541 Region.CE->extractCodeRegion(CEAC, ArgInputs, Outputs); 2542 2543 // If the extraction was successful, find the BasicBlock, and reassign the 2544 // OutlinableRegion blocks 2545 if (!Region.ExtractedFunction) { 2546 LLVM_DEBUG(dbgs() << "CodeExtractor failed to outline " << Region.StartBB 2547 << "\n"); 2548 Region.reattachCandidate(); 2549 return false; 2550 } 2551 2552 // Get the block containing the called branch, and reassign the blocks as 2553 // necessary. If the original block still exists, it is because we ended on 2554 // a branch instruction, and so we move the contents into the block before 2555 // and assign the previous block correctly. 2556 User *InstAsUser = Region.ExtractedFunction->user_back(); 2557 BasicBlock *RewrittenBB = cast<Instruction>(InstAsUser)->getParent(); 2558 Region.PrevBB = RewrittenBB->getSinglePredecessor(); 2559 assert(Region.PrevBB && "PrevBB is nullptr?"); 2560 if (Region.PrevBB == InitialStart) { 2561 BasicBlock *NewPrev = InitialStart->getSinglePredecessor(); 2562 Instruction *BI = NewPrev->getTerminator(); 2563 BI->eraseFromParent(); 2564 moveBBContents(*InitialStart, *NewPrev); 2565 Region.PrevBB = NewPrev; 2566 InitialStart->eraseFromParent(); 2567 } 2568 2569 Region.StartBB = RewrittenBB; 2570 Region.EndBB = RewrittenBB; 2571 2572 // The sequences of outlinable regions has now changed. We must fix the 2573 // IRInstructionDataList for consistency. Although they may not be illegal 2574 // instructions, they should not be compared with anything else as they 2575 // should not be outlined in this round. So marking these as illegal is 2576 // allowed. 2577 IRInstructionDataList *IDL = Region.Candidate->front()->IDL; 2578 Instruction *BeginRewritten = &*RewrittenBB->begin(); 2579 Instruction *EndRewritten = &*RewrittenBB->begin(); 2580 Region.NewFront = new (InstDataAllocator.Allocate()) IRInstructionData( 2581 *BeginRewritten, InstructionClassifier.visit(*BeginRewritten), *IDL); 2582 Region.NewBack = new (InstDataAllocator.Allocate()) IRInstructionData( 2583 *EndRewritten, InstructionClassifier.visit(*EndRewritten), *IDL); 2584 2585 // Insert the first IRInstructionData of the new region in front of the 2586 // first IRInstructionData of the IRSimilarityCandidate. 2587 IDL->insert(Region.Candidate->begin(), *Region.NewFront); 2588 // Insert the first IRInstructionData of the new region after the 2589 // last IRInstructionData of the IRSimilarityCandidate. 2590 IDL->insert(Region.Candidate->end(), *Region.NewBack); 2591 // Remove the IRInstructionData from the IRSimilarityCandidate. 2592 IDL->erase(Region.Candidate->begin(), std::prev(Region.Candidate->end())); 2593 2594 assert(RewrittenBB != nullptr && 2595 "Could not find a predecessor after extraction!"); 2596 2597 // Iterate over the new set of instructions to find the new call 2598 // instruction. 2599 for (Instruction &I : *RewrittenBB) 2600 if (CallInst *CI = dyn_cast<CallInst>(&I)) { 2601 if (Region.ExtractedFunction == CI->getCalledFunction()) 2602 Region.Call = CI; 2603 } else if (LoadInst *LI = dyn_cast<LoadInst>(&I)) 2604 updateOutputMapping(Region, Outputs.getArrayRef(), LI); 2605 Region.reattachCandidate(); 2606 return true; 2607 } 2608 2609 unsigned IROutliner::doOutline(Module &M) { 2610 // Find the possible similarity sections. 2611 InstructionClassifier.EnableBranches = !DisableBranches; 2612 InstructionClassifier.EnableIndirectCalls = !DisableIndirectCalls; 2613 IRSimilarityIdentifier &Identifier = getIRSI(M); 2614 SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity(); 2615 2616 // Sort them by size of extracted sections 2617 unsigned OutlinedFunctionNum = 0; 2618 // If we only have one SimilarityGroup in SimilarityCandidates, we do not have 2619 // to sort them by the potential number of instructions to be outlined 2620 if (SimilarityCandidates.size() > 1) 2621 llvm::stable_sort(SimilarityCandidates, 2622 [](const std::vector<IRSimilarityCandidate> &LHS, 2623 const std::vector<IRSimilarityCandidate> &RHS) { 2624 return LHS[0].getLength() * LHS.size() > 2625 RHS[0].getLength() * RHS.size(); 2626 }); 2627 // Creating OutlinableGroups for each SimilarityCandidate to be used in 2628 // each of the following for loops to avoid making an allocator. 2629 std::vector<OutlinableGroup> PotentialGroups(SimilarityCandidates.size()); 2630 2631 DenseSet<unsigned> NotSame; 2632 std::vector<OutlinableGroup *> NegativeCostGroups; 2633 std::vector<OutlinableRegion *> OutlinedRegions; 2634 // Iterate over the possible sets of similarity. 2635 unsigned PotentialGroupIdx = 0; 2636 for (SimilarityGroup &CandidateVec : SimilarityCandidates) { 2637 OutlinableGroup &CurrentGroup = PotentialGroups[PotentialGroupIdx++]; 2638 2639 // Remove entries that were previously outlined 2640 pruneIncompatibleRegions(CandidateVec, CurrentGroup); 2641 2642 // We pruned the number of regions to 0 to 1, meaning that it's not worth 2643 // trying to outlined since there is no compatible similar instance of this 2644 // code. 2645 if (CurrentGroup.Regions.size() < 2) 2646 continue; 2647 2648 // Determine if there are any values that are the same constant throughout 2649 // each section in the set. 2650 NotSame.clear(); 2651 CurrentGroup.findSameConstants(NotSame); 2652 2653 if (CurrentGroup.IgnoreGroup) 2654 continue; 2655 2656 // Create a CodeExtractor for each outlinable region. Identify inputs and 2657 // outputs for each section using the code extractor and create the argument 2658 // types for the Aggregate Outlining Function. 2659 OutlinedRegions.clear(); 2660 for (OutlinableRegion *OS : CurrentGroup.Regions) { 2661 // Break the outlinable region out of its parent BasicBlock into its own 2662 // BasicBlocks (see function implementation). 2663 OS->splitCandidate(); 2664 2665 // There's a chance that when the region is split, extra instructions are 2666 // added to the region. This makes the region no longer viable 2667 // to be split, so we ignore it for outlining. 2668 if (!OS->CandidateSplit) 2669 continue; 2670 2671 SmallVector<BasicBlock *> BE; 2672 DenseSet<BasicBlock *> BlocksInRegion; 2673 OS->Candidate->getBasicBlocks(BlocksInRegion, BE); 2674 OS->CE = new (ExtractorAllocator.Allocate()) 2675 CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false, 2676 false, "outlined"); 2677 findAddInputsOutputs(M, *OS, NotSame); 2678 if (!OS->IgnoreRegion) 2679 OutlinedRegions.push_back(OS); 2680 2681 // We recombine the blocks together now that we have gathered all the 2682 // needed information. 2683 OS->reattachCandidate(); 2684 } 2685 2686 CurrentGroup.Regions = std::move(OutlinedRegions); 2687 2688 if (CurrentGroup.Regions.empty()) 2689 continue; 2690 2691 CurrentGroup.collectGVNStoreSets(M); 2692 2693 if (CostModel) 2694 findCostBenefit(M, CurrentGroup); 2695 2696 // If we are adhering to the cost model, skip those groups where the cost 2697 // outweighs the benefits. 2698 if (CurrentGroup.Cost >= CurrentGroup.Benefit && CostModel) { 2699 OptimizationRemarkEmitter &ORE = 2700 getORE(*CurrentGroup.Regions[0]->Candidate->getFunction()); 2701 ORE.emit([&]() { 2702 IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate; 2703 OptimizationRemarkMissed R(DEBUG_TYPE, "WouldNotDecreaseSize", 2704 C->frontInstruction()); 2705 R << "did not outline " 2706 << ore::NV(std::to_string(CurrentGroup.Regions.size())) 2707 << " regions due to estimated increase of " 2708 << ore::NV("InstructionIncrease", 2709 CurrentGroup.Cost - CurrentGroup.Benefit) 2710 << " instructions at locations "; 2711 interleave( 2712 CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(), 2713 [&R](OutlinableRegion *Region) { 2714 R << ore::NV( 2715 "DebugLoc", 2716 Region->Candidate->frontInstruction()->getDebugLoc()); 2717 }, 2718 [&R]() { R << " "; }); 2719 return R; 2720 }); 2721 continue; 2722 } 2723 2724 NegativeCostGroups.push_back(&CurrentGroup); 2725 } 2726 2727 ExtractorAllocator.DestroyAll(); 2728 2729 if (NegativeCostGroups.size() > 1) 2730 stable_sort(NegativeCostGroups, 2731 [](const OutlinableGroup *LHS, const OutlinableGroup *RHS) { 2732 return LHS->Benefit - LHS->Cost > RHS->Benefit - RHS->Cost; 2733 }); 2734 2735 std::vector<Function *> FuncsToRemove; 2736 for (OutlinableGroup *CG : NegativeCostGroups) { 2737 OutlinableGroup &CurrentGroup = *CG; 2738 2739 OutlinedRegions.clear(); 2740 for (OutlinableRegion *Region : CurrentGroup.Regions) { 2741 // We check whether our region is compatible with what has already been 2742 // outlined, and whether we need to ignore this item. 2743 if (!isCompatibleWithAlreadyOutlinedCode(*Region)) 2744 continue; 2745 OutlinedRegions.push_back(Region); 2746 } 2747 2748 if (OutlinedRegions.size() < 2) 2749 continue; 2750 2751 // Reestimate the cost and benefit of the OutlinableGroup. Continue only if 2752 // we are still outlining enough regions to make up for the added cost. 2753 CurrentGroup.Regions = std::move(OutlinedRegions); 2754 if (CostModel) { 2755 CurrentGroup.Benefit = 0; 2756 CurrentGroup.Cost = 0; 2757 findCostBenefit(M, CurrentGroup); 2758 if (CurrentGroup.Cost >= CurrentGroup.Benefit) 2759 continue; 2760 } 2761 OutlinedRegions.clear(); 2762 for (OutlinableRegion *Region : CurrentGroup.Regions) { 2763 Region->splitCandidate(); 2764 if (!Region->CandidateSplit) 2765 continue; 2766 OutlinedRegions.push_back(Region); 2767 } 2768 2769 CurrentGroup.Regions = std::move(OutlinedRegions); 2770 if (CurrentGroup.Regions.size() < 2) { 2771 for (OutlinableRegion *R : CurrentGroup.Regions) 2772 R->reattachCandidate(); 2773 continue; 2774 } 2775 2776 LLVM_DEBUG(dbgs() << "Outlining regions with cost " << CurrentGroup.Cost 2777 << " and benefit " << CurrentGroup.Benefit << "\n"); 2778 2779 // Create functions out of all the sections, and mark them as outlined. 2780 OutlinedRegions.clear(); 2781 for (OutlinableRegion *OS : CurrentGroup.Regions) { 2782 SmallVector<BasicBlock *> BE; 2783 DenseSet<BasicBlock *> BlocksInRegion; 2784 OS->Candidate->getBasicBlocks(BlocksInRegion, BE); 2785 OS->CE = new (ExtractorAllocator.Allocate()) 2786 CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false, 2787 false, "outlined"); 2788 bool FunctionOutlined = extractSection(*OS); 2789 if (FunctionOutlined) { 2790 unsigned StartIdx = OS->Candidate->getStartIdx(); 2791 unsigned EndIdx = OS->Candidate->getEndIdx(); 2792 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++) 2793 Outlined.insert(Idx); 2794 2795 OutlinedRegions.push_back(OS); 2796 } 2797 } 2798 2799 LLVM_DEBUG(dbgs() << "Outlined " << OutlinedRegions.size() 2800 << " with benefit " << CurrentGroup.Benefit 2801 << " and cost " << CurrentGroup.Cost << "\n"); 2802 2803 CurrentGroup.Regions = std::move(OutlinedRegions); 2804 2805 if (CurrentGroup.Regions.empty()) 2806 continue; 2807 2808 OptimizationRemarkEmitter &ORE = 2809 getORE(*CurrentGroup.Regions[0]->Call->getFunction()); 2810 ORE.emit([&]() { 2811 IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate; 2812 OptimizationRemark R(DEBUG_TYPE, "Outlined", C->front()->Inst); 2813 R << "outlined " << ore::NV(std::to_string(CurrentGroup.Regions.size())) 2814 << " regions with decrease of " 2815 << ore::NV("Benefit", CurrentGroup.Benefit - CurrentGroup.Cost) 2816 << " instructions at locations "; 2817 interleave( 2818 CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(), 2819 [&R](OutlinableRegion *Region) { 2820 R << ore::NV("DebugLoc", 2821 Region->Candidate->frontInstruction()->getDebugLoc()); 2822 }, 2823 [&R]() { R << " "; }); 2824 return R; 2825 }); 2826 2827 deduplicateExtractedSections(M, CurrentGroup, FuncsToRemove, 2828 OutlinedFunctionNum); 2829 } 2830 2831 for (Function *F : FuncsToRemove) 2832 F->eraseFromParent(); 2833 2834 return OutlinedFunctionNum; 2835 } 2836 2837 bool IROutliner::run(Module &M) { 2838 CostModel = !NoCostModel; 2839 OutlineFromLinkODRs = EnableLinkOnceODRIROutlining; 2840 2841 return doOutline(M) > 0; 2842 } 2843 2844 // Pass Manager Boilerplate 2845 namespace { 2846 class IROutlinerLegacyPass : public ModulePass { 2847 public: 2848 static char ID; 2849 IROutlinerLegacyPass() : ModulePass(ID) { 2850 initializeIROutlinerLegacyPassPass(*PassRegistry::getPassRegistry()); 2851 } 2852 2853 void getAnalysisUsage(AnalysisUsage &AU) const override { 2854 AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); 2855 AU.addRequired<TargetTransformInfoWrapperPass>(); 2856 AU.addRequired<IRSimilarityIdentifierWrapperPass>(); 2857 } 2858 2859 bool runOnModule(Module &M) override; 2860 }; 2861 } // namespace 2862 2863 bool IROutlinerLegacyPass::runOnModule(Module &M) { 2864 if (skipModule(M)) 2865 return false; 2866 2867 std::unique_ptr<OptimizationRemarkEmitter> ORE; 2868 auto GORE = [&ORE](Function &F) -> OptimizationRemarkEmitter & { 2869 ORE.reset(new OptimizationRemarkEmitter(&F)); 2870 return *ORE.get(); 2871 }; 2872 2873 auto GTTI = [this](Function &F) -> TargetTransformInfo & { 2874 return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 2875 }; 2876 2877 auto GIRSI = [this](Module &) -> IRSimilarityIdentifier & { 2878 return this->getAnalysis<IRSimilarityIdentifierWrapperPass>().getIRSI(); 2879 }; 2880 2881 return IROutliner(GTTI, GIRSI, GORE).run(M); 2882 } 2883 2884 PreservedAnalyses IROutlinerPass::run(Module &M, ModuleAnalysisManager &AM) { 2885 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 2886 2887 std::function<TargetTransformInfo &(Function &)> GTTI = 2888 [&FAM](Function &F) -> TargetTransformInfo & { 2889 return FAM.getResult<TargetIRAnalysis>(F); 2890 }; 2891 2892 std::function<IRSimilarityIdentifier &(Module &)> GIRSI = 2893 [&AM](Module &M) -> IRSimilarityIdentifier & { 2894 return AM.getResult<IRSimilarityAnalysis>(M); 2895 }; 2896 2897 std::unique_ptr<OptimizationRemarkEmitter> ORE; 2898 std::function<OptimizationRemarkEmitter &(Function &)> GORE = 2899 [&ORE](Function &F) -> OptimizationRemarkEmitter & { 2900 ORE.reset(new OptimizationRemarkEmitter(&F)); 2901 return *ORE.get(); 2902 }; 2903 2904 if (IROutliner(GTTI, GIRSI, GORE).run(M)) 2905 return PreservedAnalyses::none(); 2906 return PreservedAnalyses::all(); 2907 } 2908 2909 char IROutlinerLegacyPass::ID = 0; 2910 INITIALIZE_PASS_BEGIN(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false, 2911 false) 2912 INITIALIZE_PASS_DEPENDENCY(IRSimilarityIdentifierWrapperPass) 2913 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) 2914 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 2915 INITIALIZE_PASS_END(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false, 2916 false) 2917 2918 ModulePass *llvm::createIROutlinerPass() { return new IROutlinerLegacyPass(); } 2919