1 //===- IROutliner.cpp -- Outline Similar Regions ----------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 // Implementation for the IROutliner which is used by the IROutliner Pass. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Transforms/IPO/IROutliner.h" 15 #include "llvm/Analysis/IRSimilarityIdentifier.h" 16 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 17 #include "llvm/Analysis/TargetTransformInfo.h" 18 #include "llvm/IR/Attributes.h" 19 #include "llvm/IR/DebugInfoMetadata.h" 20 #include "llvm/IR/DIBuilder.h" 21 #include "llvm/IR/Dominators.h" 22 #include "llvm/IR/Mangler.h" 23 #include "llvm/IR/PassManager.h" 24 #include "llvm/InitializePasses.h" 25 #include "llvm/Pass.h" 26 #include "llvm/Support/CommandLine.h" 27 #include "llvm/Transforms/IPO.h" 28 #include <map> 29 #include <set> 30 #include <vector> 31 32 #define DEBUG_TYPE "iroutliner" 33 34 using namespace llvm; 35 using namespace IRSimilarity; 36 37 // A command flag to be used for debugging to exclude branches from similarity 38 // matching and outlining. 39 namespace llvm { 40 extern cl::opt<bool> DisableBranches; 41 42 // A command flag to be used for debugging to indirect calls from similarity 43 // matching and outlining. 44 extern cl::opt<bool> DisableIndirectCalls; 45 46 // A command flag to be used for debugging to exclude intrinsics from similarity 47 // matching and outlining. 48 extern cl::opt<bool> DisableIntrinsics; 49 50 } // namespace llvm 51 52 // Set to true if the user wants the ir outliner to run on linkonceodr linkage 53 // functions. This is false by default because the linker can dedupe linkonceodr 54 // functions. Since the outliner is confined to a single module (modulo LTO), 55 // this is off by default. It should, however, be the default behavior in 56 // LTO. 57 static cl::opt<bool> EnableLinkOnceODRIROutlining( 58 "enable-linkonceodr-ir-outlining", cl::Hidden, 59 cl::desc("Enable the IR outliner on linkonceodr functions"), 60 cl::init(false)); 61 62 // This is a debug option to test small pieces of code to ensure that outlining 63 // works correctly. 64 static cl::opt<bool> NoCostModel( 65 "ir-outlining-no-cost", cl::init(false), cl::ReallyHidden, 66 cl::desc("Debug option to outline greedily, without restriction that " 67 "calculated benefit outweighs cost")); 68 69 /// The OutlinableGroup holds all the overarching information for outlining 70 /// a set of regions that are structurally similar to one another, such as the 71 /// types of the overall function, the output blocks, the sets of stores needed 72 /// and a list of the different regions. This information is used in the 73 /// deduplication of extracted regions with the same structure. 74 struct OutlinableGroup { 75 /// The sections that could be outlined 76 std::vector<OutlinableRegion *> Regions; 77 78 /// The argument types for the function created as the overall function to 79 /// replace the extracted function for each region. 80 std::vector<Type *> ArgumentTypes; 81 /// The FunctionType for the overall function. 82 FunctionType *OutlinedFunctionType = nullptr; 83 /// The Function for the collective overall function. 84 Function *OutlinedFunction = nullptr; 85 86 /// Flag for whether we should not consider this group of OutlinableRegions 87 /// for extraction. 88 bool IgnoreGroup = false; 89 90 /// The return blocks for the overall function. 91 DenseMap<Value *, BasicBlock *> EndBBs; 92 93 /// The PHIBlocks with their corresponding return block based on the return 94 /// value as the key. 95 DenseMap<Value *, BasicBlock *> PHIBlocks; 96 97 /// A set containing the different GVN store sets needed. Each array contains 98 /// a sorted list of the different values that need to be stored into output 99 /// registers. 100 DenseSet<ArrayRef<unsigned>> OutputGVNCombinations; 101 102 /// Flag for whether the \ref ArgumentTypes have been defined after the 103 /// extraction of the first region. 104 bool InputTypesSet = false; 105 106 /// The number of input values in \ref ArgumentTypes. Anything after this 107 /// index in ArgumentTypes is an output argument. 108 unsigned NumAggregateInputs = 0; 109 110 /// The mapping of the canonical numbering of the values in outlined sections 111 /// to specific arguments. 112 DenseMap<unsigned, unsigned> CanonicalNumberToAggArg; 113 114 /// The number of branches in the region target a basic block that is outside 115 /// of the region. 116 unsigned BranchesToOutside = 0; 117 118 /// Tracker counting backwards from the highest unsigned value possible to 119 /// avoid conflicting with the GVNs of assigned values. We start at -3 since 120 /// -2 and -1 are assigned by the DenseMap. 121 unsigned PHINodeGVNTracker = -3; 122 123 DenseMap<unsigned, 124 std::pair<std::pair<unsigned, unsigned>, SmallVector<unsigned, 2>>> 125 PHINodeGVNToGVNs; 126 DenseMap<hash_code, unsigned> GVNsToPHINodeGVN; 127 128 /// The number of instructions that will be outlined by extracting \ref 129 /// Regions. 130 InstructionCost Benefit = 0; 131 /// The number of added instructions needed for the outlining of the \ref 132 /// Regions. 133 InstructionCost Cost = 0; 134 135 /// The argument that needs to be marked with the swifterr attribute. If not 136 /// needed, there is no value. 137 Optional<unsigned> SwiftErrorArgument; 138 139 /// For the \ref Regions, we look at every Value. If it is a constant, 140 /// we check whether it is the same in Region. 141 /// 142 /// \param [in,out] NotSame contains the global value numbers where the 143 /// constant is not always the same, and must be passed in as an argument. 144 void findSameConstants(DenseSet<unsigned> &NotSame); 145 146 /// For the regions, look at each set of GVN stores needed and account for 147 /// each combination. Add an argument to the argument types if there is 148 /// more than one combination. 149 /// 150 /// \param [in] M - The module we are outlining from. 151 void collectGVNStoreSets(Module &M); 152 }; 153 154 /// Move the contents of \p SourceBB to before the last instruction of \p 155 /// TargetBB. 156 /// \param SourceBB - the BasicBlock to pull Instructions from. 157 /// \param TargetBB - the BasicBlock to put Instruction into. 158 static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB) { 159 for (Instruction &I : llvm::make_early_inc_range(SourceBB)) 160 I.moveBefore(TargetBB, TargetBB.end()); 161 } 162 163 /// A function to sort the keys of \p Map, which must be a mapping of constant 164 /// values to basic blocks and return it in \p SortedKeys 165 /// 166 /// \param SortedKeys - The vector the keys will be return in and sorted. 167 /// \param Map - The DenseMap containing keys to sort. 168 static void getSortedConstantKeys(std::vector<Value *> &SortedKeys, 169 DenseMap<Value *, BasicBlock *> &Map) { 170 for (auto &VtoBB : Map) 171 SortedKeys.push_back(VtoBB.first); 172 173 stable_sort(SortedKeys, [](const Value *LHS, const Value *RHS) { 174 const ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS); 175 const ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS); 176 assert(RHSC && "Not a constant integer in return value?"); 177 assert(LHSC && "Not a constant integer in return value?"); 178 179 return LHSC->getLimitedValue() < RHSC->getLimitedValue(); 180 }); 181 } 182 183 Value *OutlinableRegion::findCorrespondingValueIn(const OutlinableRegion &Other, 184 Value *V) { 185 Optional<unsigned> GVN = Candidate->getGVN(V); 186 assert(GVN.hasValue() && "No GVN for incoming value"); 187 Optional<unsigned> CanonNum = Candidate->getCanonicalNum(*GVN); 188 Optional<unsigned> FirstGVN = Other.Candidate->fromCanonicalNum(*CanonNum); 189 Optional<Value *> FoundValueOpt = Other.Candidate->fromGVN(*FirstGVN); 190 return FoundValueOpt.getValueOr(nullptr); 191 } 192 193 /// Rewrite the BranchInsts in the incoming blocks to \p PHIBlock that are found 194 /// in \p Included to branch to BasicBlock \p Replace if they currently branch 195 /// to the BasicBlock \p Find. This is used to fix up the incoming basic blocks 196 /// when PHINodes are included in outlined regions. 197 /// 198 /// \param PHIBlock - The BasicBlock containing the PHINodes that need to be 199 /// checked. 200 /// \param Find - The successor block to be replaced. 201 /// \param Replace - The new succesor block to branch to. 202 /// \param Included - The set of blocks about to be outlined. 203 static void replaceTargetsFromPHINode(BasicBlock *PHIBlock, BasicBlock *Find, 204 BasicBlock *Replace, 205 DenseSet<BasicBlock *> &Included) { 206 for (PHINode &PN : PHIBlock->phis()) { 207 for (unsigned Idx = 0, PNEnd = PN.getNumIncomingValues(); Idx != PNEnd; 208 ++Idx) { 209 // Check if the incoming block is included in the set of blocks being 210 // outlined. 211 BasicBlock *Incoming = PN.getIncomingBlock(Idx); 212 if (!Included.contains(Incoming)) 213 continue; 214 215 BranchInst *BI = dyn_cast<BranchInst>(Incoming->getTerminator()); 216 assert(BI && "Not a branch instruction?"); 217 // Look over the branching instructions into this block to see if we 218 // used to branch to Find in this outlined block. 219 for (unsigned Succ = 0, End = BI->getNumSuccessors(); Succ != End; 220 Succ++) { 221 // If we have found the block to replace, we do so here. 222 if (BI->getSuccessor(Succ) != Find) 223 continue; 224 BI->setSuccessor(Succ, Replace); 225 } 226 } 227 } 228 } 229 230 231 void OutlinableRegion::splitCandidate() { 232 assert(!CandidateSplit && "Candidate already split!"); 233 234 Instruction *BackInst = Candidate->backInstruction(); 235 236 Instruction *EndInst = nullptr; 237 // Check whether the last instruction is a terminator, if it is, we do 238 // not split on the following instruction. We leave the block as it is. We 239 // also check that this is not the last instruction in the Module, otherwise 240 // the check for whether the current following instruction matches the 241 // previously recorded instruction will be incorrect. 242 if (!BackInst->isTerminator() || 243 BackInst->getParent() != &BackInst->getFunction()->back()) { 244 EndInst = Candidate->end()->Inst; 245 assert(EndInst && "Expected an end instruction?"); 246 } 247 248 // We check if the current instruction following the last instruction in the 249 // region is the same as the recorded instruction following the last 250 // instruction. If they do not match, there could be problems in rewriting 251 // the program after outlining, so we ignore it. 252 if (!BackInst->isTerminator() && 253 EndInst != BackInst->getNextNonDebugInstruction()) 254 return; 255 256 Instruction *StartInst = (*Candidate->begin()).Inst; 257 assert(StartInst && "Expected a start instruction?"); 258 StartBB = StartInst->getParent(); 259 PrevBB = StartBB; 260 261 DenseSet<BasicBlock *> BBSet; 262 Candidate->getBasicBlocks(BBSet); 263 264 // We iterate over the instructions in the region, if we find a PHINode, we 265 // check if there are predecessors outside of the region, if there are, 266 // we ignore this region since we are unable to handle the severing of the 267 // phi node right now. 268 BasicBlock::iterator It = StartInst->getIterator(); 269 while (PHINode *PN = dyn_cast<PHINode>(&*It)) { 270 unsigned NumPredsOutsideRegion = 0; 271 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 272 if (!BBSet.contains(PN->getIncomingBlock(i))) 273 ++NumPredsOutsideRegion; 274 275 if (NumPredsOutsideRegion > 1) 276 return; 277 278 It++; 279 } 280 281 // If the region starts with a PHINode, but is not the initial instruction of 282 // the BasicBlock, we ignore this region for now. 283 if (isa<PHINode>(StartInst) && StartInst != &*StartBB->begin()) 284 return; 285 286 // If the region ends with a PHINode, but does not contain all of the phi node 287 // instructions of the region, we ignore it for now. 288 if (isa<PHINode>(BackInst)) { 289 EndBB = BackInst->getParent(); 290 if (BackInst != &*std::prev(EndBB->getFirstInsertionPt())) 291 return; 292 } 293 294 // The basic block gets split like so: 295 // block: block: 296 // inst1 inst1 297 // inst2 inst2 298 // region1 br block_to_outline 299 // region2 block_to_outline: 300 // region3 -> region1 301 // region4 region2 302 // inst3 region3 303 // inst4 region4 304 // br block_after_outline 305 // block_after_outline: 306 // inst3 307 // inst4 308 309 std::string OriginalName = PrevBB->getName().str(); 310 311 StartBB = PrevBB->splitBasicBlock(StartInst, OriginalName + "_to_outline"); 312 PrevBB->replaceSuccessorsPhiUsesWith(PrevBB, StartBB); 313 314 CandidateSplit = true; 315 if (!BackInst->isTerminator()) { 316 EndBB = EndInst->getParent(); 317 FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline"); 318 EndBB->replaceSuccessorsPhiUsesWith(EndBB, FollowBB); 319 FollowBB->replaceSuccessorsPhiUsesWith(PrevBB, FollowBB); 320 } else { 321 EndBB = BackInst->getParent(); 322 EndsInBranch = true; 323 FollowBB = nullptr; 324 } 325 326 // Refind the basic block set. 327 BBSet.clear(); 328 Candidate->getBasicBlocks(BBSet); 329 // For the phi nodes in the new starting basic block of the region, we 330 // reassign the targets of the basic blocks branching instructions. 331 replaceTargetsFromPHINode(StartBB, PrevBB, StartBB, BBSet); 332 if (FollowBB) 333 replaceTargetsFromPHINode(FollowBB, EndBB, FollowBB, BBSet); 334 } 335 336 void OutlinableRegion::reattachCandidate() { 337 assert(CandidateSplit && "Candidate is not split!"); 338 339 // The basic block gets reattached like so: 340 // block: block: 341 // inst1 inst1 342 // inst2 inst2 343 // br block_to_outline region1 344 // block_to_outline: -> region2 345 // region1 region3 346 // region2 region4 347 // region3 inst3 348 // region4 inst4 349 // br block_after_outline 350 // block_after_outline: 351 // inst3 352 // inst4 353 assert(StartBB != nullptr && "StartBB for Candidate is not defined!"); 354 355 assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!"); 356 PrevBB->getTerminator()->eraseFromParent(); 357 358 // If we reattaching after outlining, we iterate over the phi nodes to 359 // the initial block, and reassign the branch instructions of the incoming 360 // blocks to the block we are remerging into. 361 if (!ExtractedFunction) { 362 DenseSet<BasicBlock *> BBSet; 363 Candidate->getBasicBlocks(BBSet); 364 365 replaceTargetsFromPHINode(StartBB, StartBB, PrevBB, BBSet); 366 if (!EndsInBranch) 367 replaceTargetsFromPHINode(FollowBB, FollowBB, EndBB, BBSet); 368 } 369 370 moveBBContents(*StartBB, *PrevBB); 371 372 BasicBlock *PlacementBB = PrevBB; 373 if (StartBB != EndBB) 374 PlacementBB = EndBB; 375 if (!EndsInBranch && PlacementBB->getUniqueSuccessor() != nullptr) { 376 assert(FollowBB != nullptr && "FollowBB for Candidate is not defined!"); 377 assert(PlacementBB->getTerminator() && "Terminator removed from EndBB!"); 378 PlacementBB->getTerminator()->eraseFromParent(); 379 moveBBContents(*FollowBB, *PlacementBB); 380 PlacementBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB); 381 FollowBB->eraseFromParent(); 382 } 383 384 PrevBB->replaceSuccessorsPhiUsesWith(StartBB, PrevBB); 385 StartBB->eraseFromParent(); 386 387 // Make sure to save changes back to the StartBB. 388 StartBB = PrevBB; 389 EndBB = nullptr; 390 PrevBB = nullptr; 391 FollowBB = nullptr; 392 393 CandidateSplit = false; 394 } 395 396 /// Find whether \p V matches the Constants previously found for the \p GVN. 397 /// 398 /// \param V - The value to check for consistency. 399 /// \param GVN - The global value number assigned to \p V. 400 /// \param GVNToConstant - The mapping of global value number to Constants. 401 /// \returns true if the Value matches the Constant mapped to by V and false if 402 /// it \p V is a Constant but does not match. 403 /// \returns None if \p V is not a Constant. 404 static Optional<bool> 405 constantMatches(Value *V, unsigned GVN, 406 DenseMap<unsigned, Constant *> &GVNToConstant) { 407 // See if we have a constants 408 Constant *CST = dyn_cast<Constant>(V); 409 if (!CST) 410 return None; 411 412 // Holds a mapping from a global value number to a Constant. 413 DenseMap<unsigned, Constant *>::iterator GVNToConstantIt; 414 bool Inserted; 415 416 417 // If we have a constant, try to make a new entry in the GVNToConstant. 418 std::tie(GVNToConstantIt, Inserted) = 419 GVNToConstant.insert(std::make_pair(GVN, CST)); 420 // If it was found and is not equal, it is not the same. We do not 421 // handle this case yet, and exit early. 422 if (Inserted || (GVNToConstantIt->second == CST)) 423 return true; 424 425 return false; 426 } 427 428 InstructionCost OutlinableRegion::getBenefit(TargetTransformInfo &TTI) { 429 InstructionCost Benefit = 0; 430 431 // Estimate the benefit of outlining a specific sections of the program. We 432 // delegate mostly this task to the TargetTransformInfo so that if the target 433 // has specific changes, we can have a more accurate estimate. 434 435 // However, getInstructionCost delegates the code size calculation for 436 // arithmetic instructions to getArithmeticInstrCost in 437 // include/Analysis/TargetTransformImpl.h, where it always estimates that the 438 // code size for a division and remainder instruction to be equal to 4, and 439 // everything else to 1. This is not an accurate representation of the 440 // division instruction for targets that have a native division instruction. 441 // To be overly conservative, we only add 1 to the number of instructions for 442 // each division instruction. 443 for (IRInstructionData &ID : *Candidate) { 444 Instruction *I = ID.Inst; 445 switch (I->getOpcode()) { 446 case Instruction::FDiv: 447 case Instruction::FRem: 448 case Instruction::SDiv: 449 case Instruction::SRem: 450 case Instruction::UDiv: 451 case Instruction::URem: 452 Benefit += 1; 453 break; 454 default: 455 Benefit += TTI.getInstructionCost(I, TargetTransformInfo::TCK_CodeSize); 456 break; 457 } 458 } 459 460 return Benefit; 461 } 462 463 /// Check the \p OutputMappings structure for value \p Input, if it exists 464 /// it has been used as an output for outlining, and has been renamed, and we 465 /// return the new value, otherwise, we return the same value. 466 /// 467 /// \param OutputMappings [in] - The mapping of values to their renamed value 468 /// after being used as an output for an outlined region. 469 /// \param Input [in] - The value to find the remapped value of, if it exists. 470 /// \return The remapped value if it has been renamed, and the same value if has 471 /// not. 472 static Value *findOutputMapping(const DenseMap<Value *, Value *> OutputMappings, 473 Value *Input) { 474 DenseMap<Value *, Value *>::const_iterator OutputMapping = 475 OutputMappings.find(Input); 476 if (OutputMapping != OutputMappings.end()) 477 return OutputMapping->second; 478 return Input; 479 } 480 481 /// Find whether \p Region matches the global value numbering to Constant 482 /// mapping found so far. 483 /// 484 /// \param Region - The OutlinableRegion we are checking for constants 485 /// \param GVNToConstant - The mapping of global value number to Constants. 486 /// \param NotSame - The set of global value numbers that do not have the same 487 /// constant in each region. 488 /// \returns true if all Constants are the same in every use of a Constant in \p 489 /// Region and false if not 490 static bool 491 collectRegionsConstants(OutlinableRegion &Region, 492 DenseMap<unsigned, Constant *> &GVNToConstant, 493 DenseSet<unsigned> &NotSame) { 494 bool ConstantsTheSame = true; 495 496 IRSimilarityCandidate &C = *Region.Candidate; 497 for (IRInstructionData &ID : C) { 498 499 // Iterate over the operands in an instruction. If the global value number, 500 // assigned by the IRSimilarityCandidate, has been seen before, we check if 501 // the the number has been found to be not the same value in each instance. 502 for (Value *V : ID.OperVals) { 503 Optional<unsigned> GVNOpt = C.getGVN(V); 504 assert(GVNOpt.hasValue() && "Expected a GVN for operand?"); 505 unsigned GVN = GVNOpt.getValue(); 506 507 // Check if this global value has been found to not be the same already. 508 if (NotSame.contains(GVN)) { 509 if (isa<Constant>(V)) 510 ConstantsTheSame = false; 511 continue; 512 } 513 514 // If it has been the same so far, we check the value for if the 515 // associated Constant value match the previous instances of the same 516 // global value number. If the global value does not map to a Constant, 517 // it is considered to not be the same value. 518 Optional<bool> ConstantMatches = constantMatches(V, GVN, GVNToConstant); 519 if (ConstantMatches.hasValue()) { 520 if (ConstantMatches.getValue()) 521 continue; 522 else 523 ConstantsTheSame = false; 524 } 525 526 // While this value is a register, it might not have been previously, 527 // make sure we don't already have a constant mapped to this global value 528 // number. 529 if (GVNToConstant.find(GVN) != GVNToConstant.end()) 530 ConstantsTheSame = false; 531 532 NotSame.insert(GVN); 533 } 534 } 535 536 return ConstantsTheSame; 537 } 538 539 void OutlinableGroup::findSameConstants(DenseSet<unsigned> &NotSame) { 540 DenseMap<unsigned, Constant *> GVNToConstant; 541 542 for (OutlinableRegion *Region : Regions) 543 collectRegionsConstants(*Region, GVNToConstant, NotSame); 544 } 545 546 void OutlinableGroup::collectGVNStoreSets(Module &M) { 547 for (OutlinableRegion *OS : Regions) 548 OutputGVNCombinations.insert(OS->GVNStores); 549 550 // We are adding an extracted argument to decide between which output path 551 // to use in the basic block. It is used in a switch statement and only 552 // needs to be an integer. 553 if (OutputGVNCombinations.size() > 1) 554 ArgumentTypes.push_back(Type::getInt32Ty(M.getContext())); 555 } 556 557 /// Get the subprogram if it exists for one of the outlined regions. 558 /// 559 /// \param [in] Group - The set of regions to find a subprogram for. 560 /// \returns the subprogram if it exists, or nullptr. 561 static DISubprogram *getSubprogramOrNull(OutlinableGroup &Group) { 562 for (OutlinableRegion *OS : Group.Regions) 563 if (Function *F = OS->Call->getFunction()) 564 if (DISubprogram *SP = F->getSubprogram()) 565 return SP; 566 567 return nullptr; 568 } 569 570 Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group, 571 unsigned FunctionNameSuffix) { 572 assert(!Group.OutlinedFunction && "Function is already defined!"); 573 574 Type *RetTy = Type::getVoidTy(M.getContext()); 575 // All extracted functions _should_ have the same return type at this point 576 // since the similarity identifier ensures that all branches outside of the 577 // region occur in the same place. 578 579 // NOTE: Should we ever move to the model that uses a switch at every point 580 // needed, meaning that we could branch within the region or out, it is 581 // possible that we will need to switch to using the most general case all of 582 // the time. 583 for (OutlinableRegion *R : Group.Regions) { 584 Type *ExtractedFuncType = R->ExtractedFunction->getReturnType(); 585 if ((RetTy->isVoidTy() && !ExtractedFuncType->isVoidTy()) || 586 (RetTy->isIntegerTy(1) && ExtractedFuncType->isIntegerTy(16))) 587 RetTy = ExtractedFuncType; 588 } 589 590 Group.OutlinedFunctionType = FunctionType::get( 591 RetTy, Group.ArgumentTypes, false); 592 593 // These functions will only be called from within the same module, so 594 // we can set an internal linkage. 595 Group.OutlinedFunction = Function::Create( 596 Group.OutlinedFunctionType, GlobalValue::InternalLinkage, 597 "outlined_ir_func_" + std::to_string(FunctionNameSuffix), M); 598 599 // Transfer the swifterr attribute to the correct function parameter. 600 if (Group.SwiftErrorArgument.hasValue()) 601 Group.OutlinedFunction->addParamAttr(Group.SwiftErrorArgument.getValue(), 602 Attribute::SwiftError); 603 604 Group.OutlinedFunction->addFnAttr(Attribute::OptimizeForSize); 605 Group.OutlinedFunction->addFnAttr(Attribute::MinSize); 606 607 // If there's a DISubprogram associated with this outlined function, then 608 // emit debug info for the outlined function. 609 if (DISubprogram *SP = getSubprogramOrNull(Group)) { 610 Function *F = Group.OutlinedFunction; 611 // We have a DISubprogram. Get its DICompileUnit. 612 DICompileUnit *CU = SP->getUnit(); 613 DIBuilder DB(M, true, CU); 614 DIFile *Unit = SP->getFile(); 615 Mangler Mg; 616 // Get the mangled name of the function for the linkage name. 617 std::string Dummy; 618 llvm::raw_string_ostream MangledNameStream(Dummy); 619 Mg.getNameWithPrefix(MangledNameStream, F, false); 620 621 DISubprogram *OutlinedSP = DB.createFunction( 622 Unit /* Context */, F->getName(), MangledNameStream.str(), 623 Unit /* File */, 624 0 /* Line 0 is reserved for compiler-generated code. */, 625 DB.createSubroutineType(DB.getOrCreateTypeArray(None)), /* void type */ 626 0, /* Line 0 is reserved for compiler-generated code. */ 627 DINode::DIFlags::FlagArtificial /* Compiler-generated code. */, 628 /* Outlined code is optimized code by definition. */ 629 DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized); 630 631 // Don't add any new variables to the subprogram. 632 DB.finalizeSubprogram(OutlinedSP); 633 634 // Attach subprogram to the function. 635 F->setSubprogram(OutlinedSP); 636 // We're done with the DIBuilder. 637 DB.finalize(); 638 } 639 640 return Group.OutlinedFunction; 641 } 642 643 /// Move each BasicBlock in \p Old to \p New. 644 /// 645 /// \param [in] Old - The function to move the basic blocks from. 646 /// \param [in] New - The function to move the basic blocks to. 647 /// \param [out] NewEnds - The return blocks of the new overall function. 648 static void moveFunctionData(Function &Old, Function &New, 649 DenseMap<Value *, BasicBlock *> &NewEnds) { 650 for (BasicBlock &CurrBB : llvm::make_early_inc_range(Old)) { 651 CurrBB.removeFromParent(); 652 CurrBB.insertInto(&New); 653 Instruction *I = CurrBB.getTerminator(); 654 655 // For each block we find a return instruction is, it is a potential exit 656 // path for the function. We keep track of each block based on the return 657 // value here. 658 if (ReturnInst *RI = dyn_cast<ReturnInst>(I)) 659 NewEnds.insert(std::make_pair(RI->getReturnValue(), &CurrBB)); 660 661 std::vector<Instruction *> DebugInsts; 662 663 for (Instruction &Val : CurrBB) { 664 // We must handle the scoping of called functions differently than 665 // other outlined instructions. 666 if (!isa<CallInst>(&Val)) { 667 // Remove the debug information for outlined functions. 668 Val.setDebugLoc(DebugLoc()); 669 continue; 670 } 671 672 // From this point we are only handling call instructions. 673 CallInst *CI = cast<CallInst>(&Val); 674 675 // We add any debug statements here, to be removed after. Since the 676 // instructions originate from many different locations in the program, 677 // it will cause incorrect reporting from a debugger if we keep the 678 // same debug instructions. 679 if (isa<DbgInfoIntrinsic>(CI)) { 680 DebugInsts.push_back(&Val); 681 continue; 682 } 683 684 // Edit the scope of called functions inside of outlined functions. 685 if (DISubprogram *SP = New.getSubprogram()) { 686 DILocation *DI = DILocation::get(New.getContext(), 0, 0, SP); 687 Val.setDebugLoc(DI); 688 } 689 } 690 691 for (Instruction *I : DebugInsts) 692 I->eraseFromParent(); 693 } 694 } 695 696 /// Find the the constants that will need to be lifted into arguments 697 /// as they are not the same in each instance of the region. 698 /// 699 /// \param [in] C - The IRSimilarityCandidate containing the region we are 700 /// analyzing. 701 /// \param [in] NotSame - The set of global value numbers that do not have a 702 /// single Constant across all OutlinableRegions similar to \p C. 703 /// \param [out] Inputs - The list containing the global value numbers of the 704 /// arguments needed for the region of code. 705 static void findConstants(IRSimilarityCandidate &C, DenseSet<unsigned> &NotSame, 706 std::vector<unsigned> &Inputs) { 707 DenseSet<unsigned> Seen; 708 // Iterate over the instructions, and find what constants will need to be 709 // extracted into arguments. 710 for (IRInstructionDataList::iterator IDIt = C.begin(), EndIDIt = C.end(); 711 IDIt != EndIDIt; IDIt++) { 712 for (Value *V : (*IDIt).OperVals) { 713 // Since these are stored before any outlining, they will be in the 714 // global value numbering. 715 unsigned GVN = C.getGVN(V).getValue(); 716 if (isa<Constant>(V)) 717 if (NotSame.contains(GVN) && !Seen.contains(GVN)) { 718 Inputs.push_back(GVN); 719 Seen.insert(GVN); 720 } 721 } 722 } 723 } 724 725 /// Find the GVN for the inputs that have been found by the CodeExtractor. 726 /// 727 /// \param [in] C - The IRSimilarityCandidate containing the region we are 728 /// analyzing. 729 /// \param [in] CurrentInputs - The set of inputs found by the 730 /// CodeExtractor. 731 /// \param [in] OutputMappings - The mapping of values that have been replaced 732 /// by a new output value. 733 /// \param [out] EndInputNumbers - The global value numbers for the extracted 734 /// arguments. 735 static void mapInputsToGVNs(IRSimilarityCandidate &C, 736 SetVector<Value *> &CurrentInputs, 737 const DenseMap<Value *, Value *> &OutputMappings, 738 std::vector<unsigned> &EndInputNumbers) { 739 // Get the Global Value Number for each input. We check if the Value has been 740 // replaced by a different value at output, and use the original value before 741 // replacement. 742 for (Value *Input : CurrentInputs) { 743 assert(Input && "Have a nullptr as an input"); 744 if (OutputMappings.find(Input) != OutputMappings.end()) 745 Input = OutputMappings.find(Input)->second; 746 assert(C.getGVN(Input).hasValue() && 747 "Could not find a numbering for the given input"); 748 EndInputNumbers.push_back(C.getGVN(Input).getValue()); 749 } 750 } 751 752 /// Find the original value for the \p ArgInput values if any one of them was 753 /// replaced during a previous extraction. 754 /// 755 /// \param [in] ArgInputs - The inputs to be extracted by the code extractor. 756 /// \param [in] OutputMappings - The mapping of values that have been replaced 757 /// by a new output value. 758 /// \param [out] RemappedArgInputs - The remapped values according to 759 /// \p OutputMappings that will be extracted. 760 static void 761 remapExtractedInputs(const ArrayRef<Value *> ArgInputs, 762 const DenseMap<Value *, Value *> &OutputMappings, 763 SetVector<Value *> &RemappedArgInputs) { 764 // Get the global value number for each input that will be extracted as an 765 // argument by the code extractor, remapping if needed for reloaded values. 766 for (Value *Input : ArgInputs) { 767 if (OutputMappings.find(Input) != OutputMappings.end()) 768 Input = OutputMappings.find(Input)->second; 769 RemappedArgInputs.insert(Input); 770 } 771 } 772 773 /// Find the input GVNs and the output values for a region of Instructions. 774 /// Using the code extractor, we collect the inputs to the extracted function. 775 /// 776 /// The \p Region can be identified as needing to be ignored in this function. 777 /// It should be checked whether it should be ignored after a call to this 778 /// function. 779 /// 780 /// \param [in,out] Region - The region of code to be analyzed. 781 /// \param [out] InputGVNs - The global value numbers for the extracted 782 /// arguments. 783 /// \param [in] NotSame - The global value numbers in the region that do not 784 /// have the same constant value in the regions structurally similar to 785 /// \p Region. 786 /// \param [in] OutputMappings - The mapping of values that have been replaced 787 /// by a new output value after extraction. 788 /// \param [out] ArgInputs - The values of the inputs to the extracted function. 789 /// \param [out] Outputs - The set of values extracted by the CodeExtractor 790 /// as outputs. 791 static void getCodeExtractorArguments( 792 OutlinableRegion &Region, std::vector<unsigned> &InputGVNs, 793 DenseSet<unsigned> &NotSame, DenseMap<Value *, Value *> &OutputMappings, 794 SetVector<Value *> &ArgInputs, SetVector<Value *> &Outputs) { 795 IRSimilarityCandidate &C = *Region.Candidate; 796 797 // OverallInputs are the inputs to the region found by the CodeExtractor, 798 // SinkCands and HoistCands are used by the CodeExtractor to find sunken 799 // allocas of values whose lifetimes are contained completely within the 800 // outlined region. PremappedInputs are the arguments found by the 801 // CodeExtractor, removing conditions such as sunken allocas, but that 802 // may need to be remapped due to the extracted output values replacing 803 // the original values. We use DummyOutputs for this first run of finding 804 // inputs and outputs since the outputs could change during findAllocas, 805 // the correct set of extracted outputs will be in the final Outputs ValueSet. 806 SetVector<Value *> OverallInputs, PremappedInputs, SinkCands, HoistCands, 807 DummyOutputs; 808 809 // Use the code extractor to get the inputs and outputs, without sunken 810 // allocas or removing llvm.assumes. 811 CodeExtractor *CE = Region.CE; 812 CE->findInputsOutputs(OverallInputs, DummyOutputs, SinkCands); 813 assert(Region.StartBB && "Region must have a start BasicBlock!"); 814 Function *OrigF = Region.StartBB->getParent(); 815 CodeExtractorAnalysisCache CEAC(*OrigF); 816 BasicBlock *Dummy = nullptr; 817 818 // The region may be ineligible due to VarArgs in the parent function. In this 819 // case we ignore the region. 820 if (!CE->isEligible()) { 821 Region.IgnoreRegion = true; 822 return; 823 } 824 825 // Find if any values are going to be sunk into the function when extracted 826 CE->findAllocas(CEAC, SinkCands, HoistCands, Dummy); 827 CE->findInputsOutputs(PremappedInputs, Outputs, SinkCands); 828 829 // TODO: Support regions with sunken allocas: values whose lifetimes are 830 // contained completely within the outlined region. These are not guaranteed 831 // to be the same in every region, so we must elevate them all to arguments 832 // when they appear. If these values are not equal, it means there is some 833 // Input in OverallInputs that was removed for ArgInputs. 834 if (OverallInputs.size() != PremappedInputs.size()) { 835 Region.IgnoreRegion = true; 836 return; 837 } 838 839 findConstants(C, NotSame, InputGVNs); 840 841 mapInputsToGVNs(C, OverallInputs, OutputMappings, InputGVNs); 842 843 remapExtractedInputs(PremappedInputs.getArrayRef(), OutputMappings, 844 ArgInputs); 845 846 // Sort the GVNs, since we now have constants included in the \ref InputGVNs 847 // we need to make sure they are in a deterministic order. 848 stable_sort(InputGVNs); 849 } 850 851 /// Look over the inputs and map each input argument to an argument in the 852 /// overall function for the OutlinableRegions. This creates a way to replace 853 /// the arguments of the extracted function with the arguments of the new 854 /// overall function. 855 /// 856 /// \param [in,out] Region - The region of code to be analyzed. 857 /// \param [in] InputGVNs - The global value numbering of the input values 858 /// collected. 859 /// \param [in] ArgInputs - The values of the arguments to the extracted 860 /// function. 861 static void 862 findExtractedInputToOverallInputMapping(OutlinableRegion &Region, 863 std::vector<unsigned> &InputGVNs, 864 SetVector<Value *> &ArgInputs) { 865 866 IRSimilarityCandidate &C = *Region.Candidate; 867 OutlinableGroup &Group = *Region.Parent; 868 869 // This counts the argument number in the overall function. 870 unsigned TypeIndex = 0; 871 872 // This counts the argument number in the extracted function. 873 unsigned OriginalIndex = 0; 874 875 // Find the mapping of the extracted arguments to the arguments for the 876 // overall function. Since there may be extra arguments in the overall 877 // function to account for the extracted constants, we have two different 878 // counters as we find extracted arguments, and as we come across overall 879 // arguments. 880 881 // Additionally, in our first pass, for the first extracted function, 882 // we find argument locations for the canonical value numbering. This 883 // numbering overrides any discovered location for the extracted code. 884 for (unsigned InputVal : InputGVNs) { 885 Optional<unsigned> CanonicalNumberOpt = C.getCanonicalNum(InputVal); 886 assert(CanonicalNumberOpt.hasValue() && "Canonical number not found?"); 887 unsigned CanonicalNumber = CanonicalNumberOpt.getValue(); 888 889 Optional<Value *> InputOpt = C.fromGVN(InputVal); 890 assert(InputOpt.hasValue() && "Global value number not found?"); 891 Value *Input = InputOpt.getValue(); 892 893 DenseMap<unsigned, unsigned>::iterator AggArgIt = 894 Group.CanonicalNumberToAggArg.find(CanonicalNumber); 895 896 if (!Group.InputTypesSet) { 897 Group.ArgumentTypes.push_back(Input->getType()); 898 // If the input value has a swifterr attribute, make sure to mark the 899 // argument in the overall function. 900 if (Input->isSwiftError()) { 901 assert( 902 !Group.SwiftErrorArgument.hasValue() && 903 "Argument already marked with swifterr for this OutlinableGroup!"); 904 Group.SwiftErrorArgument = TypeIndex; 905 } 906 } 907 908 // Check if we have a constant. If we do add it to the overall argument 909 // number to Constant map for the region, and continue to the next input. 910 if (Constant *CST = dyn_cast<Constant>(Input)) { 911 if (AggArgIt != Group.CanonicalNumberToAggArg.end()) 912 Region.AggArgToConstant.insert(std::make_pair(AggArgIt->second, CST)); 913 else { 914 Group.CanonicalNumberToAggArg.insert( 915 std::make_pair(CanonicalNumber, TypeIndex)); 916 Region.AggArgToConstant.insert(std::make_pair(TypeIndex, CST)); 917 } 918 TypeIndex++; 919 continue; 920 } 921 922 // It is not a constant, we create the mapping from extracted argument list 923 // to the overall argument list, using the canonical location, if it exists. 924 assert(ArgInputs.count(Input) && "Input cannot be found!"); 925 926 if (AggArgIt != Group.CanonicalNumberToAggArg.end()) { 927 if (OriginalIndex != AggArgIt->second) 928 Region.ChangedArgOrder = true; 929 Region.ExtractedArgToAgg.insert( 930 std::make_pair(OriginalIndex, AggArgIt->second)); 931 Region.AggArgToExtracted.insert( 932 std::make_pair(AggArgIt->second, OriginalIndex)); 933 } else { 934 Group.CanonicalNumberToAggArg.insert( 935 std::make_pair(CanonicalNumber, TypeIndex)); 936 Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, TypeIndex)); 937 Region.AggArgToExtracted.insert(std::make_pair(TypeIndex, OriginalIndex)); 938 } 939 OriginalIndex++; 940 TypeIndex++; 941 } 942 943 // If the function type definitions for the OutlinableGroup holding the region 944 // have not been set, set the length of the inputs here. We should have the 945 // same inputs for all of the different regions contained in the 946 // OutlinableGroup since they are all structurally similar to one another. 947 if (!Group.InputTypesSet) { 948 Group.NumAggregateInputs = TypeIndex; 949 Group.InputTypesSet = true; 950 } 951 952 Region.NumExtractedInputs = OriginalIndex; 953 } 954 955 /// Check if the \p V has any uses outside of the region other than \p PN. 956 /// 957 /// \param V [in] - The value to check. 958 /// \param PHILoc [in] - The location in the PHINode of \p V. 959 /// \param PN [in] - The PHINode using \p V. 960 /// \param Exits [in] - The potential blocks we exit to from the outlined 961 /// region. 962 /// \param BlocksInRegion [in] - The basic blocks contained in the region. 963 /// \returns true if \p V has any use soutside its region other than \p PN. 964 static bool outputHasNonPHI(Value *V, unsigned PHILoc, PHINode &PN, 965 SmallPtrSet<BasicBlock *, 1> &Exits, 966 DenseSet<BasicBlock *> &BlocksInRegion) { 967 // We check to see if the value is used by the PHINode from some other 968 // predecessor not included in the region. If it is, we make sure 969 // to keep it as an output. 970 if (any_of(llvm::seq<unsigned>(0, PN.getNumIncomingValues()), 971 [PHILoc, &PN, V, &BlocksInRegion](unsigned Idx) { 972 return (Idx != PHILoc && V == PN.getIncomingValue(Idx) && 973 !BlocksInRegion.contains(PN.getIncomingBlock(Idx))); 974 })) 975 return true; 976 977 // Check if the value is used by any other instructions outside the region. 978 return any_of(V->users(), [&Exits, &BlocksInRegion](User *U) { 979 Instruction *I = dyn_cast<Instruction>(U); 980 if (!I) 981 return false; 982 983 // If the use of the item is inside the region, we skip it. Uses 984 // inside the region give us useful information about how the item could be 985 // used as an output. 986 BasicBlock *Parent = I->getParent(); 987 if (BlocksInRegion.contains(Parent)) 988 return false; 989 990 // If it's not a PHINode then we definitely know the use matters. This 991 // output value will not completely combined with another item in a PHINode 992 // as it is directly reference by another non-phi instruction 993 if (!isa<PHINode>(I)) 994 return true; 995 996 // If we have a PHINode outside one of the exit locations, then it 997 // can be considered an outside use as well. If there is a PHINode 998 // contained in the Exit where this values use matters, it will be 999 // caught when we analyze that PHINode. 1000 if (!Exits.contains(Parent)) 1001 return true; 1002 1003 return false; 1004 }); 1005 } 1006 1007 /// Test whether \p CurrentExitFromRegion contains any PhiNodes that should be 1008 /// considered outputs. A PHINodes is an output when more than one incoming 1009 /// value has been marked by the CodeExtractor as an output. 1010 /// 1011 /// \param CurrentExitFromRegion [in] - The block to analyze. 1012 /// \param PotentialExitsFromRegion [in] - The potential exit blocks from the 1013 /// region. 1014 /// \param RegionBlocks [in] - The basic blocks in the region. 1015 /// \param Outputs [in, out] - The existing outputs for the region, we may add 1016 /// PHINodes to this as we find that they replace output values. 1017 /// \param OutputsReplacedByPHINode [out] - A set containing outputs that are 1018 /// totally replaced by a PHINode. 1019 /// \param OutputsWithNonPhiUses [out] - A set containing outputs that are used 1020 /// in PHINodes, but have other uses, and should still be considered outputs. 1021 static void analyzeExitPHIsForOutputUses( 1022 BasicBlock *CurrentExitFromRegion, 1023 SmallPtrSet<BasicBlock *, 1> &PotentialExitsFromRegion, 1024 DenseSet<BasicBlock *> &RegionBlocks, SetVector<Value *> &Outputs, 1025 DenseSet<Value *> &OutputsReplacedByPHINode, 1026 DenseSet<Value *> &OutputsWithNonPhiUses) { 1027 for (PHINode &PN : CurrentExitFromRegion->phis()) { 1028 // Find all incoming values from the outlining region. 1029 SmallVector<unsigned, 2> IncomingVals; 1030 for (unsigned I = 0, E = PN.getNumIncomingValues(); I < E; ++I) 1031 if (RegionBlocks.contains(PN.getIncomingBlock(I))) 1032 IncomingVals.push_back(I); 1033 1034 // Do not process PHI if there are no predecessors from region. 1035 unsigned NumIncomingVals = IncomingVals.size(); 1036 if (NumIncomingVals == 0) 1037 continue; 1038 1039 // If there is one predecessor, we mark it as a value that needs to be kept 1040 // as an output. 1041 if (NumIncomingVals == 1) { 1042 Value *V = PN.getIncomingValue(*IncomingVals.begin()); 1043 OutputsWithNonPhiUses.insert(V); 1044 OutputsReplacedByPHINode.erase(V); 1045 continue; 1046 } 1047 1048 // This PHINode will be used as an output value, so we add it to our list. 1049 Outputs.insert(&PN); 1050 1051 // Not all of the incoming values should be ignored as other inputs and 1052 // outputs may have uses in outlined region. If they have other uses 1053 // outside of the single PHINode we should not skip over it. 1054 for (unsigned Idx : IncomingVals) { 1055 Value *V = PN.getIncomingValue(Idx); 1056 if (outputHasNonPHI(V, Idx, PN, PotentialExitsFromRegion, RegionBlocks)) { 1057 OutputsWithNonPhiUses.insert(V); 1058 OutputsReplacedByPHINode.erase(V); 1059 continue; 1060 } 1061 if (!OutputsWithNonPhiUses.contains(V)) 1062 OutputsReplacedByPHINode.insert(V); 1063 } 1064 } 1065 } 1066 1067 // Represents the type for the unsigned number denoting the output number for 1068 // phi node, along with the canonical number for the exit block. 1069 using ArgLocWithBBCanon = std::pair<unsigned, unsigned>; 1070 // The list of canonical numbers for the incoming values to a PHINode. 1071 using CanonList = SmallVector<unsigned, 2>; 1072 // The pair type representing the set of canonical values being combined in the 1073 // PHINode, along with the location data for the PHINode. 1074 using PHINodeData = std::pair<ArgLocWithBBCanon, CanonList>; 1075 1076 /// Encode \p PND as an integer for easy lookup based on the argument location, 1077 /// the parent BasicBlock canonical numbering, and the canonical numbering of 1078 /// the values stored in the PHINode. 1079 /// 1080 /// \param PND - The data to hash. 1081 /// \returns The hash code of \p PND. 1082 static hash_code encodePHINodeData(PHINodeData &PND) { 1083 return llvm::hash_combine( 1084 llvm::hash_value(PND.first.first), llvm::hash_value(PND.first.second), 1085 llvm::hash_combine_range(PND.second.begin(), PND.second.end())); 1086 } 1087 1088 /// Create a special GVN for PHINodes that will be used outside of 1089 /// the region. We create a hash code based on the Canonical number of the 1090 /// parent BasicBlock, the canonical numbering of the values stored in the 1091 /// PHINode and the aggregate argument location. This is used to find whether 1092 /// this PHINode type has been given a canonical numbering already. If not, we 1093 /// assign it a value and store it for later use. The value is returned to 1094 /// identify different output schemes for the set of regions. 1095 /// 1096 /// \param Region - The region that \p PN is an output for. 1097 /// \param PN - The PHINode we are analyzing. 1098 /// \param AggArgIdx - The argument \p PN will be stored into. 1099 /// \returns An optional holding the assigned canonical number, or None if 1100 /// there is some attribute of the PHINode blocking it from being used. 1101 static Optional<unsigned> getGVNForPHINode(OutlinableRegion &Region, 1102 PHINode *PN, unsigned AggArgIdx) { 1103 OutlinableGroup &Group = *Region.Parent; 1104 IRSimilarityCandidate &Cand = *Region.Candidate; 1105 BasicBlock *PHIBB = PN->getParent(); 1106 CanonList PHIGVNs; 1107 for (Value *Incoming : PN->incoming_values()) { 1108 // If we cannot find a GVN, this means that the input to the PHINode is 1109 // not included in the region we are trying to analyze, meaning, that if 1110 // it was outlined, we would be adding an extra input. We ignore this 1111 // case for now, and so ignore the region. 1112 Optional<unsigned> OGVN = Cand.getGVN(Incoming); 1113 if (!OGVN.hasValue()) { 1114 Region.IgnoreRegion = true; 1115 return None; 1116 } 1117 1118 // Collect the canonical numbers of the values in the PHINode. 1119 unsigned GVN = OGVN.getValue(); 1120 OGVN = Cand.getCanonicalNum(GVN); 1121 assert(OGVN.hasValue() && "No GVN found for incoming value?"); 1122 PHIGVNs.push_back(*OGVN); 1123 } 1124 1125 // Now that we have the GVNs for the incoming values, we are going to combine 1126 // them with the GVN of the incoming bock, and the output location of the 1127 // PHINode to generate a hash value representing this instance of the PHINode. 1128 DenseMap<hash_code, unsigned>::iterator GVNToPHIIt; 1129 DenseMap<unsigned, PHINodeData>::iterator PHIToGVNIt; 1130 Optional<unsigned> BBGVN = Cand.getGVN(PHIBB); 1131 assert(BBGVN.hasValue() && "Could not find GVN for the incoming block!"); 1132 1133 BBGVN = Cand.getCanonicalNum(BBGVN.getValue()); 1134 assert(BBGVN.hasValue() && 1135 "Could not find canonical number for the incoming block!"); 1136 // Create a pair of the exit block canonical value, and the aggregate 1137 // argument location, connected to the canonical numbers stored in the 1138 // PHINode. 1139 PHINodeData TemporaryPair = 1140 std::make_pair(std::make_pair(BBGVN.getValue(), AggArgIdx), PHIGVNs); 1141 hash_code PHINodeDataHash = encodePHINodeData(TemporaryPair); 1142 1143 // Look for and create a new entry in our connection between canonical 1144 // numbers for PHINodes, and the set of objects we just created. 1145 GVNToPHIIt = Group.GVNsToPHINodeGVN.find(PHINodeDataHash); 1146 if (GVNToPHIIt == Group.GVNsToPHINodeGVN.end()) { 1147 bool Inserted = false; 1148 std::tie(PHIToGVNIt, Inserted) = Group.PHINodeGVNToGVNs.insert( 1149 std::make_pair(Group.PHINodeGVNTracker, TemporaryPair)); 1150 std::tie(GVNToPHIIt, Inserted) = Group.GVNsToPHINodeGVN.insert( 1151 std::make_pair(PHINodeDataHash, Group.PHINodeGVNTracker--)); 1152 } 1153 1154 return GVNToPHIIt->second; 1155 } 1156 1157 /// Create a mapping of the output arguments for the \p Region to the output 1158 /// arguments of the overall outlined function. 1159 /// 1160 /// \param [in,out] Region - The region of code to be analyzed. 1161 /// \param [in] Outputs - The values found by the code extractor. 1162 static void 1163 findExtractedOutputToOverallOutputMapping(OutlinableRegion &Region, 1164 SetVector<Value *> &Outputs) { 1165 OutlinableGroup &Group = *Region.Parent; 1166 IRSimilarityCandidate &C = *Region.Candidate; 1167 1168 SmallVector<BasicBlock *> BE; 1169 DenseSet<BasicBlock *> BlocksInRegion; 1170 C.getBasicBlocks(BlocksInRegion, BE); 1171 1172 // Find the exits to the region. 1173 SmallPtrSet<BasicBlock *, 1> Exits; 1174 for (BasicBlock *Block : BE) 1175 for (BasicBlock *Succ : successors(Block)) 1176 if (!BlocksInRegion.contains(Succ)) 1177 Exits.insert(Succ); 1178 1179 // After determining which blocks exit to PHINodes, we add these PHINodes to 1180 // the set of outputs to be processed. We also check the incoming values of 1181 // the PHINodes for whether they should no longer be considered outputs. 1182 DenseSet<Value *> OutputsReplacedByPHINode; 1183 DenseSet<Value *> OutputsWithNonPhiUses; 1184 for (BasicBlock *ExitBB : Exits) 1185 analyzeExitPHIsForOutputUses(ExitBB, Exits, BlocksInRegion, Outputs, 1186 OutputsReplacedByPHINode, 1187 OutputsWithNonPhiUses); 1188 1189 // This counts the argument number in the extracted function. 1190 unsigned OriginalIndex = Region.NumExtractedInputs; 1191 1192 // This counts the argument number in the overall function. 1193 unsigned TypeIndex = Group.NumAggregateInputs; 1194 bool TypeFound; 1195 DenseSet<unsigned> AggArgsUsed; 1196 1197 // Iterate over the output types and identify if there is an aggregate pointer 1198 // type whose base type matches the current output type. If there is, we mark 1199 // that we will use this output register for this value. If not we add another 1200 // type to the overall argument type list. We also store the GVNs used for 1201 // stores to identify which values will need to be moved into an special 1202 // block that holds the stores to the output registers. 1203 for (Value *Output : Outputs) { 1204 TypeFound = false; 1205 // We can do this since it is a result value, and will have a number 1206 // that is necessarily the same. BUT if in the future, the instructions 1207 // do not have to be in same order, but are functionally the same, we will 1208 // have to use a different scheme, as one-to-one correspondence is not 1209 // guaranteed. 1210 unsigned ArgumentSize = Group.ArgumentTypes.size(); 1211 1212 // If the output is combined in a PHINode, we make sure to skip over it. 1213 if (OutputsReplacedByPHINode.contains(Output)) 1214 continue; 1215 1216 unsigned AggArgIdx = 0; 1217 for (unsigned Jdx = TypeIndex; Jdx < ArgumentSize; Jdx++) { 1218 if (Group.ArgumentTypes[Jdx] != PointerType::getUnqual(Output->getType())) 1219 continue; 1220 1221 if (AggArgsUsed.contains(Jdx)) 1222 continue; 1223 1224 TypeFound = true; 1225 AggArgsUsed.insert(Jdx); 1226 Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, Jdx)); 1227 Region.AggArgToExtracted.insert(std::make_pair(Jdx, OriginalIndex)); 1228 AggArgIdx = Jdx; 1229 break; 1230 } 1231 1232 // We were unable to find an unused type in the output type set that matches 1233 // the output, so we add a pointer type to the argument types of the overall 1234 // function to handle this output and create a mapping to it. 1235 if (!TypeFound) { 1236 Group.ArgumentTypes.push_back(PointerType::getUnqual(Output->getType())); 1237 // Mark the new pointer type as the last value in the aggregate argument 1238 // list. 1239 unsigned ArgTypeIdx = Group.ArgumentTypes.size() - 1; 1240 AggArgsUsed.insert(ArgTypeIdx); 1241 Region.ExtractedArgToAgg.insert( 1242 std::make_pair(OriginalIndex, ArgTypeIdx)); 1243 Region.AggArgToExtracted.insert( 1244 std::make_pair(ArgTypeIdx, OriginalIndex)); 1245 AggArgIdx = ArgTypeIdx; 1246 } 1247 1248 // TODO: Adapt to the extra input from the PHINode. 1249 PHINode *PN = dyn_cast<PHINode>(Output); 1250 1251 Optional<unsigned> GVN; 1252 if (PN && !BlocksInRegion.contains(PN->getParent())) { 1253 // Values outside the region can be combined into PHINode when we 1254 // have multiple exits. We collect both of these into a list to identify 1255 // which values are being used in the PHINode. Each list identifies a 1256 // different PHINode, and a different output. We store the PHINode as it's 1257 // own canonical value. These canonical values are also dependent on the 1258 // output argument it is saved to. 1259 1260 // If two PHINodes have the same canonical values, but different aggregate 1261 // argument locations, then they will have distinct Canonical Values. 1262 GVN = getGVNForPHINode(Region, PN, AggArgIdx); 1263 if (!GVN.hasValue()) 1264 return; 1265 } else { 1266 // If we do not have a PHINode we use the global value numbering for the 1267 // output value, to find the canonical number to add to the set of stored 1268 // values. 1269 GVN = C.getGVN(Output); 1270 GVN = C.getCanonicalNum(*GVN); 1271 } 1272 1273 // Each region has a potentially unique set of outputs. We save which 1274 // values are output in a list of canonical values so we can differentiate 1275 // among the different store schemes. 1276 Region.GVNStores.push_back(*GVN); 1277 1278 OriginalIndex++; 1279 TypeIndex++; 1280 } 1281 1282 // We sort the stored values to make sure that we are not affected by analysis 1283 // order when determining what combination of items were stored. 1284 stable_sort(Region.GVNStores); 1285 } 1286 1287 void IROutliner::findAddInputsOutputs(Module &M, OutlinableRegion &Region, 1288 DenseSet<unsigned> &NotSame) { 1289 std::vector<unsigned> Inputs; 1290 SetVector<Value *> ArgInputs, Outputs; 1291 1292 getCodeExtractorArguments(Region, Inputs, NotSame, OutputMappings, ArgInputs, 1293 Outputs); 1294 1295 if (Region.IgnoreRegion) 1296 return; 1297 1298 // Map the inputs found by the CodeExtractor to the arguments found for 1299 // the overall function. 1300 findExtractedInputToOverallInputMapping(Region, Inputs, ArgInputs); 1301 1302 // Map the outputs found by the CodeExtractor to the arguments found for 1303 // the overall function. 1304 findExtractedOutputToOverallOutputMapping(Region, Outputs); 1305 } 1306 1307 /// Replace the extracted function in the Region with a call to the overall 1308 /// function constructed from the deduplicated similar regions, replacing and 1309 /// remapping the values passed to the extracted function as arguments to the 1310 /// new arguments of the overall function. 1311 /// 1312 /// \param [in] M - The module to outline from. 1313 /// \param [in] Region - The regions of extracted code to be replaced with a new 1314 /// function. 1315 /// \returns a call instruction with the replaced function. 1316 CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) { 1317 std::vector<Value *> NewCallArgs; 1318 DenseMap<unsigned, unsigned>::iterator ArgPair; 1319 1320 OutlinableGroup &Group = *Region.Parent; 1321 CallInst *Call = Region.Call; 1322 assert(Call && "Call to replace is nullptr?"); 1323 Function *AggFunc = Group.OutlinedFunction; 1324 assert(AggFunc && "Function to replace with is nullptr?"); 1325 1326 // If the arguments are the same size, there are not values that need to be 1327 // made into an argument, the argument ordering has not been change, or 1328 // different output registers to handle. We can simply replace the called 1329 // function in this case. 1330 if (!Region.ChangedArgOrder && AggFunc->arg_size() == Call->arg_size()) { 1331 LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to " 1332 << *AggFunc << " with same number of arguments\n"); 1333 Call->setCalledFunction(AggFunc); 1334 return Call; 1335 } 1336 1337 // We have a different number of arguments than the new function, so 1338 // we need to use our previously mappings off extracted argument to overall 1339 // function argument, and constants to overall function argument to create the 1340 // new argument list. 1341 for (unsigned AggArgIdx = 0; AggArgIdx < AggFunc->arg_size(); AggArgIdx++) { 1342 1343 if (AggArgIdx == AggFunc->arg_size() - 1 && 1344 Group.OutputGVNCombinations.size() > 1) { 1345 // If we are on the last argument, and we need to differentiate between 1346 // output blocks, add an integer to the argument list to determine 1347 // what block to take 1348 LLVM_DEBUG(dbgs() << "Set switch block argument to " 1349 << Region.OutputBlockNum << "\n"); 1350 NewCallArgs.push_back(ConstantInt::get(Type::getInt32Ty(M.getContext()), 1351 Region.OutputBlockNum)); 1352 continue; 1353 } 1354 1355 ArgPair = Region.AggArgToExtracted.find(AggArgIdx); 1356 if (ArgPair != Region.AggArgToExtracted.end()) { 1357 Value *ArgumentValue = Call->getArgOperand(ArgPair->second); 1358 // If we found the mapping from the extracted function to the overall 1359 // function, we simply add it to the argument list. We use the same 1360 // value, it just needs to honor the new order of arguments. 1361 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value " 1362 << *ArgumentValue << "\n"); 1363 NewCallArgs.push_back(ArgumentValue); 1364 continue; 1365 } 1366 1367 // If it is a constant, we simply add it to the argument list as a value. 1368 if (Region.AggArgToConstant.find(AggArgIdx) != 1369 Region.AggArgToConstant.end()) { 1370 Constant *CST = Region.AggArgToConstant.find(AggArgIdx)->second; 1371 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value " 1372 << *CST << "\n"); 1373 NewCallArgs.push_back(CST); 1374 continue; 1375 } 1376 1377 // Add a nullptr value if the argument is not found in the extracted 1378 // function. If we cannot find a value, it means it is not in use 1379 // for the region, so we should not pass anything to it. 1380 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to nullptr\n"); 1381 NewCallArgs.push_back(ConstantPointerNull::get( 1382 static_cast<PointerType *>(AggFunc->getArg(AggArgIdx)->getType()))); 1383 } 1384 1385 LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to " 1386 << *AggFunc << " with new set of arguments\n"); 1387 // Create the new call instruction and erase the old one. 1388 Call = CallInst::Create(AggFunc->getFunctionType(), AggFunc, NewCallArgs, "", 1389 Call); 1390 1391 // It is possible that the call to the outlined function is either the first 1392 // instruction is in the new block, the last instruction, or both. If either 1393 // of these is the case, we need to make sure that we replace the instruction 1394 // in the IRInstructionData struct with the new call. 1395 CallInst *OldCall = Region.Call; 1396 if (Region.NewFront->Inst == OldCall) 1397 Region.NewFront->Inst = Call; 1398 if (Region.NewBack->Inst == OldCall) 1399 Region.NewBack->Inst = Call; 1400 1401 // Transfer any debug information. 1402 Call->setDebugLoc(Region.Call->getDebugLoc()); 1403 // Since our output may determine which branch we go to, we make sure to 1404 // propogate this new call value through the module. 1405 OldCall->replaceAllUsesWith(Call); 1406 1407 // Remove the old instruction. 1408 OldCall->eraseFromParent(); 1409 Region.Call = Call; 1410 1411 // Make sure that the argument in the new function has the SwiftError 1412 // argument. 1413 if (Group.SwiftErrorArgument.hasValue()) 1414 Call->addParamAttr(Group.SwiftErrorArgument.getValue(), 1415 Attribute::SwiftError); 1416 1417 return Call; 1418 } 1419 1420 /// Find or create a BasicBlock in the outlined function containing PhiBlocks 1421 /// for \p RetVal. 1422 /// 1423 /// \param Group - The OutlinableGroup containing the information about the 1424 /// overall outlined function. 1425 /// \param RetVal - The return value or exit option that we are currently 1426 /// evaluating. 1427 /// \returns The found or newly created BasicBlock to contain the needed 1428 /// PHINodes to be used as outputs. 1429 static BasicBlock *findOrCreatePHIBlock(OutlinableGroup &Group, Value *RetVal) { 1430 DenseMap<Value *, BasicBlock *>::iterator PhiBlockForRetVal, 1431 ReturnBlockForRetVal; 1432 PhiBlockForRetVal = Group.PHIBlocks.find(RetVal); 1433 ReturnBlockForRetVal = Group.EndBBs.find(RetVal); 1434 assert(ReturnBlockForRetVal != Group.EndBBs.end() && 1435 "Could not find output value!"); 1436 BasicBlock *ReturnBB = ReturnBlockForRetVal->second; 1437 1438 // Find if a PHIBlock exists for this return value already. If it is 1439 // the first time we are analyzing this, we will not, so we record it. 1440 PhiBlockForRetVal = Group.PHIBlocks.find(RetVal); 1441 if (PhiBlockForRetVal != Group.PHIBlocks.end()) 1442 return PhiBlockForRetVal->second; 1443 1444 // If we did not find a block, we create one, and insert it into the 1445 // overall function and record it. 1446 bool Inserted = false; 1447 BasicBlock *PHIBlock = BasicBlock::Create(ReturnBB->getContext(), "phi_block", 1448 ReturnBB->getParent()); 1449 std::tie(PhiBlockForRetVal, Inserted) = 1450 Group.PHIBlocks.insert(std::make_pair(RetVal, PHIBlock)); 1451 1452 // We find the predecessors of the return block in the newly created outlined 1453 // function in order to point them to the new PHIBlock rather than the already 1454 // existing return block. 1455 SmallVector<BranchInst *, 2> BranchesToChange; 1456 for (BasicBlock *Pred : predecessors(ReturnBB)) 1457 BranchesToChange.push_back(cast<BranchInst>(Pred->getTerminator())); 1458 1459 // Now we mark the branch instructions found, and change the references of the 1460 // return block to the newly created PHIBlock. 1461 for (BranchInst *BI : BranchesToChange) 1462 for (unsigned Succ = 0, End = BI->getNumSuccessors(); Succ < End; Succ++) { 1463 if (BI->getSuccessor(Succ) != ReturnBB) 1464 continue; 1465 BI->setSuccessor(Succ, PHIBlock); 1466 } 1467 1468 BranchInst::Create(ReturnBB, PHIBlock); 1469 1470 return PhiBlockForRetVal->second; 1471 } 1472 1473 /// For the function call now representing the \p Region, find the passed value 1474 /// to that call that represents Argument \p A at the call location if the 1475 /// call has already been replaced with a call to the overall, aggregate 1476 /// function. 1477 /// 1478 /// \param A - The Argument to get the passed value for. 1479 /// \param Region - The extracted Region corresponding to the outlined function. 1480 /// \returns The Value representing \p A at the call site. 1481 static Value * 1482 getPassedArgumentInAlreadyOutlinedFunction(const Argument *A, 1483 const OutlinableRegion &Region) { 1484 // If we don't need to adjust the argument number at all (since the call 1485 // has already been replaced by a call to the overall outlined function) 1486 // we can just get the specified argument. 1487 return Region.Call->getArgOperand(A->getArgNo()); 1488 } 1489 1490 /// For the function call now representing the \p Region, find the passed value 1491 /// to that call that represents Argument \p A at the call location if the 1492 /// call has only been replaced by the call to the aggregate function. 1493 /// 1494 /// \param A - The Argument to get the passed value for. 1495 /// \param Region - The extracted Region corresponding to the outlined function. 1496 /// \returns The Value representing \p A at the call site. 1497 static Value * 1498 getPassedArgumentAndAdjustArgumentLocation(const Argument *A, 1499 const OutlinableRegion &Region) { 1500 unsigned ArgNum = A->getArgNo(); 1501 1502 // If it is a constant, we can look at our mapping from when we created 1503 // the outputs to figure out what the constant value is. 1504 if (Region.AggArgToConstant.count(ArgNum)) 1505 return Region.AggArgToConstant.find(ArgNum)->second; 1506 1507 // If it is not a constant, and we are not looking at the overall function, we 1508 // need to adjust which argument we are looking at. 1509 ArgNum = Region.AggArgToExtracted.find(ArgNum)->second; 1510 return Region.Call->getArgOperand(ArgNum); 1511 } 1512 1513 /// Find the canonical numbering for the incoming Values into the PHINode \p PN. 1514 /// 1515 /// \param PN [in] - The PHINode that we are finding the canonical numbers for. 1516 /// \param Region [in] - The OutlinableRegion containing \p PN. 1517 /// \param OutputMappings [in] - The mapping of output values from outlined 1518 /// region to their original values. 1519 /// \param CanonNums [out] - The canonical numbering for the incoming values to 1520 /// \p PN. 1521 /// \param ReplacedWithOutlinedCall - A flag to use the extracted function call 1522 /// of \p Region rather than the overall function's call. 1523 static void 1524 findCanonNumsForPHI(PHINode *PN, OutlinableRegion &Region, 1525 const DenseMap<Value *, Value *> &OutputMappings, 1526 DenseSet<unsigned> &CanonNums, 1527 bool ReplacedWithOutlinedCall = true) { 1528 // Iterate over the incoming values. 1529 for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) { 1530 Value *IVal = PN->getIncomingValue(Idx); 1531 // If we have an argument as incoming value, we need to grab the passed 1532 // value from the call itself. 1533 if (Argument *A = dyn_cast<Argument>(IVal)) { 1534 if (ReplacedWithOutlinedCall) 1535 IVal = getPassedArgumentInAlreadyOutlinedFunction(A, Region); 1536 else 1537 IVal = getPassedArgumentAndAdjustArgumentLocation(A, Region); 1538 } 1539 1540 // Get the original value if it has been replaced by an output value. 1541 IVal = findOutputMapping(OutputMappings, IVal); 1542 1543 // Find and add the canonical number for the incoming value. 1544 Optional<unsigned> GVN = Region.Candidate->getGVN(IVal); 1545 assert(GVN.hasValue() && "No GVN for incoming value"); 1546 Optional<unsigned> CanonNum = Region.Candidate->getCanonicalNum(*GVN); 1547 assert(CanonNum.hasValue() && "No Canonical Number for GVN"); 1548 CanonNums.insert(*CanonNum); 1549 } 1550 } 1551 1552 /// Find, or add PHINode \p PN to the combined PHINode Block \p OverallPHIBlock 1553 /// in order to condense the number of instructions added to the outlined 1554 /// function. 1555 /// 1556 /// \param PN [in] - The PHINode that we are finding the canonical numbers for. 1557 /// \param Region [in] - The OutlinableRegion containing \p PN. 1558 /// \param OverallPhiBlock [in] - The overall PHIBlock we are trying to find 1559 /// \p PN in. 1560 /// \param OutputMappings [in] - The mapping of output values from outlined 1561 /// region to their original values. 1562 /// \return the newly found or created PHINode in \p OverallPhiBlock. 1563 static PHINode* 1564 findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region, 1565 BasicBlock *OverallPhiBlock, 1566 const DenseMap<Value *, Value *> &OutputMappings) { 1567 OutlinableGroup &Group = *Region.Parent; 1568 1569 DenseSet<unsigned> PNCanonNums; 1570 // We have to use the extracted function since we have merged this region into 1571 // the overall function yet. We make sure to reassign the argument numbering 1572 // since it is possible that the argument ordering is different between the 1573 // functions. 1574 findCanonNumsForPHI(&PN, Region, OutputMappings, PNCanonNums, 1575 /* ReplacedWithOutlinedCall = */ false); 1576 1577 OutlinableRegion *FirstRegion = Group.Regions[0]; 1578 DenseSet<unsigned> CurrentCanonNums; 1579 // Find the Canonical Numbering for each PHINode, if it matches, we replace 1580 // the uses of the PHINode we are searching for, with the found PHINode. 1581 for (PHINode &CurrPN : OverallPhiBlock->phis()) { 1582 CurrentCanonNums.clear(); 1583 findCanonNumsForPHI(&CurrPN, *FirstRegion, OutputMappings, CurrentCanonNums, 1584 /* ReplacedWithOutlinedCall = */ true); 1585 1586 if (all_of(PNCanonNums, [&CurrentCanonNums](unsigned CanonNum) { 1587 return CurrentCanonNums.contains(CanonNum); 1588 })) 1589 return &CurrPN; 1590 } 1591 1592 // If we've made it here, it means we weren't able to replace the PHINode, so 1593 // we must insert it ourselves. 1594 PHINode *NewPN = cast<PHINode>(PN.clone()); 1595 NewPN->insertBefore(&*OverallPhiBlock->begin()); 1596 for (unsigned Idx = 0, Edx = NewPN->getNumIncomingValues(); Idx < Edx; 1597 Idx++) { 1598 Value *IncomingVal = NewPN->getIncomingValue(Idx); 1599 BasicBlock *IncomingBlock = NewPN->getIncomingBlock(Idx); 1600 1601 // Find corresponding basic block in the overall function for the incoming 1602 // block. 1603 Instruction *FirstNonPHI = IncomingBlock->getFirstNonPHI(); 1604 assert(FirstNonPHI && "Incoming block is empty?"); 1605 Value *CorrespondingVal = 1606 Region.findCorrespondingValueIn(*FirstRegion, FirstNonPHI); 1607 assert(CorrespondingVal && "Value is nullptr?"); 1608 BasicBlock *BlockToUse = cast<Instruction>(CorrespondingVal)->getParent(); 1609 NewPN->setIncomingBlock(Idx, BlockToUse); 1610 1611 // If we have an argument we make sure we replace using the argument from 1612 // the correct function. 1613 if (Argument *A = dyn_cast<Argument>(IncomingVal)) { 1614 Value *Val = Group.OutlinedFunction->getArg(A->getArgNo()); 1615 NewPN->setIncomingValue(Idx, Val); 1616 continue; 1617 } 1618 1619 // Find the corresponding value in the overall function. 1620 IncomingVal = findOutputMapping(OutputMappings, IncomingVal); 1621 Value *Val = Region.findCorrespondingValueIn(*FirstRegion, IncomingVal); 1622 assert(Val && "Value is nullptr?"); 1623 NewPN->setIncomingValue(Idx, Val); 1624 } 1625 return NewPN; 1626 } 1627 1628 // Within an extracted function, replace the argument uses of the extracted 1629 // region with the arguments of the function for an OutlinableGroup. 1630 // 1631 /// \param [in] Region - The region of extracted code to be changed. 1632 /// \param [in,out] OutputBBs - The BasicBlock for the output stores for this 1633 /// region. 1634 /// \param [in] FirstFunction - A flag to indicate whether we are using this 1635 /// function to define the overall outlined function for all the regions, or 1636 /// if we are operating on one of the following regions. 1637 static void 1638 replaceArgumentUses(OutlinableRegion &Region, 1639 DenseMap<Value *, BasicBlock *> &OutputBBs, 1640 const DenseMap<Value *, Value *> &OutputMappings, 1641 bool FirstFunction = false) { 1642 OutlinableGroup &Group = *Region.Parent; 1643 assert(Region.ExtractedFunction && "Region has no extracted function?"); 1644 1645 Function *DominatingFunction = Region.ExtractedFunction; 1646 if (FirstFunction) 1647 DominatingFunction = Group.OutlinedFunction; 1648 DominatorTree DT(*DominatingFunction); 1649 1650 for (unsigned ArgIdx = 0; ArgIdx < Region.ExtractedFunction->arg_size(); 1651 ArgIdx++) { 1652 assert(Region.ExtractedArgToAgg.find(ArgIdx) != 1653 Region.ExtractedArgToAgg.end() && 1654 "No mapping from extracted to outlined?"); 1655 unsigned AggArgIdx = Region.ExtractedArgToAgg.find(ArgIdx)->second; 1656 Argument *AggArg = Group.OutlinedFunction->getArg(AggArgIdx); 1657 Argument *Arg = Region.ExtractedFunction->getArg(ArgIdx); 1658 // The argument is an input, so we can simply replace it with the overall 1659 // argument value 1660 if (ArgIdx < Region.NumExtractedInputs) { 1661 LLVM_DEBUG(dbgs() << "Replacing uses of input " << *Arg << " in function " 1662 << *Region.ExtractedFunction << " with " << *AggArg 1663 << " in function " << *Group.OutlinedFunction << "\n"); 1664 Arg->replaceAllUsesWith(AggArg); 1665 continue; 1666 } 1667 1668 // If we are replacing an output, we place the store value in its own 1669 // block inside the overall function before replacing the use of the output 1670 // in the function. 1671 assert(Arg->hasOneUse() && "Output argument can only have one use"); 1672 User *InstAsUser = Arg->user_back(); 1673 assert(InstAsUser && "User is nullptr!"); 1674 1675 Instruction *I = cast<Instruction>(InstAsUser); 1676 BasicBlock *BB = I->getParent(); 1677 SmallVector<BasicBlock *, 4> Descendants; 1678 DT.getDescendants(BB, Descendants); 1679 bool EdgeAdded = false; 1680 if (Descendants.size() == 0) { 1681 EdgeAdded = true; 1682 DT.insertEdge(&DominatingFunction->getEntryBlock(), BB); 1683 DT.getDescendants(BB, Descendants); 1684 } 1685 1686 // Iterate over the following blocks, looking for return instructions, 1687 // if we find one, find the corresponding output block for the return value 1688 // and move our store instruction there. 1689 for (BasicBlock *DescendBB : Descendants) { 1690 ReturnInst *RI = dyn_cast<ReturnInst>(DescendBB->getTerminator()); 1691 if (!RI) 1692 continue; 1693 Value *RetVal = RI->getReturnValue(); 1694 auto VBBIt = OutputBBs.find(RetVal); 1695 assert(VBBIt != OutputBBs.end() && "Could not find output value!"); 1696 1697 // If this is storing a PHINode, we must make sure it is included in the 1698 // overall function. 1699 StoreInst *SI = cast<StoreInst>(I); 1700 1701 Value *ValueOperand = SI->getValueOperand(); 1702 1703 StoreInst *NewI = cast<StoreInst>(I->clone()); 1704 NewI->setDebugLoc(DebugLoc()); 1705 BasicBlock *OutputBB = VBBIt->second; 1706 OutputBB->getInstList().push_back(NewI); 1707 LLVM_DEBUG(dbgs() << "Move store for instruction " << *I << " to " 1708 << *OutputBB << "\n"); 1709 1710 // If this is storing a PHINode, we must make sure it is included in the 1711 // overall function. 1712 if (!isa<PHINode>(ValueOperand) || 1713 Region.Candidate->getGVN(ValueOperand).hasValue()) { 1714 if (FirstFunction) 1715 continue; 1716 Value *CorrVal = 1717 Region.findCorrespondingValueIn(*Group.Regions[0], ValueOperand); 1718 assert(CorrVal && "Value is nullptr?"); 1719 NewI->setOperand(0, CorrVal); 1720 continue; 1721 } 1722 PHINode *PN = cast<PHINode>(SI->getValueOperand()); 1723 // If it has a value, it was not split by the code extractor, which 1724 // is what we are looking for. 1725 if (Region.Candidate->getGVN(PN).hasValue()) 1726 continue; 1727 1728 // We record the parent block for the PHINode in the Region so that 1729 // we can exclude it from checks later on. 1730 Region.PHIBlocks.insert(std::make_pair(RetVal, PN->getParent())); 1731 1732 // If this is the first function, we do not need to worry about mergiing 1733 // this with any other block in the overall outlined function, so we can 1734 // just continue. 1735 if (FirstFunction) { 1736 BasicBlock *PHIBlock = PN->getParent(); 1737 Group.PHIBlocks.insert(std::make_pair(RetVal, PHIBlock)); 1738 continue; 1739 } 1740 1741 // We look for the aggregate block that contains the PHINodes leading into 1742 // this exit path. If we can't find one, we create one. 1743 BasicBlock *OverallPhiBlock = findOrCreatePHIBlock(Group, RetVal); 1744 1745 // For our PHINode, we find the combined canonical numbering, and 1746 // attempt to find a matching PHINode in the overall PHIBlock. If we 1747 // cannot, we copy the PHINode and move it into this new block. 1748 PHINode *NewPN = 1749 findOrCreatePHIInBlock(*PN, Region, OverallPhiBlock, OutputMappings); 1750 NewI->setOperand(0, NewPN); 1751 } 1752 1753 // If we added an edge for basic blocks without a predecessor, we remove it 1754 // here. 1755 if (EdgeAdded) 1756 DT.deleteEdge(&DominatingFunction->getEntryBlock(), BB); 1757 I->eraseFromParent(); 1758 1759 LLVM_DEBUG(dbgs() << "Replacing uses of output " << *Arg << " in function " 1760 << *Region.ExtractedFunction << " with " << *AggArg 1761 << " in function " << *Group.OutlinedFunction << "\n"); 1762 Arg->replaceAllUsesWith(AggArg); 1763 } 1764 } 1765 1766 /// Within an extracted function, replace the constants that need to be lifted 1767 /// into arguments with the actual argument. 1768 /// 1769 /// \param Region [in] - The region of extracted code to be changed. 1770 void replaceConstants(OutlinableRegion &Region) { 1771 OutlinableGroup &Group = *Region.Parent; 1772 // Iterate over the constants that need to be elevated into arguments 1773 for (std::pair<unsigned, Constant *> &Const : Region.AggArgToConstant) { 1774 unsigned AggArgIdx = Const.first; 1775 Function *OutlinedFunction = Group.OutlinedFunction; 1776 assert(OutlinedFunction && "Overall Function is not defined?"); 1777 Constant *CST = Const.second; 1778 Argument *Arg = Group.OutlinedFunction->getArg(AggArgIdx); 1779 // Identify the argument it will be elevated to, and replace instances of 1780 // that constant in the function. 1781 1782 // TODO: If in the future constants do not have one global value number, 1783 // i.e. a constant 1 could be mapped to several values, this check will 1784 // have to be more strict. It cannot be using only replaceUsesWithIf. 1785 1786 LLVM_DEBUG(dbgs() << "Replacing uses of constant " << *CST 1787 << " in function " << *OutlinedFunction << " with " 1788 << *Arg << "\n"); 1789 CST->replaceUsesWithIf(Arg, [OutlinedFunction](Use &U) { 1790 if (Instruction *I = dyn_cast<Instruction>(U.getUser())) 1791 return I->getFunction() == OutlinedFunction; 1792 return false; 1793 }); 1794 } 1795 } 1796 1797 /// It is possible that there is a basic block that already performs the same 1798 /// stores. This returns a duplicate block, if it exists 1799 /// 1800 /// \param OutputBBs [in] the blocks we are looking for a duplicate of. 1801 /// \param OutputStoreBBs [in] The existing output blocks. 1802 /// \returns an optional value with the number output block if there is a match. 1803 Optional<unsigned> findDuplicateOutputBlock( 1804 DenseMap<Value *, BasicBlock *> &OutputBBs, 1805 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) { 1806 1807 bool Mismatch = false; 1808 unsigned MatchingNum = 0; 1809 // We compare the new set output blocks to the other sets of output blocks. 1810 // If they are the same number, and have identical instructions, they are 1811 // considered to be the same. 1812 for (DenseMap<Value *, BasicBlock *> &CompBBs : OutputStoreBBs) { 1813 Mismatch = false; 1814 for (std::pair<Value *, BasicBlock *> &VToB : CompBBs) { 1815 DenseMap<Value *, BasicBlock *>::iterator OutputBBIt = 1816 OutputBBs.find(VToB.first); 1817 if (OutputBBIt == OutputBBs.end()) { 1818 Mismatch = true; 1819 break; 1820 } 1821 1822 BasicBlock *CompBB = VToB.second; 1823 BasicBlock *OutputBB = OutputBBIt->second; 1824 if (CompBB->size() - 1 != OutputBB->size()) { 1825 Mismatch = true; 1826 break; 1827 } 1828 1829 BasicBlock::iterator NIt = OutputBB->begin(); 1830 for (Instruction &I : *CompBB) { 1831 if (isa<BranchInst>(&I)) 1832 continue; 1833 1834 if (!I.isIdenticalTo(&(*NIt))) { 1835 Mismatch = true; 1836 break; 1837 } 1838 1839 NIt++; 1840 } 1841 } 1842 1843 if (!Mismatch) 1844 return MatchingNum; 1845 1846 MatchingNum++; 1847 } 1848 1849 return None; 1850 } 1851 1852 /// Remove empty output blocks from the outlined region. 1853 /// 1854 /// \param BlocksToPrune - Mapping of return values output blocks for the \p 1855 /// Region. 1856 /// \param Region - The OutlinableRegion we are analyzing. 1857 static bool 1858 analyzeAndPruneOutputBlocks(DenseMap<Value *, BasicBlock *> &BlocksToPrune, 1859 OutlinableRegion &Region) { 1860 bool AllRemoved = true; 1861 Value *RetValueForBB; 1862 BasicBlock *NewBB; 1863 SmallVector<Value *, 4> ToRemove; 1864 // Iterate over the output blocks created in the outlined section. 1865 for (std::pair<Value *, BasicBlock *> &VtoBB : BlocksToPrune) { 1866 RetValueForBB = VtoBB.first; 1867 NewBB = VtoBB.second; 1868 1869 // If there are no instructions, we remove it from the module, and also 1870 // mark the value for removal from the return value to output block mapping. 1871 if (NewBB->size() == 0) { 1872 NewBB->eraseFromParent(); 1873 ToRemove.push_back(RetValueForBB); 1874 continue; 1875 } 1876 1877 // Mark that we could not remove all the blocks since they were not all 1878 // empty. 1879 AllRemoved = false; 1880 } 1881 1882 // Remove the return value from the mapping. 1883 for (Value *V : ToRemove) 1884 BlocksToPrune.erase(V); 1885 1886 // Mark the region as having the no output scheme. 1887 if (AllRemoved) 1888 Region.OutputBlockNum = -1; 1889 1890 return AllRemoved; 1891 } 1892 1893 /// For the outlined section, move needed the StoreInsts for the output 1894 /// registers into their own block. Then, determine if there is a duplicate 1895 /// output block already created. 1896 /// 1897 /// \param [in] OG - The OutlinableGroup of regions to be outlined. 1898 /// \param [in] Region - The OutlinableRegion that is being analyzed. 1899 /// \param [in,out] OutputBBs - the blocks that stores for this region will be 1900 /// placed in. 1901 /// \param [in] EndBBs - the final blocks of the extracted function. 1902 /// \param [in] OutputMappings - OutputMappings the mapping of values that have 1903 /// been replaced by a new output value. 1904 /// \param [in,out] OutputStoreBBs - The existing output blocks. 1905 static void alignOutputBlockWithAggFunc( 1906 OutlinableGroup &OG, OutlinableRegion &Region, 1907 DenseMap<Value *, BasicBlock *> &OutputBBs, 1908 DenseMap<Value *, BasicBlock *> &EndBBs, 1909 const DenseMap<Value *, Value *> &OutputMappings, 1910 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) { 1911 // If none of the output blocks have any instructions, this means that we do 1912 // not have to determine if it matches any of the other output schemes, and we 1913 // don't have to do anything else. 1914 if (analyzeAndPruneOutputBlocks(OutputBBs, Region)) 1915 return; 1916 1917 // Determine is there is a duplicate set of blocks. 1918 Optional<unsigned> MatchingBB = 1919 findDuplicateOutputBlock(OutputBBs, OutputStoreBBs); 1920 1921 // If there is, we remove the new output blocks. If it does not, 1922 // we add it to our list of sets of output blocks. 1923 if (MatchingBB.hasValue()) { 1924 LLVM_DEBUG(dbgs() << "Set output block for region in function" 1925 << Region.ExtractedFunction << " to " 1926 << MatchingBB.getValue()); 1927 1928 Region.OutputBlockNum = MatchingBB.getValue(); 1929 for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs) 1930 VtoBB.second->eraseFromParent(); 1931 return; 1932 } 1933 1934 Region.OutputBlockNum = OutputStoreBBs.size(); 1935 1936 Value *RetValueForBB; 1937 BasicBlock *NewBB; 1938 OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>()); 1939 for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs) { 1940 RetValueForBB = VtoBB.first; 1941 NewBB = VtoBB.second; 1942 DenseMap<Value *, BasicBlock *>::iterator VBBIt = 1943 EndBBs.find(RetValueForBB); 1944 LLVM_DEBUG(dbgs() << "Create output block for region in" 1945 << Region.ExtractedFunction << " to " 1946 << *NewBB); 1947 BranchInst::Create(VBBIt->second, NewBB); 1948 OutputStoreBBs.back().insert(std::make_pair(RetValueForBB, NewBB)); 1949 } 1950 } 1951 1952 /// Takes in a mapping, \p OldMap of ConstantValues to BasicBlocks, sorts keys, 1953 /// before creating a basic block for each \p NewMap, and inserting into the new 1954 /// block. Each BasicBlock is named with the scheme "<basename>_<key_idx>". 1955 /// 1956 /// \param OldMap [in] - The mapping to base the new mapping off of. 1957 /// \param NewMap [out] - The output mapping using the keys of \p OldMap. 1958 /// \param ParentFunc [in] - The function to put the new basic block in. 1959 /// \param BaseName [in] - The start of the BasicBlock names to be appended to 1960 /// by an index value. 1961 static void createAndInsertBasicBlocks(DenseMap<Value *, BasicBlock *> &OldMap, 1962 DenseMap<Value *, BasicBlock *> &NewMap, 1963 Function *ParentFunc, Twine BaseName) { 1964 unsigned Idx = 0; 1965 std::vector<Value *> SortedKeys; 1966 1967 getSortedConstantKeys(SortedKeys, OldMap); 1968 1969 for (Value *RetVal : SortedKeys) { 1970 BasicBlock *NewBB = BasicBlock::Create( 1971 ParentFunc->getContext(), 1972 Twine(BaseName) + Twine("_") + Twine(static_cast<unsigned>(Idx++)), 1973 ParentFunc); 1974 NewMap.insert(std::make_pair(RetVal, NewBB)); 1975 } 1976 } 1977 1978 /// Create the switch statement for outlined function to differentiate between 1979 /// all the output blocks. 1980 /// 1981 /// For the outlined section, determine if an outlined block already exists that 1982 /// matches the needed stores for the extracted section. 1983 /// \param [in] M - The module we are outlining from. 1984 /// \param [in] OG - The group of regions to be outlined. 1985 /// \param [in] EndBBs - The final blocks of the extracted function. 1986 /// \param [in,out] OutputStoreBBs - The existing output blocks. 1987 void createSwitchStatement( 1988 Module &M, OutlinableGroup &OG, DenseMap<Value *, BasicBlock *> &EndBBs, 1989 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) { 1990 // We only need the switch statement if there is more than one store 1991 // combination, or there is more than one set of output blocks. The first 1992 // will occur when we store different sets of values for two different 1993 // regions. The second will occur when we have two outputs that are combined 1994 // in a PHINode outside of the region in one outlined instance, and are used 1995 // seaparately in another. This will create the same set of OutputGVNs, but 1996 // will generate two different output schemes. 1997 if (OG.OutputGVNCombinations.size() > 1) { 1998 Function *AggFunc = OG.OutlinedFunction; 1999 // Create a final block for each different return block. 2000 DenseMap<Value *, BasicBlock *> ReturnBBs; 2001 createAndInsertBasicBlocks(OG.EndBBs, ReturnBBs, AggFunc, "final_block"); 2002 2003 for (std::pair<Value *, BasicBlock *> &RetBlockPair : ReturnBBs) { 2004 std::pair<Value *, BasicBlock *> &OutputBlock = 2005 *OG.EndBBs.find(RetBlockPair.first); 2006 BasicBlock *ReturnBlock = RetBlockPair.second; 2007 BasicBlock *EndBB = OutputBlock.second; 2008 Instruction *Term = EndBB->getTerminator(); 2009 // Move the return value to the final block instead of the original exit 2010 // stub. 2011 Term->moveBefore(*ReturnBlock, ReturnBlock->end()); 2012 // Put the switch statement in the old end basic block for the function 2013 // with a fall through to the new return block. 2014 LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for " 2015 << OutputStoreBBs.size() << "\n"); 2016 SwitchInst *SwitchI = 2017 SwitchInst::Create(AggFunc->getArg(AggFunc->arg_size() - 1), 2018 ReturnBlock, OutputStoreBBs.size(), EndBB); 2019 2020 unsigned Idx = 0; 2021 for (DenseMap<Value *, BasicBlock *> &OutputStoreBB : OutputStoreBBs) { 2022 DenseMap<Value *, BasicBlock *>::iterator OSBBIt = 2023 OutputStoreBB.find(OutputBlock.first); 2024 2025 if (OSBBIt == OutputStoreBB.end()) 2026 continue; 2027 2028 BasicBlock *BB = OSBBIt->second; 2029 SwitchI->addCase( 2030 ConstantInt::get(Type::getInt32Ty(M.getContext()), Idx), BB); 2031 Term = BB->getTerminator(); 2032 Term->setSuccessor(0, ReturnBlock); 2033 Idx++; 2034 } 2035 } 2036 return; 2037 } 2038 2039 assert(OutputStoreBBs.size() < 2 && "Different store sets not handled!"); 2040 2041 // If there needs to be stores, move them from the output blocks to their 2042 // corresponding ending block. We do not check that the OutputGVNCombinations 2043 // is equal to 1 here since that could just been the case where there are 0 2044 // outputs. Instead, we check whether there is more than one set of output 2045 // blocks since this is the only case where we would have to move the 2046 // stores, and erase the extraneous blocks. 2047 if (OutputStoreBBs.size() == 1) { 2048 LLVM_DEBUG(dbgs() << "Move store instructions to the end block in " 2049 << *OG.OutlinedFunction << "\n"); 2050 DenseMap<Value *, BasicBlock *> OutputBlocks = OutputStoreBBs[0]; 2051 for (std::pair<Value *, BasicBlock *> &VBPair : OutputBlocks) { 2052 DenseMap<Value *, BasicBlock *>::iterator EndBBIt = 2053 EndBBs.find(VBPair.first); 2054 assert(EndBBIt != EndBBs.end() && "Could not find end block"); 2055 BasicBlock *EndBB = EndBBIt->second; 2056 BasicBlock *OutputBB = VBPair.second; 2057 Instruction *Term = OutputBB->getTerminator(); 2058 Term->eraseFromParent(); 2059 Term = EndBB->getTerminator(); 2060 moveBBContents(*OutputBB, *EndBB); 2061 Term->moveBefore(*EndBB, EndBB->end()); 2062 OutputBB->eraseFromParent(); 2063 } 2064 } 2065 } 2066 2067 /// Fill the new function that will serve as the replacement function for all of 2068 /// the extracted regions of a certain structure from the first region in the 2069 /// list of regions. Replace this first region's extracted function with the 2070 /// new overall function. 2071 /// 2072 /// \param [in] M - The module we are outlining from. 2073 /// \param [in] CurrentGroup - The group of regions to be outlined. 2074 /// \param [in,out] OutputStoreBBs - The output blocks for each different 2075 /// set of stores needed for the different functions. 2076 /// \param [in,out] FuncsToRemove - Extracted functions to erase from module 2077 /// once outlining is complete. 2078 /// \param [in] OutputMappings - Extracted functions to erase from module 2079 /// once outlining is complete. 2080 static void fillOverallFunction( 2081 Module &M, OutlinableGroup &CurrentGroup, 2082 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs, 2083 std::vector<Function *> &FuncsToRemove, 2084 const DenseMap<Value *, Value *> &OutputMappings) { 2085 OutlinableRegion *CurrentOS = CurrentGroup.Regions[0]; 2086 2087 // Move first extracted function's instructions into new function. 2088 LLVM_DEBUG(dbgs() << "Move instructions from " 2089 << *CurrentOS->ExtractedFunction << " to instruction " 2090 << *CurrentGroup.OutlinedFunction << "\n"); 2091 moveFunctionData(*CurrentOS->ExtractedFunction, 2092 *CurrentGroup.OutlinedFunction, CurrentGroup.EndBBs); 2093 2094 // Transfer the attributes from the function to the new function. 2095 for (Attribute A : CurrentOS->ExtractedFunction->getAttributes().getFnAttrs()) 2096 CurrentGroup.OutlinedFunction->addFnAttr(A); 2097 2098 // Create a new set of output blocks for the first extracted function. 2099 DenseMap<Value *, BasicBlock *> NewBBs; 2100 createAndInsertBasicBlocks(CurrentGroup.EndBBs, NewBBs, 2101 CurrentGroup.OutlinedFunction, "output_block_0"); 2102 CurrentOS->OutputBlockNum = 0; 2103 2104 replaceArgumentUses(*CurrentOS, NewBBs, OutputMappings, true); 2105 replaceConstants(*CurrentOS); 2106 2107 // We first identify if any output blocks are empty, if they are we remove 2108 // them. We then create a branch instruction to the basic block to the return 2109 // block for the function for each non empty output block. 2110 if (!analyzeAndPruneOutputBlocks(NewBBs, *CurrentOS)) { 2111 OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>()); 2112 for (std::pair<Value *, BasicBlock *> &VToBB : NewBBs) { 2113 DenseMap<Value *, BasicBlock *>::iterator VBBIt = 2114 CurrentGroup.EndBBs.find(VToBB.first); 2115 BasicBlock *EndBB = VBBIt->second; 2116 BranchInst::Create(EndBB, VToBB.second); 2117 OutputStoreBBs.back().insert(VToBB); 2118 } 2119 } 2120 2121 // Replace the call to the extracted function with the outlined function. 2122 CurrentOS->Call = replaceCalledFunction(M, *CurrentOS); 2123 2124 // We only delete the extracted functions at the end since we may need to 2125 // reference instructions contained in them for mapping purposes. 2126 FuncsToRemove.push_back(CurrentOS->ExtractedFunction); 2127 } 2128 2129 void IROutliner::deduplicateExtractedSections( 2130 Module &M, OutlinableGroup &CurrentGroup, 2131 std::vector<Function *> &FuncsToRemove, unsigned &OutlinedFunctionNum) { 2132 createFunction(M, CurrentGroup, OutlinedFunctionNum); 2133 2134 std::vector<DenseMap<Value *, BasicBlock *>> OutputStoreBBs; 2135 2136 OutlinableRegion *CurrentOS; 2137 2138 fillOverallFunction(M, CurrentGroup, OutputStoreBBs, FuncsToRemove, 2139 OutputMappings); 2140 2141 std::vector<Value *> SortedKeys; 2142 for (unsigned Idx = 1; Idx < CurrentGroup.Regions.size(); Idx++) { 2143 CurrentOS = CurrentGroup.Regions[Idx]; 2144 AttributeFuncs::mergeAttributesForOutlining(*CurrentGroup.OutlinedFunction, 2145 *CurrentOS->ExtractedFunction); 2146 2147 // Create a set of BasicBlocks, one for each return block, to hold the 2148 // needed store instructions. 2149 DenseMap<Value *, BasicBlock *> NewBBs; 2150 createAndInsertBasicBlocks( 2151 CurrentGroup.EndBBs, NewBBs, CurrentGroup.OutlinedFunction, 2152 "output_block_" + Twine(static_cast<unsigned>(Idx))); 2153 replaceArgumentUses(*CurrentOS, NewBBs, OutputMappings); 2154 alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBBs, 2155 CurrentGroup.EndBBs, OutputMappings, 2156 OutputStoreBBs); 2157 2158 CurrentOS->Call = replaceCalledFunction(M, *CurrentOS); 2159 FuncsToRemove.push_back(CurrentOS->ExtractedFunction); 2160 } 2161 2162 // Create a switch statement to handle the different output schemes. 2163 createSwitchStatement(M, CurrentGroup, CurrentGroup.EndBBs, OutputStoreBBs); 2164 2165 OutlinedFunctionNum++; 2166 } 2167 2168 /// Checks that the next instruction in the InstructionDataList matches the 2169 /// next instruction in the module. If they do not, there could be the 2170 /// possibility that extra code has been inserted, and we must ignore it. 2171 /// 2172 /// \param ID - The IRInstructionData to check the next instruction of. 2173 /// \returns true if the InstructionDataList and actual instruction match. 2174 static bool nextIRInstructionDataMatchesNextInst(IRInstructionData &ID) { 2175 // We check if there is a discrepancy between the InstructionDataList 2176 // and the actual next instruction in the module. If there is, it means 2177 // that an extra instruction was added, likely by the CodeExtractor. 2178 2179 // Since we do not have any similarity data about this particular 2180 // instruction, we cannot confidently outline it, and must discard this 2181 // candidate. 2182 IRInstructionDataList::iterator NextIDIt = std::next(ID.getIterator()); 2183 Instruction *NextIDLInst = NextIDIt->Inst; 2184 Instruction *NextModuleInst = nullptr; 2185 if (!ID.Inst->isTerminator()) 2186 NextModuleInst = ID.Inst->getNextNonDebugInstruction(); 2187 else if (NextIDLInst != nullptr) 2188 NextModuleInst = 2189 &*NextIDIt->Inst->getParent()->instructionsWithoutDebug().begin(); 2190 2191 if (NextIDLInst && NextIDLInst != NextModuleInst) 2192 return false; 2193 2194 return true; 2195 } 2196 2197 bool IROutliner::isCompatibleWithAlreadyOutlinedCode( 2198 const OutlinableRegion &Region) { 2199 IRSimilarityCandidate *IRSC = Region.Candidate; 2200 unsigned StartIdx = IRSC->getStartIdx(); 2201 unsigned EndIdx = IRSC->getEndIdx(); 2202 2203 // A check to make sure that we are not about to attempt to outline something 2204 // that has already been outlined. 2205 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++) 2206 if (Outlined.contains(Idx)) 2207 return false; 2208 2209 // We check if the recorded instruction matches the actual next instruction, 2210 // if it does not, we fix it in the InstructionDataList. 2211 if (!Region.Candidate->backInstruction()->isTerminator()) { 2212 Instruction *NewEndInst = 2213 Region.Candidate->backInstruction()->getNextNonDebugInstruction(); 2214 assert(NewEndInst && "Next instruction is a nullptr?"); 2215 if (Region.Candidate->end()->Inst != NewEndInst) { 2216 IRInstructionDataList *IDL = Region.Candidate->front()->IDL; 2217 IRInstructionData *NewEndIRID = new (InstDataAllocator.Allocate()) 2218 IRInstructionData(*NewEndInst, 2219 InstructionClassifier.visit(*NewEndInst), *IDL); 2220 2221 // Insert the first IRInstructionData of the new region after the 2222 // last IRInstructionData of the IRSimilarityCandidate. 2223 IDL->insert(Region.Candidate->end(), *NewEndIRID); 2224 } 2225 } 2226 2227 return none_of(*IRSC, [this](IRInstructionData &ID) { 2228 if (!nextIRInstructionDataMatchesNextInst(ID)) 2229 return true; 2230 2231 return !this->InstructionClassifier.visit(ID.Inst); 2232 }); 2233 } 2234 2235 void IROutliner::pruneIncompatibleRegions( 2236 std::vector<IRSimilarityCandidate> &CandidateVec, 2237 OutlinableGroup &CurrentGroup) { 2238 bool PreviouslyOutlined; 2239 2240 // Sort from beginning to end, so the IRSimilarityCandidates are in order. 2241 stable_sort(CandidateVec, [](const IRSimilarityCandidate &LHS, 2242 const IRSimilarityCandidate &RHS) { 2243 return LHS.getStartIdx() < RHS.getStartIdx(); 2244 }); 2245 2246 IRSimilarityCandidate &FirstCandidate = CandidateVec[0]; 2247 // Since outlining a call and a branch instruction will be the same as only 2248 // outlinining a call instruction, we ignore it as a space saving. 2249 if (FirstCandidate.getLength() == 2) { 2250 if (isa<CallInst>(FirstCandidate.front()->Inst) && 2251 isa<BranchInst>(FirstCandidate.back()->Inst)) 2252 return; 2253 } 2254 2255 unsigned CurrentEndIdx = 0; 2256 for (IRSimilarityCandidate &IRSC : CandidateVec) { 2257 PreviouslyOutlined = false; 2258 unsigned StartIdx = IRSC.getStartIdx(); 2259 unsigned EndIdx = IRSC.getEndIdx(); 2260 2261 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++) 2262 if (Outlined.contains(Idx)) { 2263 PreviouslyOutlined = true; 2264 break; 2265 } 2266 2267 if (PreviouslyOutlined) 2268 continue; 2269 2270 // Check over the instructions, and if the basic block has its address 2271 // taken for use somewhere else, we do not outline that block. 2272 bool BBHasAddressTaken = any_of(IRSC, [](IRInstructionData &ID){ 2273 return ID.Inst->getParent()->hasAddressTaken(); 2274 }); 2275 2276 if (BBHasAddressTaken) 2277 continue; 2278 2279 if (IRSC.front()->Inst->getFunction()->hasLinkOnceODRLinkage() && 2280 !OutlineFromLinkODRs) 2281 continue; 2282 2283 // Greedily prune out any regions that will overlap with already chosen 2284 // regions. 2285 if (CurrentEndIdx != 0 && StartIdx <= CurrentEndIdx) 2286 continue; 2287 2288 bool BadInst = any_of(IRSC, [this](IRInstructionData &ID) { 2289 if (!nextIRInstructionDataMatchesNextInst(ID)) 2290 return true; 2291 2292 return !this->InstructionClassifier.visit(ID.Inst); 2293 }); 2294 2295 if (BadInst) 2296 continue; 2297 2298 OutlinableRegion *OS = new (RegionAllocator.Allocate()) 2299 OutlinableRegion(IRSC, CurrentGroup); 2300 CurrentGroup.Regions.push_back(OS); 2301 2302 CurrentEndIdx = EndIdx; 2303 } 2304 } 2305 2306 InstructionCost 2307 IROutliner::findBenefitFromAllRegions(OutlinableGroup &CurrentGroup) { 2308 InstructionCost RegionBenefit = 0; 2309 for (OutlinableRegion *Region : CurrentGroup.Regions) { 2310 TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent()); 2311 // We add the number of instructions in the region to the benefit as an 2312 // estimate as to how much will be removed. 2313 RegionBenefit += Region->getBenefit(TTI); 2314 LLVM_DEBUG(dbgs() << "Adding: " << RegionBenefit 2315 << " saved instructions to overfall benefit.\n"); 2316 } 2317 2318 return RegionBenefit; 2319 } 2320 2321 /// For the \p OutputCanon number passed in find the value represented by this 2322 /// canonical number. If it is from a PHINode, we pick the first incoming 2323 /// value and return that Value instead. 2324 /// 2325 /// \param Region - The OutlinableRegion to get the Value from. 2326 /// \param OutputCanon - The canonical number to find the Value from. 2327 /// \returns The Value represented by a canonical number \p OutputCanon in \p 2328 /// Region. 2329 static Value *findOutputValueInRegion(OutlinableRegion &Region, 2330 unsigned OutputCanon) { 2331 OutlinableGroup &CurrentGroup = *Region.Parent; 2332 // If the value is greater than the value in the tracker, we have a 2333 // PHINode and will instead use one of the incoming values to find the 2334 // type. 2335 if (OutputCanon > CurrentGroup.PHINodeGVNTracker) { 2336 auto It = CurrentGroup.PHINodeGVNToGVNs.find(OutputCanon); 2337 assert(It != CurrentGroup.PHINodeGVNToGVNs.end() && 2338 "Could not find GVN set for PHINode number!"); 2339 assert(It->second.second.size() > 0 && "PHINode does not have any values!"); 2340 OutputCanon = *It->second.second.begin(); 2341 } 2342 Optional<unsigned> OGVN = Region.Candidate->fromCanonicalNum(OutputCanon); 2343 assert(OGVN.hasValue() && "Could not find GVN for Canonical Number?"); 2344 Optional<Value *> OV = Region.Candidate->fromGVN(*OGVN); 2345 assert(OV.hasValue() && "Could not find value for GVN?"); 2346 return *OV; 2347 } 2348 2349 InstructionCost 2350 IROutliner::findCostOutputReloads(OutlinableGroup &CurrentGroup) { 2351 InstructionCost OverallCost = 0; 2352 for (OutlinableRegion *Region : CurrentGroup.Regions) { 2353 TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent()); 2354 2355 // Each output incurs a load after the call, so we add that to the cost. 2356 for (unsigned OutputCanon : Region->GVNStores) { 2357 Value *V = findOutputValueInRegion(*Region, OutputCanon); 2358 InstructionCost LoadCost = 2359 TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0, 2360 TargetTransformInfo::TCK_CodeSize); 2361 2362 LLVM_DEBUG(dbgs() << "Adding: " << LoadCost 2363 << " instructions to cost for output of type " 2364 << *V->getType() << "\n"); 2365 OverallCost += LoadCost; 2366 } 2367 } 2368 2369 return OverallCost; 2370 } 2371 2372 /// Find the extra instructions needed to handle any output values for the 2373 /// region. 2374 /// 2375 /// \param [in] M - The Module to outline from. 2376 /// \param [in] CurrentGroup - The collection of OutlinableRegions to analyze. 2377 /// \param [in] TTI - The TargetTransformInfo used to collect information for 2378 /// new instruction costs. 2379 /// \returns the additional cost to handle the outputs. 2380 static InstructionCost findCostForOutputBlocks(Module &M, 2381 OutlinableGroup &CurrentGroup, 2382 TargetTransformInfo &TTI) { 2383 InstructionCost OutputCost = 0; 2384 unsigned NumOutputBranches = 0; 2385 2386 OutlinableRegion &FirstRegion = *CurrentGroup.Regions[0]; 2387 IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate; 2388 DenseSet<BasicBlock *> CandidateBlocks; 2389 Candidate.getBasicBlocks(CandidateBlocks); 2390 2391 // Count the number of different output branches that point to blocks outside 2392 // of the region. 2393 DenseSet<BasicBlock *> FoundBlocks; 2394 for (IRInstructionData &ID : Candidate) { 2395 if (!isa<BranchInst>(ID.Inst)) 2396 continue; 2397 2398 for (Value *V : ID.OperVals) { 2399 BasicBlock *BB = static_cast<BasicBlock *>(V); 2400 DenseSet<BasicBlock *>::iterator CBIt = CandidateBlocks.find(BB); 2401 if (CBIt != CandidateBlocks.end() || FoundBlocks.contains(BB)) 2402 continue; 2403 FoundBlocks.insert(BB); 2404 NumOutputBranches++; 2405 } 2406 } 2407 2408 CurrentGroup.BranchesToOutside = NumOutputBranches; 2409 2410 for (const ArrayRef<unsigned> &OutputUse : 2411 CurrentGroup.OutputGVNCombinations) { 2412 for (unsigned OutputCanon : OutputUse) { 2413 Value *V = findOutputValueInRegion(FirstRegion, OutputCanon); 2414 InstructionCost StoreCost = 2415 TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0, 2416 TargetTransformInfo::TCK_CodeSize); 2417 2418 // An instruction cost is added for each store set that needs to occur for 2419 // various output combinations inside the function, plus a branch to 2420 // return to the exit block. 2421 LLVM_DEBUG(dbgs() << "Adding: " << StoreCost 2422 << " instructions to cost for output of type " 2423 << *V->getType() << "\n"); 2424 OutputCost += StoreCost * NumOutputBranches; 2425 } 2426 2427 InstructionCost BranchCost = 2428 TTI.getCFInstrCost(Instruction::Br, TargetTransformInfo::TCK_CodeSize); 2429 LLVM_DEBUG(dbgs() << "Adding " << BranchCost << " to the current cost for" 2430 << " a branch instruction\n"); 2431 OutputCost += BranchCost * NumOutputBranches; 2432 } 2433 2434 // If there is more than one output scheme, we must have a comparison and 2435 // branch for each different item in the switch statement. 2436 if (CurrentGroup.OutputGVNCombinations.size() > 1) { 2437 InstructionCost ComparisonCost = TTI.getCmpSelInstrCost( 2438 Instruction::ICmp, Type::getInt32Ty(M.getContext()), 2439 Type::getInt32Ty(M.getContext()), CmpInst::BAD_ICMP_PREDICATE, 2440 TargetTransformInfo::TCK_CodeSize); 2441 InstructionCost BranchCost = 2442 TTI.getCFInstrCost(Instruction::Br, TargetTransformInfo::TCK_CodeSize); 2443 2444 unsigned DifferentBlocks = CurrentGroup.OutputGVNCombinations.size(); 2445 InstructionCost TotalCost = ComparisonCost * BranchCost * DifferentBlocks; 2446 2447 LLVM_DEBUG(dbgs() << "Adding: " << TotalCost 2448 << " instructions for each switch case for each different" 2449 << " output path in a function\n"); 2450 OutputCost += TotalCost * NumOutputBranches; 2451 } 2452 2453 return OutputCost; 2454 } 2455 2456 void IROutliner::findCostBenefit(Module &M, OutlinableGroup &CurrentGroup) { 2457 InstructionCost RegionBenefit = findBenefitFromAllRegions(CurrentGroup); 2458 CurrentGroup.Benefit += RegionBenefit; 2459 LLVM_DEBUG(dbgs() << "Current Benefit: " << CurrentGroup.Benefit << "\n"); 2460 2461 InstructionCost OutputReloadCost = findCostOutputReloads(CurrentGroup); 2462 CurrentGroup.Cost += OutputReloadCost; 2463 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n"); 2464 2465 InstructionCost AverageRegionBenefit = 2466 RegionBenefit / CurrentGroup.Regions.size(); 2467 unsigned OverallArgumentNum = CurrentGroup.ArgumentTypes.size(); 2468 unsigned NumRegions = CurrentGroup.Regions.size(); 2469 TargetTransformInfo &TTI = 2470 getTTI(*CurrentGroup.Regions[0]->Candidate->getFunction()); 2471 2472 // We add one region to the cost once, to account for the instructions added 2473 // inside of the newly created function. 2474 LLVM_DEBUG(dbgs() << "Adding: " << AverageRegionBenefit 2475 << " instructions to cost for body of new function.\n"); 2476 CurrentGroup.Cost += AverageRegionBenefit; 2477 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n"); 2478 2479 // For each argument, we must add an instruction for loading the argument 2480 // out of the register and into a value inside of the newly outlined function. 2481 LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum 2482 << " instructions to cost for each argument in the new" 2483 << " function.\n"); 2484 CurrentGroup.Cost += 2485 OverallArgumentNum * TargetTransformInfo::TCC_Basic; 2486 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n"); 2487 2488 // Each argument needs to either be loaded into a register or onto the stack. 2489 // Some arguments will only be loaded into the stack once the argument 2490 // registers are filled. 2491 LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum 2492 << " instructions to cost for each argument in the new" 2493 << " function " << NumRegions << " times for the " 2494 << "needed argument handling at the call site.\n"); 2495 CurrentGroup.Cost += 2496 2 * OverallArgumentNum * TargetTransformInfo::TCC_Basic * NumRegions; 2497 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n"); 2498 2499 CurrentGroup.Cost += findCostForOutputBlocks(M, CurrentGroup, TTI); 2500 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n"); 2501 } 2502 2503 void IROutliner::updateOutputMapping(OutlinableRegion &Region, 2504 ArrayRef<Value *> Outputs, 2505 LoadInst *LI) { 2506 // For and load instructions following the call 2507 Value *Operand = LI->getPointerOperand(); 2508 Optional<unsigned> OutputIdx = None; 2509 // Find if the operand it is an output register. 2510 for (unsigned ArgIdx = Region.NumExtractedInputs; 2511 ArgIdx < Region.Call->arg_size(); ArgIdx++) { 2512 if (Operand == Region.Call->getArgOperand(ArgIdx)) { 2513 OutputIdx = ArgIdx - Region.NumExtractedInputs; 2514 break; 2515 } 2516 } 2517 2518 // If we found an output register, place a mapping of the new value 2519 // to the original in the mapping. 2520 if (!OutputIdx.hasValue()) 2521 return; 2522 2523 if (OutputMappings.find(Outputs[OutputIdx.getValue()]) == 2524 OutputMappings.end()) { 2525 LLVM_DEBUG(dbgs() << "Mapping extracted output " << *LI << " to " 2526 << *Outputs[OutputIdx.getValue()] << "\n"); 2527 OutputMappings.insert(std::make_pair(LI, Outputs[OutputIdx.getValue()])); 2528 } else { 2529 Value *Orig = OutputMappings.find(Outputs[OutputIdx.getValue()])->second; 2530 LLVM_DEBUG(dbgs() << "Mapping extracted output " << *Orig << " to " 2531 << *Outputs[OutputIdx.getValue()] << "\n"); 2532 OutputMappings.insert(std::make_pair(LI, Orig)); 2533 } 2534 } 2535 2536 bool IROutliner::extractSection(OutlinableRegion &Region) { 2537 SetVector<Value *> ArgInputs, Outputs, SinkCands; 2538 assert(Region.StartBB && "StartBB for the OutlinableRegion is nullptr!"); 2539 BasicBlock *InitialStart = Region.StartBB; 2540 Function *OrigF = Region.StartBB->getParent(); 2541 CodeExtractorAnalysisCache CEAC(*OrigF); 2542 Region.ExtractedFunction = 2543 Region.CE->extractCodeRegion(CEAC, ArgInputs, Outputs); 2544 2545 // If the extraction was successful, find the BasicBlock, and reassign the 2546 // OutlinableRegion blocks 2547 if (!Region.ExtractedFunction) { 2548 LLVM_DEBUG(dbgs() << "CodeExtractor failed to outline " << Region.StartBB 2549 << "\n"); 2550 Region.reattachCandidate(); 2551 return false; 2552 } 2553 2554 // Get the block containing the called branch, and reassign the blocks as 2555 // necessary. If the original block still exists, it is because we ended on 2556 // a branch instruction, and so we move the contents into the block before 2557 // and assign the previous block correctly. 2558 User *InstAsUser = Region.ExtractedFunction->user_back(); 2559 BasicBlock *RewrittenBB = cast<Instruction>(InstAsUser)->getParent(); 2560 Region.PrevBB = RewrittenBB->getSinglePredecessor(); 2561 assert(Region.PrevBB && "PrevBB is nullptr?"); 2562 if (Region.PrevBB == InitialStart) { 2563 BasicBlock *NewPrev = InitialStart->getSinglePredecessor(); 2564 Instruction *BI = NewPrev->getTerminator(); 2565 BI->eraseFromParent(); 2566 moveBBContents(*InitialStart, *NewPrev); 2567 Region.PrevBB = NewPrev; 2568 InitialStart->eraseFromParent(); 2569 } 2570 2571 Region.StartBB = RewrittenBB; 2572 Region.EndBB = RewrittenBB; 2573 2574 // The sequences of outlinable regions has now changed. We must fix the 2575 // IRInstructionDataList for consistency. Although they may not be illegal 2576 // instructions, they should not be compared with anything else as they 2577 // should not be outlined in this round. So marking these as illegal is 2578 // allowed. 2579 IRInstructionDataList *IDL = Region.Candidate->front()->IDL; 2580 Instruction *BeginRewritten = &*RewrittenBB->begin(); 2581 Instruction *EndRewritten = &*RewrittenBB->begin(); 2582 Region.NewFront = new (InstDataAllocator.Allocate()) IRInstructionData( 2583 *BeginRewritten, InstructionClassifier.visit(*BeginRewritten), *IDL); 2584 Region.NewBack = new (InstDataAllocator.Allocate()) IRInstructionData( 2585 *EndRewritten, InstructionClassifier.visit(*EndRewritten), *IDL); 2586 2587 // Insert the first IRInstructionData of the new region in front of the 2588 // first IRInstructionData of the IRSimilarityCandidate. 2589 IDL->insert(Region.Candidate->begin(), *Region.NewFront); 2590 // Insert the first IRInstructionData of the new region after the 2591 // last IRInstructionData of the IRSimilarityCandidate. 2592 IDL->insert(Region.Candidate->end(), *Region.NewBack); 2593 // Remove the IRInstructionData from the IRSimilarityCandidate. 2594 IDL->erase(Region.Candidate->begin(), std::prev(Region.Candidate->end())); 2595 2596 assert(RewrittenBB != nullptr && 2597 "Could not find a predecessor after extraction!"); 2598 2599 // Iterate over the new set of instructions to find the new call 2600 // instruction. 2601 for (Instruction &I : *RewrittenBB) 2602 if (CallInst *CI = dyn_cast<CallInst>(&I)) { 2603 if (Region.ExtractedFunction == CI->getCalledFunction()) 2604 Region.Call = CI; 2605 } else if (LoadInst *LI = dyn_cast<LoadInst>(&I)) 2606 updateOutputMapping(Region, Outputs.getArrayRef(), LI); 2607 Region.reattachCandidate(); 2608 return true; 2609 } 2610 2611 unsigned IROutliner::doOutline(Module &M) { 2612 // Find the possible similarity sections. 2613 InstructionClassifier.EnableBranches = !DisableBranches; 2614 InstructionClassifier.EnableIndirectCalls = !DisableIndirectCalls; 2615 InstructionClassifier.EnableIntrinsics = !DisableIntrinsics; 2616 2617 IRSimilarityIdentifier &Identifier = getIRSI(M); 2618 SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity(); 2619 2620 // Sort them by size of extracted sections 2621 unsigned OutlinedFunctionNum = 0; 2622 // If we only have one SimilarityGroup in SimilarityCandidates, we do not have 2623 // to sort them by the potential number of instructions to be outlined 2624 if (SimilarityCandidates.size() > 1) 2625 llvm::stable_sort(SimilarityCandidates, 2626 [](const std::vector<IRSimilarityCandidate> &LHS, 2627 const std::vector<IRSimilarityCandidate> &RHS) { 2628 return LHS[0].getLength() * LHS.size() > 2629 RHS[0].getLength() * RHS.size(); 2630 }); 2631 // Creating OutlinableGroups for each SimilarityCandidate to be used in 2632 // each of the following for loops to avoid making an allocator. 2633 std::vector<OutlinableGroup> PotentialGroups(SimilarityCandidates.size()); 2634 2635 DenseSet<unsigned> NotSame; 2636 std::vector<OutlinableGroup *> NegativeCostGroups; 2637 std::vector<OutlinableRegion *> OutlinedRegions; 2638 // Iterate over the possible sets of similarity. 2639 unsigned PotentialGroupIdx = 0; 2640 for (SimilarityGroup &CandidateVec : SimilarityCandidates) { 2641 OutlinableGroup &CurrentGroup = PotentialGroups[PotentialGroupIdx++]; 2642 2643 // Remove entries that were previously outlined 2644 pruneIncompatibleRegions(CandidateVec, CurrentGroup); 2645 2646 // We pruned the number of regions to 0 to 1, meaning that it's not worth 2647 // trying to outlined since there is no compatible similar instance of this 2648 // code. 2649 if (CurrentGroup.Regions.size() < 2) 2650 continue; 2651 2652 // Determine if there are any values that are the same constant throughout 2653 // each section in the set. 2654 NotSame.clear(); 2655 CurrentGroup.findSameConstants(NotSame); 2656 2657 if (CurrentGroup.IgnoreGroup) 2658 continue; 2659 2660 // Create a CodeExtractor for each outlinable region. Identify inputs and 2661 // outputs for each section using the code extractor and create the argument 2662 // types for the Aggregate Outlining Function. 2663 OutlinedRegions.clear(); 2664 for (OutlinableRegion *OS : CurrentGroup.Regions) { 2665 // Break the outlinable region out of its parent BasicBlock into its own 2666 // BasicBlocks (see function implementation). 2667 OS->splitCandidate(); 2668 2669 // There's a chance that when the region is split, extra instructions are 2670 // added to the region. This makes the region no longer viable 2671 // to be split, so we ignore it for outlining. 2672 if (!OS->CandidateSplit) 2673 continue; 2674 2675 SmallVector<BasicBlock *> BE; 2676 DenseSet<BasicBlock *> BlocksInRegion; 2677 OS->Candidate->getBasicBlocks(BlocksInRegion, BE); 2678 OS->CE = new (ExtractorAllocator.Allocate()) 2679 CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false, 2680 false, nullptr, "outlined"); 2681 findAddInputsOutputs(M, *OS, NotSame); 2682 if (!OS->IgnoreRegion) 2683 OutlinedRegions.push_back(OS); 2684 2685 // We recombine the blocks together now that we have gathered all the 2686 // needed information. 2687 OS->reattachCandidate(); 2688 } 2689 2690 CurrentGroup.Regions = std::move(OutlinedRegions); 2691 2692 if (CurrentGroup.Regions.empty()) 2693 continue; 2694 2695 CurrentGroup.collectGVNStoreSets(M); 2696 2697 if (CostModel) 2698 findCostBenefit(M, CurrentGroup); 2699 2700 // If we are adhering to the cost model, skip those groups where the cost 2701 // outweighs the benefits. 2702 if (CurrentGroup.Cost >= CurrentGroup.Benefit && CostModel) { 2703 OptimizationRemarkEmitter &ORE = 2704 getORE(*CurrentGroup.Regions[0]->Candidate->getFunction()); 2705 ORE.emit([&]() { 2706 IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate; 2707 OptimizationRemarkMissed R(DEBUG_TYPE, "WouldNotDecreaseSize", 2708 C->frontInstruction()); 2709 R << "did not outline " 2710 << ore::NV(std::to_string(CurrentGroup.Regions.size())) 2711 << " regions due to estimated increase of " 2712 << ore::NV("InstructionIncrease", 2713 CurrentGroup.Cost - CurrentGroup.Benefit) 2714 << " instructions at locations "; 2715 interleave( 2716 CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(), 2717 [&R](OutlinableRegion *Region) { 2718 R << ore::NV( 2719 "DebugLoc", 2720 Region->Candidate->frontInstruction()->getDebugLoc()); 2721 }, 2722 [&R]() { R << " "; }); 2723 return R; 2724 }); 2725 continue; 2726 } 2727 2728 NegativeCostGroups.push_back(&CurrentGroup); 2729 } 2730 2731 ExtractorAllocator.DestroyAll(); 2732 2733 if (NegativeCostGroups.size() > 1) 2734 stable_sort(NegativeCostGroups, 2735 [](const OutlinableGroup *LHS, const OutlinableGroup *RHS) { 2736 return LHS->Benefit - LHS->Cost > RHS->Benefit - RHS->Cost; 2737 }); 2738 2739 std::vector<Function *> FuncsToRemove; 2740 for (OutlinableGroup *CG : NegativeCostGroups) { 2741 OutlinableGroup &CurrentGroup = *CG; 2742 2743 OutlinedRegions.clear(); 2744 for (OutlinableRegion *Region : CurrentGroup.Regions) { 2745 // We check whether our region is compatible with what has already been 2746 // outlined, and whether we need to ignore this item. 2747 if (!isCompatibleWithAlreadyOutlinedCode(*Region)) 2748 continue; 2749 OutlinedRegions.push_back(Region); 2750 } 2751 2752 if (OutlinedRegions.size() < 2) 2753 continue; 2754 2755 // Reestimate the cost and benefit of the OutlinableGroup. Continue only if 2756 // we are still outlining enough regions to make up for the added cost. 2757 CurrentGroup.Regions = std::move(OutlinedRegions); 2758 if (CostModel) { 2759 CurrentGroup.Benefit = 0; 2760 CurrentGroup.Cost = 0; 2761 findCostBenefit(M, CurrentGroup); 2762 if (CurrentGroup.Cost >= CurrentGroup.Benefit) 2763 continue; 2764 } 2765 OutlinedRegions.clear(); 2766 for (OutlinableRegion *Region : CurrentGroup.Regions) { 2767 Region->splitCandidate(); 2768 if (!Region->CandidateSplit) 2769 continue; 2770 OutlinedRegions.push_back(Region); 2771 } 2772 2773 CurrentGroup.Regions = std::move(OutlinedRegions); 2774 if (CurrentGroup.Regions.size() < 2) { 2775 for (OutlinableRegion *R : CurrentGroup.Regions) 2776 R->reattachCandidate(); 2777 continue; 2778 } 2779 2780 LLVM_DEBUG(dbgs() << "Outlining regions with cost " << CurrentGroup.Cost 2781 << " and benefit " << CurrentGroup.Benefit << "\n"); 2782 2783 // Create functions out of all the sections, and mark them as outlined. 2784 OutlinedRegions.clear(); 2785 for (OutlinableRegion *OS : CurrentGroup.Regions) { 2786 SmallVector<BasicBlock *> BE; 2787 DenseSet<BasicBlock *> BlocksInRegion; 2788 OS->Candidate->getBasicBlocks(BlocksInRegion, BE); 2789 OS->CE = new (ExtractorAllocator.Allocate()) 2790 CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false, 2791 false, nullptr, "outlined"); 2792 bool FunctionOutlined = extractSection(*OS); 2793 if (FunctionOutlined) { 2794 unsigned StartIdx = OS->Candidate->getStartIdx(); 2795 unsigned EndIdx = OS->Candidate->getEndIdx(); 2796 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++) 2797 Outlined.insert(Idx); 2798 2799 OutlinedRegions.push_back(OS); 2800 } 2801 } 2802 2803 LLVM_DEBUG(dbgs() << "Outlined " << OutlinedRegions.size() 2804 << " with benefit " << CurrentGroup.Benefit 2805 << " and cost " << CurrentGroup.Cost << "\n"); 2806 2807 CurrentGroup.Regions = std::move(OutlinedRegions); 2808 2809 if (CurrentGroup.Regions.empty()) 2810 continue; 2811 2812 OptimizationRemarkEmitter &ORE = 2813 getORE(*CurrentGroup.Regions[0]->Call->getFunction()); 2814 ORE.emit([&]() { 2815 IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate; 2816 OptimizationRemark R(DEBUG_TYPE, "Outlined", C->front()->Inst); 2817 R << "outlined " << ore::NV(std::to_string(CurrentGroup.Regions.size())) 2818 << " regions with decrease of " 2819 << ore::NV("Benefit", CurrentGroup.Benefit - CurrentGroup.Cost) 2820 << " instructions at locations "; 2821 interleave( 2822 CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(), 2823 [&R](OutlinableRegion *Region) { 2824 R << ore::NV("DebugLoc", 2825 Region->Candidate->frontInstruction()->getDebugLoc()); 2826 }, 2827 [&R]() { R << " "; }); 2828 return R; 2829 }); 2830 2831 deduplicateExtractedSections(M, CurrentGroup, FuncsToRemove, 2832 OutlinedFunctionNum); 2833 } 2834 2835 for (Function *F : FuncsToRemove) 2836 F->eraseFromParent(); 2837 2838 return OutlinedFunctionNum; 2839 } 2840 2841 bool IROutliner::run(Module &M) { 2842 CostModel = !NoCostModel; 2843 OutlineFromLinkODRs = EnableLinkOnceODRIROutlining; 2844 2845 return doOutline(M) > 0; 2846 } 2847 2848 // Pass Manager Boilerplate 2849 namespace { 2850 class IROutlinerLegacyPass : public ModulePass { 2851 public: 2852 static char ID; 2853 IROutlinerLegacyPass() : ModulePass(ID) { 2854 initializeIROutlinerLegacyPassPass(*PassRegistry::getPassRegistry()); 2855 } 2856 2857 void getAnalysisUsage(AnalysisUsage &AU) const override { 2858 AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); 2859 AU.addRequired<TargetTransformInfoWrapperPass>(); 2860 AU.addRequired<IRSimilarityIdentifierWrapperPass>(); 2861 } 2862 2863 bool runOnModule(Module &M) override; 2864 }; 2865 } // namespace 2866 2867 bool IROutlinerLegacyPass::runOnModule(Module &M) { 2868 if (skipModule(M)) 2869 return false; 2870 2871 std::unique_ptr<OptimizationRemarkEmitter> ORE; 2872 auto GORE = [&ORE](Function &F) -> OptimizationRemarkEmitter & { 2873 ORE.reset(new OptimizationRemarkEmitter(&F)); 2874 return *ORE.get(); 2875 }; 2876 2877 auto GTTI = [this](Function &F) -> TargetTransformInfo & { 2878 return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 2879 }; 2880 2881 auto GIRSI = [this](Module &) -> IRSimilarityIdentifier & { 2882 return this->getAnalysis<IRSimilarityIdentifierWrapperPass>().getIRSI(); 2883 }; 2884 2885 return IROutliner(GTTI, GIRSI, GORE).run(M); 2886 } 2887 2888 PreservedAnalyses IROutlinerPass::run(Module &M, ModuleAnalysisManager &AM) { 2889 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 2890 2891 std::function<TargetTransformInfo &(Function &)> GTTI = 2892 [&FAM](Function &F) -> TargetTransformInfo & { 2893 return FAM.getResult<TargetIRAnalysis>(F); 2894 }; 2895 2896 std::function<IRSimilarityIdentifier &(Module &)> GIRSI = 2897 [&AM](Module &M) -> IRSimilarityIdentifier & { 2898 return AM.getResult<IRSimilarityAnalysis>(M); 2899 }; 2900 2901 std::unique_ptr<OptimizationRemarkEmitter> ORE; 2902 std::function<OptimizationRemarkEmitter &(Function &)> GORE = 2903 [&ORE](Function &F) -> OptimizationRemarkEmitter & { 2904 ORE.reset(new OptimizationRemarkEmitter(&F)); 2905 return *ORE.get(); 2906 }; 2907 2908 if (IROutliner(GTTI, GIRSI, GORE).run(M)) 2909 return PreservedAnalyses::none(); 2910 return PreservedAnalyses::all(); 2911 } 2912 2913 char IROutlinerLegacyPass::ID = 0; 2914 INITIALIZE_PASS_BEGIN(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false, 2915 false) 2916 INITIALIZE_PASS_DEPENDENCY(IRSimilarityIdentifierWrapperPass) 2917 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) 2918 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 2919 INITIALIZE_PASS_END(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false, 2920 false) 2921 2922 ModulePass *llvm::createIROutlinerPass() { return new IROutlinerLegacyPass(); } 2923