//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach. It should eventually be removed.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "codegenprepare"

STATISTIC(NumBlocksElim, "Number of blocks eliminated");
STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
                      "sunken Cmps");
STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
                       "of sunken Casts");
STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
                          "computations were sunk");
STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
STATISTIC(NumAndsAdded,
          "Number of and mask instructions added to form ext loads");
STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
STATISTIC(NumRetsDup, "Number of return instructions duplicated");
STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
STATISTIC(NumAndCmpsMoved, "Number of and/cmp's pushed into branches");
STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");

static cl::opt<bool> DisableBranchOpts(
    "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
    cl::desc("Disable branch optimizations in CodeGenPrepare"));

static cl::opt<bool>
    DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
                  cl::desc("Disable GC optimizations in CodeGenPrepare"));

static cl::opt<bool> DisableSelectToBranch(
    "disable-cgp-select2branch", cl::Hidden, cl::init(false),
    cl::desc("Disable select to branch conversion."));

static cl::opt<bool> AddrSinkUsingGEPs(
    "addr-sink-using-gep", cl::Hidden, cl::init(false),
    cl::desc("Address sinking in CGP using GEPs."));

static cl::opt<bool> EnableAndCmpSinking(
    "enable-andcmp-sinking", cl::Hidden, cl::init(true),
    cl::desc("Enable sinking and/cmp into branches."));

static cl::opt<bool> DisableStoreExtract(
    "disable-cgp-store-extract", cl::Hidden, cl::init(false),
    cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));

static cl::opt<bool> StressStoreExtract(
    "stress-cgp-store-extract", cl::Hidden, cl::init(false),
    cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));

static cl::opt<bool> DisableExtLdPromotion(
    "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
    cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
             "CodeGenPrepare"));

static cl::opt<bool> StressExtLdPromotion(
    "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
    cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
             "optimization in CodeGenPrepare"));

static cl::opt<bool> DisablePreheaderProtect(
    "disable-preheader-prot", cl::Hidden, cl::init(false),
    cl::desc("Disable protection against removing loop preheaders"));

static cl::opt<bool> ProfileGuidedSectionPrefix(
    "profile-guided-section-prefix", cl::Hidden, cl::init(true),
    cl::desc("Use profile info to add section prefix for hot/cold functions"));

static cl::opt<unsigned> FreqRatioToSkipMerge(
    "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
    cl::desc("Skip merging empty blocks if (frequency of empty block) / "
             "(frequency of destination block) is greater than this ratio"));

static cl::opt<bool> ForceSplitStore(
    "force-split-store", cl::Hidden, cl::init(false),
    cl::desc("Force store splitting no matter what the target query says."));

namespace {
typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
typedef PointerIntPair<Type *, 1, bool> TypeIsSExt;
typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
class TypePromotionTransaction;

class CodeGenPrepare : public FunctionPass {
  const TargetMachine *TM;
  const TargetLowering *TLI;
  const TargetTransformInfo *TTI;
  const TargetLibraryInfo *TLInfo;
  const LoopInfo *LI;
  std::unique_ptr<BlockFrequencyInfo> BFI;
  std::unique_ptr<BranchProbabilityInfo> BPI;

  /// As we scan instructions optimizing them, this is the next instruction
  /// to optimize. Transforms that can invalidate this should update it.
  BasicBlock::iterator CurInstIterator;

  /// Keeps track of non-local addresses that have been sunk into a block.
  /// This allows us to avoid inserting duplicate code for blocks with
  /// multiple load/stores of the same address.
160 ValueMap<Value*, Value*> SunkAddrs; 161 162 /// Keeps track of all instructions inserted for the current function. 163 SetOfInstrs InsertedInsts; 164 /// Keeps track of the type of the related instruction before their 165 /// promotion for the current function. 166 InstrToOrigTy PromotedInsts; 167 168 /// True if CFG is modified in any way. 169 bool ModifiedDT; 170 171 /// True if optimizing for size. 172 bool OptSize; 173 174 /// DataLayout for the Function being processed. 175 const DataLayout *DL; 176 177 public: 178 static char ID; // Pass identification, replacement for typeid 179 explicit CodeGenPrepare(const TargetMachine *TM = nullptr) 180 : FunctionPass(ID), TM(TM), TLI(nullptr), TTI(nullptr), DL(nullptr) { 181 initializeCodeGenPreparePass(*PassRegistry::getPassRegistry()); 182 } 183 bool runOnFunction(Function &F) override; 184 185 StringRef getPassName() const override { return "CodeGen Prepare"; } 186 187 void getAnalysisUsage(AnalysisUsage &AU) const override { 188 // FIXME: When we can selectively preserve passes, preserve the domtree. 189 AU.addRequired<ProfileSummaryInfoWrapperPass>(); 190 AU.addRequired<TargetLibraryInfoWrapperPass>(); 191 AU.addRequired<TargetTransformInfoWrapperPass>(); 192 AU.addRequired<LoopInfoWrapperPass>(); 193 } 194 195 private: 196 bool eliminateFallThrough(Function &F); 197 bool eliminateMostlyEmptyBlocks(Function &F); 198 BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB); 199 bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; 200 void eliminateMostlyEmptyBlock(BasicBlock *BB); 201 bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB, 202 bool isPreheader); 203 bool optimizeBlock(BasicBlock &BB, bool& ModifiedDT); 204 bool optimizeInst(Instruction *I, bool& ModifiedDT); 205 bool optimizeMemoryInst(Instruction *I, Value *Addr, 206 Type *AccessTy, unsigned AS); 207 bool optimizeInlineAsmInst(CallInst *CS); 208 bool optimizeCallInst(CallInst *CI, bool& ModifiedDT); 209 bool moveExtToFormExtLoad(Instruction *&I); 210 bool optimizeExtUses(Instruction *I); 211 bool optimizeLoadExt(LoadInst *I); 212 bool optimizeSelectInst(SelectInst *SI); 213 bool optimizeShuffleVectorInst(ShuffleVectorInst *SI); 214 bool optimizeSwitchInst(SwitchInst *CI); 215 bool optimizeExtractElementInst(Instruction *Inst); 216 bool dupRetToEnableTailCallOpts(BasicBlock *BB); 217 bool placeDbgValues(Function &F); 218 bool sinkAndCmp(Function &F); 219 bool extLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI, 220 Instruction *&Inst, 221 const SmallVectorImpl<Instruction *> &Exts, 222 unsigned CreatedInstCost); 223 bool splitBranchCondition(Function &F); 224 bool simplifyOffsetableRelocate(Instruction &I); 225 }; 226 } 227 228 char CodeGenPrepare::ID = 0; 229 INITIALIZE_TM_PASS_BEGIN(CodeGenPrepare, "codegenprepare", 230 "Optimize for code generation", false, false) 231 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 232 INITIALIZE_TM_PASS_END(CodeGenPrepare, "codegenprepare", 233 "Optimize for code generation", false, false) 234 235 FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) { 236 return new CodeGenPrepare(TM); 237 } 238 239 bool CodeGenPrepare::runOnFunction(Function &F) { 240 if (skipFunction(F)) 241 return false; 242 243 DL = &F.getParent()->getDataLayout(); 244 245 bool EverMadeChange = false; 246 // Clear per function information. 
247 InsertedInsts.clear(); 248 PromotedInsts.clear(); 249 BFI.reset(); 250 BPI.reset(); 251 252 ModifiedDT = false; 253 if (TM) 254 TLI = TM->getSubtargetImpl(F)->getTargetLowering(); 255 TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); 256 TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 257 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 258 OptSize = F.optForSize(); 259 260 if (ProfileGuidedSectionPrefix) { 261 ProfileSummaryInfo *PSI = 262 getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 263 if (PSI->isFunctionEntryHot(&F)) 264 F.setSectionPrefix(".hot"); 265 else if (PSI->isFunctionEntryCold(&F)) 266 F.setSectionPrefix(".cold"); 267 } 268 269 /// This optimization identifies DIV instructions that can be 270 /// profitably bypassed and carried out with a shorter, faster divide. 271 if (!OptSize && TLI && TLI->isSlowDivBypassed()) { 272 const DenseMap<unsigned int, unsigned int> &BypassWidths = 273 TLI->getBypassSlowDivWidths(); 274 BasicBlock* BB = &*F.begin(); 275 while (BB != nullptr) { 276 // bypassSlowDivision may create new BBs, but we don't want to reapply the 277 // optimization to those blocks. 278 BasicBlock* Next = BB->getNextNode(); 279 EverMadeChange |= bypassSlowDivision(BB, BypassWidths); 280 BB = Next; 281 } 282 } 283 284 // Eliminate blocks that contain only PHI nodes and an 285 // unconditional branch. 286 EverMadeChange |= eliminateMostlyEmptyBlocks(F); 287 288 // llvm.dbg.value is far away from the value then iSel may not be able 289 // handle it properly. iSel will drop llvm.dbg.value if it can not 290 // find a node corresponding to the value. 291 EverMadeChange |= placeDbgValues(F); 292 293 // If there is a mask, compare against zero, and branch that can be combined 294 // into a single target instruction, push the mask and compare into branch 295 // users. Do this before OptimizeBlock -> OptimizeInst -> 296 // OptimizeCmpExpression, which perturbs the pattern being searched for. 297 if (!DisableBranchOpts) { 298 EverMadeChange |= sinkAndCmp(F); 299 EverMadeChange |= splitBranchCondition(F); 300 } 301 302 bool MadeChange = true; 303 while (MadeChange) { 304 MadeChange = false; 305 for (Function::iterator I = F.begin(); I != F.end(); ) { 306 BasicBlock *BB = &*I++; 307 bool ModifiedDTOnIteration = false; 308 MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration); 309 310 // Restart BB iteration if the dominator tree of the Function was changed 311 if (ModifiedDTOnIteration) 312 break; 313 } 314 EverMadeChange |= MadeChange; 315 } 316 317 SunkAddrs.clear(); 318 319 if (!DisableBranchOpts) { 320 MadeChange = false; 321 SmallPtrSet<BasicBlock*, 8> WorkList; 322 for (BasicBlock &BB : F) { 323 SmallVector<BasicBlock *, 2> Successors(succ_begin(&BB), succ_end(&BB)); 324 MadeChange |= ConstantFoldTerminator(&BB, true); 325 if (!MadeChange) continue; 326 327 for (SmallVectorImpl<BasicBlock*>::iterator 328 II = Successors.begin(), IE = Successors.end(); II != IE; ++II) 329 if (pred_begin(*II) == pred_end(*II)) 330 WorkList.insert(*II); 331 } 332 333 // Delete the dead blocks and any of their dead successors. 
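    // Deleting a block may leave some of its successors without predecessors,
    // so those successors are queued as well and the worklist is drained
    // until no more dead blocks remain.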
334 MadeChange |= !WorkList.empty(); 335 while (!WorkList.empty()) { 336 BasicBlock *BB = *WorkList.begin(); 337 WorkList.erase(BB); 338 SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB)); 339 340 DeleteDeadBlock(BB); 341 342 for (SmallVectorImpl<BasicBlock*>::iterator 343 II = Successors.begin(), IE = Successors.end(); II != IE; ++II) 344 if (pred_begin(*II) == pred_end(*II)) 345 WorkList.insert(*II); 346 } 347 348 // Merge pairs of basic blocks with unconditional branches, connected by 349 // a single edge. 350 if (EverMadeChange || MadeChange) 351 MadeChange |= eliminateFallThrough(F); 352 353 EverMadeChange |= MadeChange; 354 } 355 356 if (!DisableGCOpts) { 357 SmallVector<Instruction *, 2> Statepoints; 358 for (BasicBlock &BB : F) 359 for (Instruction &I : BB) 360 if (isStatepoint(I)) 361 Statepoints.push_back(&I); 362 for (auto &I : Statepoints) 363 EverMadeChange |= simplifyOffsetableRelocate(*I); 364 } 365 366 return EverMadeChange; 367 } 368 369 /// Merge basic blocks which are connected by a single edge, where one of the 370 /// basic blocks has a single successor pointing to the other basic block, 371 /// which has a single predecessor. 372 bool CodeGenPrepare::eliminateFallThrough(Function &F) { 373 bool Changed = false; 374 // Scan all of the blocks in the function, except for the entry block. 375 for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { 376 BasicBlock *BB = &*I++; 377 // If the destination block has a single pred, then this is a trivial 378 // edge, just collapse it. 379 BasicBlock *SinglePred = BB->getSinglePredecessor(); 380 381 // Don't merge if BB's address is taken. 382 if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue; 383 384 BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator()); 385 if (Term && !Term->isConditional()) { 386 Changed = true; 387 DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n"); 388 // Remember if SinglePred was the entry block of the function. 389 // If so, we will need to move BB back to the entry position. 390 bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); 391 MergeBasicBlockIntoOnlyPred(BB, nullptr); 392 393 if (isEntry && BB != &BB->getParent()->getEntryBlock()) 394 BB->moveBefore(&BB->getParent()->getEntryBlock()); 395 396 // We have erased a block. Update the iterator. 397 I = BB->getIterator(); 398 } 399 } 400 return Changed; 401 } 402 403 /// Find a destination block from BB if BB is mergeable empty block. 404 BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) { 405 // If this block doesn't end with an uncond branch, ignore it. 406 BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); 407 if (!BI || !BI->isUnconditional()) 408 return nullptr; 409 410 // If the instruction before the branch (skipping debug info) isn't a phi 411 // node, then other stuff is happening here. 412 BasicBlock::iterator BBI = BI->getIterator(); 413 if (BBI != BB->begin()) { 414 --BBI; 415 while (isa<DbgInfoIntrinsic>(BBI)) { 416 if (BBI == BB->begin()) 417 break; 418 --BBI; 419 } 420 if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI)) 421 return nullptr; 422 } 423 424 // Do not break infinite loops. 425 BasicBlock *DestBB = BI->getSuccessor(0); 426 if (DestBB == BB) 427 return nullptr; 428 429 if (!canMergeBlocks(BB, DestBB)) 430 DestBB = nullptr; 431 432 return DestBB; 433 } 434 435 /// Eliminate blocks that contain only PHI nodes, debug info directives, and an 436 /// unconditional branch. Passes before isel (e.g. 
LSR/loopsimplify) often split
/// edges in ways that are non-optimal for isel. Start by eliminating these
/// blocks so we can split them the way we want them.
bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
  SmallPtrSet<BasicBlock *, 16> Preheaders;
  SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
  while (!LoopList.empty()) {
    Loop *L = LoopList.pop_back_val();
    LoopList.insert(LoopList.end(), L->begin(), L->end());
    if (BasicBlock *Preheader = L->getLoopPreheader())
      Preheaders.insert(Preheader);
  }

  bool MadeChange = false;
  // Note that this intentionally skips the entry block.
  for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
    BasicBlock *BB = &*I++;
    BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
    if (!DestBB ||
        !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
      continue;

    eliminateMostlyEmptyBlock(BB);
    MadeChange = true;
  }
  return MadeChange;
}

bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
                                                   BasicBlock *DestBB,
                                                   bool isPreheader) {
  // Do not delete loop preheaders if doing so would create a critical edge.
  // Loop preheaders can be good locations to spill registers. If the
  // preheader is deleted and we create a critical edge, registers may be
  // spilled in the loop body instead.
  if (!DisablePreheaderProtect && isPreheader &&
      !(BB->getSinglePredecessor() &&
        BB->getSinglePredecessor()->getSingleSuccessor()))
    return false;

  // Try to skip merging if the unique predecessor of BB is terminated by a
  // switch or indirect branch instruction, and BB is used as an incoming block
  // of PHIs in DestBB. In such a case, merging BB and DestBB would cause ISel
  // to add COPY instructions in the predecessor of BB instead of BB (if it is
  // not merged). Note that the critical edge created by merging such blocks
  // won't be split in MachineSink because the jump table is not analyzable.
  // By keeping such an empty block (BB), ISel will place COPY instructions in
  // BB, not in the predecessor of BB.
  BasicBlock *Pred = BB->getUniquePredecessor();
  if (!Pred ||
      !(isa<SwitchInst>(Pred->getTerminator()) ||
        isa<IndirectBrInst>(Pred->getTerminator())))
    return true;

  if (BB->getTerminator() != BB->getFirstNonPHI())
    return true;

  // We use a simple cost heuristic: skipping the merge is profitable if the
  // cost of skipping merging is less than the cost of merging, i.e.
  // Cost(skipping merging) < Cost(merging BB), where
  // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
  // Cost(merging BB) is Freq(Pred) * Cost(Copy).
  // Assuming Cost(Copy) == Cost(Branch), this simplifies to:
  // Freq(Pred) / Freq(BB) > 2.
  // Note that if there are multiple empty blocks sharing the same incoming
  // value for the PHIs in the DestBB, we consider them together. In such a
  // case, Cost(merging BB) will be the sum of their frequencies.

  if (!isa<PHINode>(DestBB->begin()))
    return true;

  SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;

  // Find all other incoming blocks from which incoming values of all PHIs in
  // DestBB are the same as the ones from BB.
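  // For example, if DestBB starts with
  //   %p = phi i32 [ %v, %BB ], [ %v, %BB2 ], [ %w, %OtherPred ]
  // then BB and BB2 feed the same value and, when BB2 is also a mergeable
  // empty block hanging off the same predecessor, its frequency is added to
  // BBFreq below so that both blocks are costed as one.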
  for (pred_iterator PI = pred_begin(DestBB), E = pred_end(DestBB); PI != E;
       ++PI) {
    BasicBlock *DestBBPred = *PI;
    if (DestBBPred == BB)
      continue;

    bool HasAllSameValue = true;
    BasicBlock::const_iterator DestBBI = DestBB->begin();
    while (const PHINode *DestPN = dyn_cast<PHINode>(DestBBI++)) {
      if (DestPN->getIncomingValueForBlock(BB) !=
          DestPN->getIncomingValueForBlock(DestBBPred)) {
        HasAllSameValue = false;
        break;
      }
    }
    if (HasAllSameValue)
      SameIncomingValueBBs.insert(DestBBPred);
  }

  // See if all of BB's incoming values are the same as the value from Pred.
  // In this case, there is no reason to skip merging because COPYs are
  // expected to be placed in Pred already.
  if (SameIncomingValueBBs.count(Pred))
    return true;

  if (!BFI) {
    Function &F = *BB->getParent();
    LoopInfo LI{DominatorTree(F)};
    BPI.reset(new BranchProbabilityInfo(F, LI));
    BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
  }

  BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
  BlockFrequency BBFreq = BFI->getBlockFreq(BB);

  for (auto SameValueBB : SameIncomingValueBBs)
    if (SameValueBB->getUniquePredecessor() == Pred &&
        DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
      BBFreq += BFI->getBlockFreq(SameValueBB);

  return PredFreq.getFrequency() <=
         BBFreq.getFrequency() * FreqRatioToSkipMerge;
}

/// Return true if we can merge BB into DestBB if there is a single
/// unconditional branch between them, and BB contains no other non-phi
/// instructions.
bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
                                    const BasicBlock *DestBB) const {
  // We only want to eliminate blocks whose phi nodes are used by phi nodes in
  // the successor. If there are more complex conditions (e.g. preheaders),
  // don't mess around with them.
  BasicBlock::const_iterator BBI = BB->begin();
  while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
    for (const User *U : PN->users()) {
      const Instruction *UI = cast<Instruction>(U);
      if (UI->getParent() != DestBB || !isa<PHINode>(UI))
        return false;
      // If User is inside DestBB block and it is a PHINode then check
      // incoming value. If incoming value is not from BB then this is
      // a complex condition (e.g. preheaders) we want to avoid here.
      if (UI->getParent() == DestBB) {
        if (const PHINode *UPN = dyn_cast<PHINode>(UI))
          for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
            Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
            if (Insn && Insn->getParent() == BB &&
                Insn->getParent() != UPN->getIncomingBlock(I))
              return false;
          }
      }
    }
  }

  // If BB and DestBB contain any common predecessors, then the phi nodes in BB
  // and DestBB may have conflicting incoming values for the block. If so, we
  // can't merge the block.
  const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
  if (!DestBBPN) return true;  // no conflict.

  // Collect the preds of BB.
  SmallPtrSet<const BasicBlock*, 16> BBPreds;
  if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
    // It is faster to get preds from a PHI than with pred_iterator.
    for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
      BBPreds.insert(BBPN->getIncomingBlock(i));
  } else {
    BBPreds.insert(pred_begin(BB), pred_end(BB));
  }

  // Walk the preds of DestBB.
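  // If some block is a predecessor of both BB and DestBB, then after merging
  // that block would feed DestBB directly; this is only sound if the value
  // flowing into DestBB through BB is the same value DestBB already receives
  // from that predecessor, so bail out on any mismatch.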
601 for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) { 602 BasicBlock *Pred = DestBBPN->getIncomingBlock(i); 603 if (BBPreds.count(Pred)) { // Common predecessor? 604 BBI = DestBB->begin(); 605 while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) { 606 const Value *V1 = PN->getIncomingValueForBlock(Pred); 607 const Value *V2 = PN->getIncomingValueForBlock(BB); 608 609 // If V2 is a phi node in BB, look up what the mapped value will be. 610 if (const PHINode *V2PN = dyn_cast<PHINode>(V2)) 611 if (V2PN->getParent() == BB) 612 V2 = V2PN->getIncomingValueForBlock(Pred); 613 614 // If there is a conflict, bail out. 615 if (V1 != V2) return false; 616 } 617 } 618 } 619 620 return true; 621 } 622 623 624 /// Eliminate a basic block that has only phi's and an unconditional branch in 625 /// it. 626 void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) { 627 BranchInst *BI = cast<BranchInst>(BB->getTerminator()); 628 BasicBlock *DestBB = BI->getSuccessor(0); 629 630 DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB); 631 632 // If the destination block has a single pred, then this is a trivial edge, 633 // just collapse it. 634 if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) { 635 if (SinglePred != DestBB) { 636 // Remember if SinglePred was the entry block of the function. If so, we 637 // will need to move BB back to the entry position. 638 bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); 639 MergeBasicBlockIntoOnlyPred(DestBB, nullptr); 640 641 if (isEntry && BB != &BB->getParent()->getEntryBlock()) 642 BB->moveBefore(&BB->getParent()->getEntryBlock()); 643 644 DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); 645 return; 646 } 647 } 648 649 // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB 650 // to handle the new incoming edges it is about to have. 651 PHINode *PN; 652 for (BasicBlock::iterator BBI = DestBB->begin(); 653 (PN = dyn_cast<PHINode>(BBI)); ++BBI) { 654 // Remove the incoming value for BB, and remember it. 655 Value *InVal = PN->removeIncomingValue(BB, false); 656 657 // Two options: either the InVal is a phi node defined in BB or it is some 658 // value that dominates BB. 659 PHINode *InValPhi = dyn_cast<PHINode>(InVal); 660 if (InValPhi && InValPhi->getParent() == BB) { 661 // Add all of the input values of the input PHI as inputs of this phi. 662 for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i) 663 PN->addIncoming(InValPhi->getIncomingValue(i), 664 InValPhi->getIncomingBlock(i)); 665 } else { 666 // Otherwise, add one instance of the dominating value for each edge that 667 // we will be adding. 668 if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) { 669 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i) 670 PN->addIncoming(InVal, BBPN->getIncomingBlock(i)); 671 } else { 672 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) 673 PN->addIncoming(InVal, *PI); 674 } 675 } 676 } 677 678 // The PHIs are now updated, change everything that refers to BB to use 679 // DestBB and remove BB. 
680 BB->replaceAllUsesWith(DestBB); 681 BB->eraseFromParent(); 682 ++NumBlocksElim; 683 684 DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); 685 } 686 687 // Computes a map of base pointer relocation instructions to corresponding 688 // derived pointer relocation instructions given a vector of all relocate calls 689 static void computeBaseDerivedRelocateMap( 690 const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls, 691 DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>> 692 &RelocateInstMap) { 693 // Collect information in two maps: one primarily for locating the base object 694 // while filling the second map; the second map is the final structure holding 695 // a mapping between Base and corresponding Derived relocate calls 696 DenseMap<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap; 697 for (auto *ThisRelocate : AllRelocateCalls) { 698 auto K = std::make_pair(ThisRelocate->getBasePtrIndex(), 699 ThisRelocate->getDerivedPtrIndex()); 700 RelocateIdxMap.insert(std::make_pair(K, ThisRelocate)); 701 } 702 for (auto &Item : RelocateIdxMap) { 703 std::pair<unsigned, unsigned> Key = Item.first; 704 if (Key.first == Key.second) 705 // Base relocation: nothing to insert 706 continue; 707 708 GCRelocateInst *I = Item.second; 709 auto BaseKey = std::make_pair(Key.first, Key.first); 710 711 // We're iterating over RelocateIdxMap so we cannot modify it. 712 auto MaybeBase = RelocateIdxMap.find(BaseKey); 713 if (MaybeBase == RelocateIdxMap.end()) 714 // TODO: We might want to insert a new base object relocate and gep off 715 // that, if there are enough derived object relocates. 716 continue; 717 718 RelocateInstMap[MaybeBase->second].push_back(I); 719 } 720 } 721 722 // Accepts a GEP and extracts the operands into a vector provided they're all 723 // small integer constants 724 static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP, 725 SmallVectorImpl<Value *> &OffsetV) { 726 for (unsigned i = 1; i < GEP->getNumOperands(); i++) { 727 // Only accept small constant integer operands 728 auto Op = dyn_cast<ConstantInt>(GEP->getOperand(i)); 729 if (!Op || Op->getZExtValue() > 20) 730 return false; 731 } 732 733 for (unsigned i = 1; i < GEP->getNumOperands(); i++) 734 OffsetV.push_back(GEP->getOperand(i)); 735 return true; 736 } 737 738 // Takes a RelocatedBase (base pointer relocation instruction) and Targets to 739 // replace, computes a replacement, and affects it. 740 static bool 741 simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase, 742 const SmallVectorImpl<GCRelocateInst *> &Targets) { 743 bool MadeChange = false; 744 for (GCRelocateInst *ToReplace : Targets) { 745 assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() && 746 "Not relocating a derived object of the original base object"); 747 if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) { 748 // A duplicate relocate call. TODO: coalesce duplicates. 749 continue; 750 } 751 752 if (RelocatedBase->getParent() != ToReplace->getParent()) { 753 // Base and derived relocates are in different basic blocks. 754 // In this case transform is only valid when base dominates derived 755 // relocate. However it would be too expensive to check dominance 756 // for each such relocate, so we skip the whole transformation. 
757 continue; 758 } 759 760 Value *Base = ToReplace->getBasePtr(); 761 auto Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr()); 762 if (!Derived || Derived->getPointerOperand() != Base) 763 continue; 764 765 SmallVector<Value *, 2> OffsetV; 766 if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV)) 767 continue; 768 769 // Create a Builder and replace the target callsite with a gep 770 assert(RelocatedBase->getNextNode() && 771 "Should always have one since it's not a terminator"); 772 773 // Insert after RelocatedBase 774 IRBuilder<> Builder(RelocatedBase->getNextNode()); 775 Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc()); 776 777 // If gc_relocate does not match the actual type, cast it to the right type. 778 // In theory, there must be a bitcast after gc_relocate if the type does not 779 // match, and we should reuse it to get the derived pointer. But it could be 780 // cases like this: 781 // bb1: 782 // ... 783 // %g1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...) 784 // br label %merge 785 // 786 // bb2: 787 // ... 788 // %g2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...) 789 // br label %merge 790 // 791 // merge: 792 // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ] 793 // %cast = bitcast i8 addrspace(1)* %p1 in to i32 addrspace(1)* 794 // 795 // In this case, we can not find the bitcast any more. So we insert a new bitcast 796 // no matter there is already one or not. In this way, we can handle all cases, and 797 // the extra bitcast should be optimized away in later passes. 798 Value *ActualRelocatedBase = RelocatedBase; 799 if (RelocatedBase->getType() != Base->getType()) { 800 ActualRelocatedBase = 801 Builder.CreateBitCast(RelocatedBase, Base->getType()); 802 } 803 Value *Replacement = Builder.CreateGEP( 804 Derived->getSourceElementType(), ActualRelocatedBase, makeArrayRef(OffsetV)); 805 Replacement->takeName(ToReplace); 806 // If the newly generated derived pointer's type does not match the original derived 807 // pointer's type, cast the new derived pointer to match it. Same reasoning as above. 808 Value *ActualReplacement = Replacement; 809 if (Replacement->getType() != ToReplace->getType()) { 810 ActualReplacement = 811 Builder.CreateBitCast(Replacement, ToReplace->getType()); 812 } 813 ToReplace->replaceAllUsesWith(ActualReplacement); 814 ToReplace->eraseFromParent(); 815 816 MadeChange = true; 817 } 818 return MadeChange; 819 } 820 821 // Turns this: 822 // 823 // %base = ... 824 // %ptr = gep %base + 15 825 // %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr) 826 // %base' = relocate(%tok, i32 4, i32 4) 827 // %ptr' = relocate(%tok, i32 4, i32 5) 828 // %val = load %ptr' 829 // 830 // into this: 831 // 832 // %base = ... 
833 // %ptr = gep %base + 15 834 // %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr) 835 // %base' = gc.relocate(%tok, i32 4, i32 4) 836 // %ptr' = gep %base' + 15 837 // %val = load %ptr' 838 bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) { 839 bool MadeChange = false; 840 SmallVector<GCRelocateInst *, 2> AllRelocateCalls; 841 842 for (auto *U : I.users()) 843 if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U)) 844 // Collect all the relocate calls associated with a statepoint 845 AllRelocateCalls.push_back(Relocate); 846 847 // We need atleast one base pointer relocation + one derived pointer 848 // relocation to mangle 849 if (AllRelocateCalls.size() < 2) 850 return false; 851 852 // RelocateInstMap is a mapping from the base relocate instruction to the 853 // corresponding derived relocate instructions 854 DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>> RelocateInstMap; 855 computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap); 856 if (RelocateInstMap.empty()) 857 return false; 858 859 for (auto &Item : RelocateInstMap) 860 // Item.first is the RelocatedBase to offset against 861 // Item.second is the vector of Targets to replace 862 MadeChange = simplifyRelocatesOffABase(Item.first, Item.second); 863 return MadeChange; 864 } 865 866 /// SinkCast - Sink the specified cast instruction into its user blocks 867 static bool SinkCast(CastInst *CI) { 868 BasicBlock *DefBB = CI->getParent(); 869 870 /// InsertedCasts - Only insert a cast in each block once. 871 DenseMap<BasicBlock*, CastInst*> InsertedCasts; 872 873 bool MadeChange = false; 874 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end(); 875 UI != E; ) { 876 Use &TheUse = UI.getUse(); 877 Instruction *User = cast<Instruction>(*UI); 878 879 // Figure out which BB this cast is used in. For PHI's this is the 880 // appropriate predecessor block. 881 BasicBlock *UserBB = User->getParent(); 882 if (PHINode *PN = dyn_cast<PHINode>(User)) { 883 UserBB = PN->getIncomingBlock(TheUse); 884 } 885 886 // Preincrement use iterator so we don't invalidate it. 887 ++UI; 888 889 // The first insertion point of a block containing an EH pad is after the 890 // pad. If the pad is the user, we cannot sink the cast past the pad. 891 if (User->isEHPad()) 892 continue; 893 894 // If the block selected to receive the cast is an EH pad that does not 895 // allow non-PHI instructions before the terminator, we can't sink the 896 // cast. 897 if (UserBB->getTerminator()->isEHPad()) 898 continue; 899 900 // If this user is in the same block as the cast, don't change the cast. 901 if (UserBB == DefBB) continue; 902 903 // If we have already inserted a cast into this block, use it. 904 CastInst *&InsertedCast = InsertedCasts[UserBB]; 905 906 if (!InsertedCast) { 907 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); 908 assert(InsertPt != UserBB->end()); 909 InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0), 910 CI->getType(), "", &*InsertPt); 911 } 912 913 // Replace a use of the cast with a use of the new cast. 914 TheUse = InsertedCast; 915 MadeChange = true; 916 ++NumCastUses; 917 } 918 919 // If we removed all uses, nuke the cast. 920 if (CI->use_empty()) { 921 CI->eraseFromParent(); 922 MadeChange = true; 923 } 924 925 return MadeChange; 926 } 927 928 /// If the specified cast instruction is a noop copy (e.g. 
it's casting from 929 /// one pointer type to another, i32->i8 on PPC), sink it into user blocks to 930 /// reduce the number of virtual registers that must be created and coalesced. 931 /// 932 /// Return true if any changes are made. 933 /// 934 static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, 935 const DataLayout &DL) { 936 // Sink only "cheap" (or nop) address-space casts. This is a weaker condition 937 // than sinking only nop casts, but is helpful on some platforms. 938 if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) { 939 if (!TLI.isCheapAddrSpaceCast(ASC->getSrcAddressSpace(), 940 ASC->getDestAddressSpace())) 941 return false; 942 } 943 944 // If this is a noop copy, 945 EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType()); 946 EVT DstVT = TLI.getValueType(DL, CI->getType()); 947 948 // This is an fp<->int conversion? 949 if (SrcVT.isInteger() != DstVT.isInteger()) 950 return false; 951 952 // If this is an extension, it will be a zero or sign extension, which 953 // isn't a noop. 954 if (SrcVT.bitsLT(DstVT)) return false; 955 956 // If these values will be promoted, find out what they will be promoted 957 // to. This helps us consider truncates on PPC as noop copies when they 958 // are. 959 if (TLI.getTypeAction(CI->getContext(), SrcVT) == 960 TargetLowering::TypePromoteInteger) 961 SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT); 962 if (TLI.getTypeAction(CI->getContext(), DstVT) == 963 TargetLowering::TypePromoteInteger) 964 DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT); 965 966 // If, after promotion, these are the same types, this is a noop copy. 967 if (SrcVT != DstVT) 968 return false; 969 970 return SinkCast(CI); 971 } 972 973 /// Try to combine CI into a call to the llvm.uadd.with.overflow intrinsic if 974 /// possible. 975 /// 976 /// Return true if any changes were made. 977 static bool CombineUAddWithOverflow(CmpInst *CI) { 978 Value *A, *B; 979 Instruction *AddI; 980 if (!match(CI, 981 m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI)))) 982 return false; 983 984 Type *Ty = AddI->getType(); 985 if (!isa<IntegerType>(Ty)) 986 return false; 987 988 // We don't want to move around uses of condition values this late, so we we 989 // check if it is legal to create the call to the intrinsic in the basic 990 // block containing the icmp: 991 992 if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse()) 993 return false; 994 995 #ifndef NDEBUG 996 // Someday m_UAddWithOverflow may get smarter, but this is a safe assumption 997 // for now: 998 if (AddI->hasOneUse()) 999 assert(*AddI->user_begin() == CI && "expected!"); 1000 #endif 1001 1002 Module *M = CI->getModule(); 1003 Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty); 1004 1005 auto *InsertPt = AddI->hasOneUse() ? CI : AddI; 1006 1007 auto *UAddWithOverflow = 1008 CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt); 1009 auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt); 1010 auto *Overflow = 1011 ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt); 1012 1013 CI->replaceAllUsesWith(Overflow); 1014 AddI->replaceAllUsesWith(UAdd); 1015 CI->eraseFromParent(); 1016 AddI->eraseFromParent(); 1017 return true; 1018 } 1019 1020 /// Sink the given CmpInst into user blocks to reduce the number of virtual 1021 /// registers that must be created and coalesced. 
This is a clear win except on 1022 /// targets with multiple condition code registers (PowerPC), where it might 1023 /// lose; some adjustment may be wanted there. 1024 /// 1025 /// Return true if any changes are made. 1026 static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI) { 1027 BasicBlock *DefBB = CI->getParent(); 1028 1029 // Avoid sinking soft-FP comparisons, since this can move them into a loop. 1030 if (TLI && TLI->useSoftFloat() && isa<FCmpInst>(CI)) 1031 return false; 1032 1033 // Only insert a cmp in each block once. 1034 DenseMap<BasicBlock*, CmpInst*> InsertedCmps; 1035 1036 bool MadeChange = false; 1037 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end(); 1038 UI != E; ) { 1039 Use &TheUse = UI.getUse(); 1040 Instruction *User = cast<Instruction>(*UI); 1041 1042 // Preincrement use iterator so we don't invalidate it. 1043 ++UI; 1044 1045 // Don't bother for PHI nodes. 1046 if (isa<PHINode>(User)) 1047 continue; 1048 1049 // Figure out which BB this cmp is used in. 1050 BasicBlock *UserBB = User->getParent(); 1051 1052 // If this user is in the same block as the cmp, don't change the cmp. 1053 if (UserBB == DefBB) continue; 1054 1055 // If we have already inserted a cmp into this block, use it. 1056 CmpInst *&InsertedCmp = InsertedCmps[UserBB]; 1057 1058 if (!InsertedCmp) { 1059 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); 1060 assert(InsertPt != UserBB->end()); 1061 InsertedCmp = 1062 CmpInst::Create(CI->getOpcode(), CI->getPredicate(), 1063 CI->getOperand(0), CI->getOperand(1), "", &*InsertPt); 1064 // Propagate the debug info. 1065 InsertedCmp->setDebugLoc(CI->getDebugLoc()); 1066 } 1067 1068 // Replace a use of the cmp with a use of the new cmp. 1069 TheUse = InsertedCmp; 1070 MadeChange = true; 1071 ++NumCmpUses; 1072 } 1073 1074 // If we removed all uses, nuke the cmp. 1075 if (CI->use_empty()) { 1076 CI->eraseFromParent(); 1077 MadeChange = true; 1078 } 1079 1080 return MadeChange; 1081 } 1082 1083 static bool OptimizeCmpExpression(CmpInst *CI, const TargetLowering *TLI) { 1084 if (SinkCmpExpression(CI, TLI)) 1085 return true; 1086 1087 if (CombineUAddWithOverflow(CI)) 1088 return true; 1089 1090 return false; 1091 } 1092 1093 /// Check if the candidates could be combined with a shift instruction, which 1094 /// includes: 1095 /// 1. Truncate instruction 1096 /// 2. And instruction and the imm is a mask of the low bits: 1097 /// imm & (imm+1) == 0 1098 static bool isExtractBitsCandidateUse(Instruction *User) { 1099 if (!isa<TruncInst>(User)) { 1100 if (User->getOpcode() != Instruction::And || 1101 !isa<ConstantInt>(User->getOperand(1))) 1102 return false; 1103 1104 const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue(); 1105 1106 if ((Cimm & (Cimm + 1)).getBoolValue()) 1107 return false; 1108 } 1109 return true; 1110 } 1111 1112 /// Sink both shift and truncate instruction to the use of truncate's BB. 
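/// This is a helper for OptimizeExtractBits: it is used when the shift and its
/// truncate user live in the same block but the shift's type is legal while
/// the truncate's result type is not, so both the shift and the truncate are
/// sunk into the blocks of the truncate's users.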
static bool
SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
                     DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
                     const TargetLowering &TLI, const DataLayout &DL) {
  BasicBlock *UserBB = User->getParent();
  DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
  TruncInst *TruncI = dyn_cast<TruncInst>(User);
  bool MadeChange = false;

  for (Value::user_iterator TruncUI = TruncI->user_begin(),
                            TruncE = TruncI->user_end();
       TruncUI != TruncE;) {

    Use &TruncTheUse = TruncUI.getUse();
    Instruction *TruncUser = cast<Instruction>(*TruncUI);
    // Preincrement use iterator so we don't invalidate it.

    ++TruncUI;

    int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
    if (!ISDOpcode)
      continue;

    // If the use is actually a legal node, there will not be an
    // implicit truncate.
    // FIXME: always querying the result type is just an
    // approximation; some nodes' legality is determined by the
    // operand or other means. There's no good way to find out though.
    if (TLI.isOperationLegalOrCustom(
            ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true)))
      continue;

    // Don't bother for PHI nodes.
    if (isa<PHINode>(TruncUser))
      continue;

    BasicBlock *TruncUserBB = TruncUser->getParent();

    if (UserBB == TruncUserBB)
      continue;

    BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
    CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];

    if (!InsertedShift && !InsertedTrunc) {
      BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
      assert(InsertPt != TruncUserBB->end());
      // Sink the shift
      if (ShiftI->getOpcode() == Instruction::AShr)
        InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
                                                   "", &*InsertPt);
      else
        InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
                                                   "", &*InsertPt);

      // Sink the trunc
      BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
      TruncInsertPt++;
      assert(TruncInsertPt != TruncUserBB->end());

      InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
                                       TruncI->getType(), "", &*TruncInsertPt);

      MadeChange = true;

      TruncTheUse = InsertedTrunc;
    }
  }
  return MadeChange;
}

/// Sink the shift *right* instruction into user blocks if the uses could
/// potentially be combined with this shift instruction to form a BitExtract
/// instruction. It is only applied if the architecture supports BitExtract
/// instructions. Here is an example:
/// BB1:
///   %x.extract.shift = lshr i64 %arg1, 32
/// BB2:
///   %x.extract.trunc = trunc i64 %x.extract.shift to i16
/// ==>
///
/// BB2:
///   %x.extract.shift.1 = lshr i64 %arg1, 32
///   %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
///
/// CodeGen will recognize the pattern in BB2 and generate a BitExtract
/// instruction.
/// Return true if any changes are made.
static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
                                const TargetLowering &TLI,
                                const DataLayout &DL) {
  BasicBlock *DefBB = ShiftI->getParent();

  /// Only insert instructions in each block once.
  DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;

  bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));

  bool MadeChange = false;
  for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
       UI != E;) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);
    // Preincrement use iterator so we don't invalidate it.
    ++UI;

    // Don't bother for PHI nodes.
    if (isa<PHINode>(User))
      continue;

    if (!isExtractBitsCandidateUse(User))
      continue;

    BasicBlock *UserBB = User->getParent();

    if (UserBB == DefBB) {
      // If the shift and truncate instruction are in the same BB, the use of
      // the truncate (TruncUse) may still introduce another truncate if it is
      // not legal. In this case, we would like to sink both the shift and the
      // truncate instruction to the BB of TruncUse.
      // For example:
      // BB1:
      //   i64 shift.result = lshr i64 opnd, imm
      //   trunc.result = trunc shift.result to i16
      //
      // BB2:
      //   ----> We will have an implicit truncate here if the architecture
      //   does not have an i16 compare.
      //   cmp i16 trunc.result, opnd2
      //
      if (isa<TruncInst>(User) && shiftIsLegal
          // If the type of the truncate is legal, no truncate will be
          // introduced in other basic blocks.
          &&
          (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
        MadeChange =
            SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);

      continue;
    }
    // If we have already inserted a shift into this block, use it.
    BinaryOperator *&InsertedShift = InsertedShifts[UserBB];

    if (!InsertedShift) {
      BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
      assert(InsertPt != UserBB->end());

      if (ShiftI->getOpcode() == Instruction::AShr)
        InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
                                                   "", &*InsertPt);
      else
        InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
                                                   "", &*InsertPt);

      MadeChange = true;
    }

    // Replace a use of the shift with a use of the new shift.
    TheUse = InsertedShift;
  }

  // If we removed all uses, nuke the shift.
1275 if (ShiftI->use_empty()) 1276 ShiftI->eraseFromParent(); 1277 1278 return MadeChange; 1279 } 1280 1281 // Translate a masked load intrinsic like 1282 // <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align, 1283 // <16 x i1> %mask, <16 x i32> %passthru) 1284 // to a chain of basic blocks, with loading element one-by-one if 1285 // the appropriate mask bit is set 1286 // 1287 // %1 = bitcast i8* %addr to i32* 1288 // %2 = extractelement <16 x i1> %mask, i32 0 1289 // %3 = icmp eq i1 %2, true 1290 // br i1 %3, label %cond.load, label %else 1291 // 1292 //cond.load: ; preds = %0 1293 // %4 = getelementptr i32* %1, i32 0 1294 // %5 = load i32* %4 1295 // %6 = insertelement <16 x i32> undef, i32 %5, i32 0 1296 // br label %else 1297 // 1298 //else: ; preds = %0, %cond.load 1299 // %res.phi.else = phi <16 x i32> [ %6, %cond.load ], [ undef, %0 ] 1300 // %7 = extractelement <16 x i1> %mask, i32 1 1301 // %8 = icmp eq i1 %7, true 1302 // br i1 %8, label %cond.load1, label %else2 1303 // 1304 //cond.load1: ; preds = %else 1305 // %9 = getelementptr i32* %1, i32 1 1306 // %10 = load i32* %9 1307 // %11 = insertelement <16 x i32> %res.phi.else, i32 %10, i32 1 1308 // br label %else2 1309 // 1310 //else2: ; preds = %else, %cond.load1 1311 // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ] 1312 // %12 = extractelement <16 x i1> %mask, i32 2 1313 // %13 = icmp eq i1 %12, true 1314 // br i1 %13, label %cond.load4, label %else5 1315 // 1316 static void scalarizeMaskedLoad(CallInst *CI) { 1317 Value *Ptr = CI->getArgOperand(0); 1318 Value *Alignment = CI->getArgOperand(1); 1319 Value *Mask = CI->getArgOperand(2); 1320 Value *Src0 = CI->getArgOperand(3); 1321 1322 unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue(); 1323 VectorType *VecType = dyn_cast<VectorType>(CI->getType()); 1324 assert(VecType && "Unexpected return type of masked load intrinsic"); 1325 1326 Type *EltTy = CI->getType()->getVectorElementType(); 1327 1328 IRBuilder<> Builder(CI->getContext()); 1329 Instruction *InsertPt = CI; 1330 BasicBlock *IfBlock = CI->getParent(); 1331 BasicBlock *CondBlock = nullptr; 1332 BasicBlock *PrevIfBlock = CI->getParent(); 1333 1334 Builder.SetInsertPoint(InsertPt); 1335 Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 1336 1337 // Short-cut if the mask is all-true. 1338 bool IsAllOnesMask = isa<Constant>(Mask) && 1339 cast<Constant>(Mask)->isAllOnesValue(); 1340 1341 if (IsAllOnesMask) { 1342 Value *NewI = Builder.CreateAlignedLoad(Ptr, AlignVal); 1343 CI->replaceAllUsesWith(NewI); 1344 CI->eraseFromParent(); 1345 return; 1346 } 1347 1348 // Adjust alignment for the scalar instruction. 
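  // Each per-element load can only be assumed to be aligned to the minimum of
  // the original vector alignment and the element size in bytes.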
1349 AlignVal = std::min(AlignVal, VecType->getScalarSizeInBits()/8); 1350 // Bitcast %addr fron i8* to EltTy* 1351 Type *NewPtrType = 1352 EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace()); 1353 Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType); 1354 unsigned VectorWidth = VecType->getNumElements(); 1355 1356 Value *UndefVal = UndefValue::get(VecType); 1357 1358 // The result vector 1359 Value *VResult = UndefVal; 1360 1361 if (isa<ConstantVector>(Mask)) { 1362 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 1363 if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue()) 1364 continue; 1365 Value *Gep = 1366 Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); 1367 LoadInst* Load = Builder.CreateAlignedLoad(Gep, AlignVal); 1368 VResult = Builder.CreateInsertElement(VResult, Load, 1369 Builder.getInt32(Idx)); 1370 } 1371 Value *NewI = Builder.CreateSelect(Mask, VResult, Src0); 1372 CI->replaceAllUsesWith(NewI); 1373 CI->eraseFromParent(); 1374 return; 1375 } 1376 1377 PHINode *Phi = nullptr; 1378 Value *PrevPhi = UndefVal; 1379 1380 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 1381 1382 // Fill the "else" block, created in the previous iteration 1383 // 1384 // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ] 1385 // %mask_1 = extractelement <16 x i1> %mask, i32 Idx 1386 // %to_load = icmp eq i1 %mask_1, true 1387 // br i1 %to_load, label %cond.load, label %else 1388 // 1389 if (Idx > 0) { 1390 Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); 1391 Phi->addIncoming(VResult, CondBlock); 1392 Phi->addIncoming(PrevPhi, PrevIfBlock); 1393 PrevPhi = Phi; 1394 VResult = Phi; 1395 } 1396 1397 Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx)); 1398 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, 1399 ConstantInt::get(Predicate->getType(), 1)); 1400 1401 // Create "cond" block 1402 // 1403 // %EltAddr = getelementptr i32* %1, i32 0 1404 // %Elt = load i32* %EltAddr 1405 // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx 1406 // 1407 CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.load"); 1408 Builder.SetInsertPoint(InsertPt); 1409 1410 Value *Gep = 1411 Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); 1412 LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal); 1413 VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx)); 1414 1415 // Create "else" block, fill it in the next iteration 1416 BasicBlock *NewIfBlock = 1417 CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); 1418 Builder.SetInsertPoint(InsertPt); 1419 Instruction *OldBr = IfBlock->getTerminator(); 1420 BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); 1421 OldBr->eraseFromParent(); 1422 PrevIfBlock = IfBlock; 1423 IfBlock = NewIfBlock; 1424 } 1425 1426 Phi = Builder.CreatePHI(VecType, 2, "res.phi.select"); 1427 Phi->addIncoming(VResult, CondBlock); 1428 Phi->addIncoming(PrevPhi, PrevIfBlock); 1429 Value *NewI = Builder.CreateSelect(Mask, Phi, Src0); 1430 CI->replaceAllUsesWith(NewI); 1431 CI->eraseFromParent(); 1432 } 1433 1434 // Translate a masked store intrinsic, like 1435 // void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align, 1436 // <16 x i1> %mask) 1437 // to a chain of basic blocks, that stores element one-by-one if 1438 // the appropriate mask bit is set 1439 // 1440 // %1 = bitcast i8* %addr to i32* 1441 // %2 = extractelement <16 x i1> %mask, i32 0 1442 // %3 = icmp eq i1 %2, true 
1443 // br i1 %3, label %cond.store, label %else 1444 // 1445 // cond.store: ; preds = %0 1446 // %4 = extractelement <16 x i32> %val, i32 0 1447 // %5 = getelementptr i32* %1, i32 0 1448 // store i32 %4, i32* %5 1449 // br label %else 1450 // 1451 // else: ; preds = %0, %cond.store 1452 // %6 = extractelement <16 x i1> %mask, i32 1 1453 // %7 = icmp eq i1 %6, true 1454 // br i1 %7, label %cond.store1, label %else2 1455 // 1456 // cond.store1: ; preds = %else 1457 // %8 = extractelement <16 x i32> %val, i32 1 1458 // %9 = getelementptr i32* %1, i32 1 1459 // store i32 %8, i32* %9 1460 // br label %else2 1461 // . . . 1462 static void scalarizeMaskedStore(CallInst *CI) { 1463 Value *Src = CI->getArgOperand(0); 1464 Value *Ptr = CI->getArgOperand(1); 1465 Value *Alignment = CI->getArgOperand(2); 1466 Value *Mask = CI->getArgOperand(3); 1467 1468 unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue(); 1469 VectorType *VecType = dyn_cast<VectorType>(Src->getType()); 1470 assert(VecType && "Unexpected data type in masked store intrinsic"); 1471 1472 Type *EltTy = VecType->getElementType(); 1473 1474 IRBuilder<> Builder(CI->getContext()); 1475 Instruction *InsertPt = CI; 1476 BasicBlock *IfBlock = CI->getParent(); 1477 Builder.SetInsertPoint(InsertPt); 1478 Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 1479 1480 // Short-cut if the mask is all-true. 1481 bool IsAllOnesMask = isa<Constant>(Mask) && 1482 cast<Constant>(Mask)->isAllOnesValue(); 1483 1484 if (IsAllOnesMask) { 1485 Builder.CreateAlignedStore(Src, Ptr, AlignVal); 1486 CI->eraseFromParent(); 1487 return; 1488 } 1489 1490 // Adjust alignment for the scalar instruction. 1491 AlignVal = std::max(AlignVal, VecType->getScalarSizeInBits()/8); 1492 // Bitcast %addr fron i8* to EltTy* 1493 Type *NewPtrType = 1494 EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace()); 1495 Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType); 1496 unsigned VectorWidth = VecType->getNumElements(); 1497 1498 if (isa<ConstantVector>(Mask)) { 1499 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 1500 if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue()) 1501 continue; 1502 Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx)); 1503 Value *Gep = 1504 Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); 1505 Builder.CreateAlignedStore(OneElt, Gep, AlignVal); 1506 } 1507 CI->eraseFromParent(); 1508 return; 1509 } 1510 1511 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 1512 1513 // Fill the "else" block, created in the previous iteration 1514 // 1515 // %mask_1 = extractelement <16 x i1> %mask, i32 Idx 1516 // %to_store = icmp eq i1 %mask_1, true 1517 // br i1 %to_store, label %cond.store, label %else 1518 // 1519 Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx)); 1520 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, 1521 ConstantInt::get(Predicate->getType(), 1)); 1522 1523 // Create "cond" block 1524 // 1525 // %OneElt = extractelement <16 x i32> %Src, i32 Idx 1526 // %EltAddr = getelementptr i32* %1, i32 0 1527 // %store i32 %OneElt, i32* %EltAddr 1528 // 1529 BasicBlock *CondBlock = 1530 IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store"); 1531 Builder.SetInsertPoint(InsertPt); 1532 1533 Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx)); 1534 Value *Gep = 1535 Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); 1536 Builder.CreateAlignedStore(OneElt, Gep, AlignVal); 1537 1538 // 
Create "else" block, fill it in the next iteration 1539 BasicBlock *NewIfBlock = 1540 CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); 1541 Builder.SetInsertPoint(InsertPt); 1542 Instruction *OldBr = IfBlock->getTerminator(); 1543 BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); 1544 OldBr->eraseFromParent(); 1545 IfBlock = NewIfBlock; 1546 } 1547 CI->eraseFromParent(); 1548 } 1549 1550 // Translate a masked gather intrinsic like 1551 // <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4, 1552 // <16 x i1> %Mask, <16 x i32> %Src) 1553 // to a chain of basic blocks, with loading element one-by-one if 1554 // the appropriate mask bit is set 1555 // 1556 // % Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind 1557 // % Mask0 = extractelement <16 x i1> %Mask, i32 0 1558 // % ToLoad0 = icmp eq i1 % Mask0, true 1559 // br i1 % ToLoad0, label %cond.load, label %else 1560 // 1561 // cond.load: 1562 // % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 1563 // % Load0 = load i32, i32* % Ptr0, align 4 1564 // % Res0 = insertelement <16 x i32> undef, i32 % Load0, i32 0 1565 // br label %else 1566 // 1567 // else: 1568 // %res.phi.else = phi <16 x i32>[% Res0, %cond.load], [undef, % 0] 1569 // % Mask1 = extractelement <16 x i1> %Mask, i32 1 1570 // % ToLoad1 = icmp eq i1 % Mask1, true 1571 // br i1 % ToLoad1, label %cond.load1, label %else2 1572 // 1573 // cond.load1: 1574 // % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 1575 // % Load1 = load i32, i32* % Ptr1, align 4 1576 // % Res1 = insertelement <16 x i32> %res.phi.else, i32 % Load1, i32 1 1577 // br label %else2 1578 // . . . 1579 // % Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src 1580 // ret <16 x i32> %Result 1581 static void scalarizeMaskedGather(CallInst *CI) { 1582 Value *Ptrs = CI->getArgOperand(0); 1583 Value *Alignment = CI->getArgOperand(1); 1584 Value *Mask = CI->getArgOperand(2); 1585 Value *Src0 = CI->getArgOperand(3); 1586 1587 VectorType *VecType = dyn_cast<VectorType>(CI->getType()); 1588 1589 assert(VecType && "Unexpected return type of masked load intrinsic"); 1590 1591 IRBuilder<> Builder(CI->getContext()); 1592 Instruction *InsertPt = CI; 1593 BasicBlock *IfBlock = CI->getParent(); 1594 BasicBlock *CondBlock = nullptr; 1595 BasicBlock *PrevIfBlock = CI->getParent(); 1596 Builder.SetInsertPoint(InsertPt); 1597 unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue(); 1598 1599 Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 1600 1601 Value *UndefVal = UndefValue::get(VecType); 1602 1603 // The result vector 1604 Value *VResult = UndefVal; 1605 unsigned VectorWidth = VecType->getNumElements(); 1606 1607 // Shorten the way if the mask is a vector of constants. 
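  // As an illustrative sketch (element type i32 assumed, names follow the
  // Twine suffixes used below): a constant mask such as <i1 1, i1 0, ...>
  // lets us emit straight-line IR with no branches, e.g.
  //   %Ptr0  = extractelement <16 x i32*> %Ptrs, i32 0
  //   %Load0 = load i32, i32* %Ptr0, align 4
  //   %Res0  = insertelement <16 x i32> undef, i32 %Load0, i32 0
  //   ...                        ; lanes whose mask bit is 0 are skipped
  //   %Res   = select <16 x i1> %Mask, <16 x i32> %ResN, <16 x i32> %Src0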
1608 bool IsConstMask = isa<ConstantVector>(Mask); 1609 1610 if (IsConstMask) { 1611 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 1612 if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue()) 1613 continue; 1614 Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), 1615 "Ptr" + Twine(Idx)); 1616 LoadInst *Load = Builder.CreateAlignedLoad(Ptr, AlignVal, 1617 "Load" + Twine(Idx)); 1618 VResult = Builder.CreateInsertElement(VResult, Load, 1619 Builder.getInt32(Idx), 1620 "Res" + Twine(Idx)); 1621 } 1622 Value *NewI = Builder.CreateSelect(Mask, VResult, Src0); 1623 CI->replaceAllUsesWith(NewI); 1624 CI->eraseFromParent(); 1625 return; 1626 } 1627 1628 PHINode *Phi = nullptr; 1629 Value *PrevPhi = UndefVal; 1630 1631 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 1632 1633 // Fill the "else" block, created in the previous iteration 1634 // 1635 // %Mask1 = extractelement <16 x i1> %Mask, i32 1 1636 // %ToLoad1 = icmp eq i1 %Mask1, true 1637 // br i1 %ToLoad1, label %cond.load, label %else 1638 // 1639 if (Idx > 0) { 1640 Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); 1641 Phi->addIncoming(VResult, CondBlock); 1642 Phi->addIncoming(PrevPhi, PrevIfBlock); 1643 PrevPhi = Phi; 1644 VResult = Phi; 1645 } 1646 1647 Value *Predicate = Builder.CreateExtractElement(Mask, 1648 Builder.getInt32(Idx), 1649 "Mask" + Twine(Idx)); 1650 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, 1651 ConstantInt::get(Predicate->getType(), 1), 1652 "ToLoad" + Twine(Idx)); 1653 1654 // Create "cond" block 1655 // 1656 // %EltAddr = getelementptr i32* %1, i32 0 1657 // %Elt = load i32* %EltAddr 1658 // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx 1659 // 1660 CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load"); 1661 Builder.SetInsertPoint(InsertPt); 1662 1663 Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), 1664 "Ptr" + Twine(Idx)); 1665 LoadInst *Load = Builder.CreateAlignedLoad(Ptr, AlignVal, 1666 "Load" + Twine(Idx)); 1667 VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx), 1668 "Res" + Twine(Idx)); 1669 1670 // Create "else" block, fill it in the next iteration 1671 BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); 1672 Builder.SetInsertPoint(InsertPt); 1673 Instruction *OldBr = IfBlock->getTerminator(); 1674 BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); 1675 OldBr->eraseFromParent(); 1676 PrevIfBlock = IfBlock; 1677 IfBlock = NewIfBlock; 1678 } 1679 1680 Phi = Builder.CreatePHI(VecType, 2, "res.phi.select"); 1681 Phi->addIncoming(VResult, CondBlock); 1682 Phi->addIncoming(PrevPhi, PrevIfBlock); 1683 Value *NewI = Builder.CreateSelect(Mask, Phi, Src0); 1684 CI->replaceAllUsesWith(NewI); 1685 CI->eraseFromParent(); 1686 } 1687 1688 // Translate a masked scatter intrinsic, like 1689 // void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4, 1690 // <16 x i1> %Mask) 1691 // to a chain of basic blocks, that stores element one-by-one if 1692 // the appropriate mask bit is set. 
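// Unlike the gather expansion above, no result PHIs are needed here: a
// scatter returns no value, so each conditional block only performs a store.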
1693 // 1694 // % Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind 1695 // % Mask0 = extractelement <16 x i1> % Mask, i32 0 1696 // % ToStore0 = icmp eq i1 % Mask0, true 1697 // br i1 %ToStore0, label %cond.store, label %else 1698 // 1699 // cond.store: 1700 // % Elt0 = extractelement <16 x i32> %Src, i32 0 1701 // % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 1702 // store i32 %Elt0, i32* % Ptr0, align 4 1703 // br label %else 1704 // 1705 // else: 1706 // % Mask1 = extractelement <16 x i1> % Mask, i32 1 1707 // % ToStore1 = icmp eq i1 % Mask1, true 1708 // br i1 % ToStore1, label %cond.store1, label %else2 1709 // 1710 // cond.store1: 1711 // % Elt1 = extractelement <16 x i32> %Src, i32 1 1712 // % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 1713 // store i32 % Elt1, i32* % Ptr1, align 4 1714 // br label %else2 1715 // . . . 1716 static void scalarizeMaskedScatter(CallInst *CI) { 1717 Value *Src = CI->getArgOperand(0); 1718 Value *Ptrs = CI->getArgOperand(1); 1719 Value *Alignment = CI->getArgOperand(2); 1720 Value *Mask = CI->getArgOperand(3); 1721 1722 assert(isa<VectorType>(Src->getType()) && 1723 "Unexpected data type in masked scatter intrinsic"); 1724 assert(isa<VectorType>(Ptrs->getType()) && 1725 isa<PointerType>(Ptrs->getType()->getVectorElementType()) && 1726 "Vector of pointers is expected in masked scatter intrinsic"); 1727 1728 IRBuilder<> Builder(CI->getContext()); 1729 Instruction *InsertPt = CI; 1730 BasicBlock *IfBlock = CI->getParent(); 1731 Builder.SetInsertPoint(InsertPt); 1732 Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 1733 1734 unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue(); 1735 unsigned VectorWidth = Src->getType()->getVectorNumElements(); 1736 1737 // Shorten the way if the mask is a vector of constants. 
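  // Illustrative sketch: with a constant mask, the fast path below emits just
  // the stores for the set lanes and no control flow, e.g.
  //   %Elt0 = extractelement <16 x i32> %Src, i32 0
  //   %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
  //   store i32 %Elt0, i32* %Ptr0, align 4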
1738 bool IsConstMask = isa<ConstantVector>(Mask); 1739 1740 if (IsConstMask) { 1741 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 1742 if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue()) 1743 continue; 1744 Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx), 1745 "Elt" + Twine(Idx)); 1746 Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), 1747 "Ptr" + Twine(Idx)); 1748 Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); 1749 } 1750 CI->eraseFromParent(); 1751 return; 1752 } 1753 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { 1754 // Fill the "else" block, created in the previous iteration 1755 // 1756 // % Mask1 = extractelement <16 x i1> % Mask, i32 Idx 1757 // % ToStore = icmp eq i1 % Mask1, true 1758 // br i1 % ToStore, label %cond.store, label %else 1759 // 1760 Value *Predicate = Builder.CreateExtractElement(Mask, 1761 Builder.getInt32(Idx), 1762 "Mask" + Twine(Idx)); 1763 Value *Cmp = 1764 Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, 1765 ConstantInt::get(Predicate->getType(), 1), 1766 "ToStore" + Twine(Idx)); 1767 1768 // Create "cond" block 1769 // 1770 // % Elt1 = extractelement <16 x i32> %Src, i32 1 1771 // % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 1772 // %store i32 % Elt1, i32* % Ptr1 1773 // 1774 BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store"); 1775 Builder.SetInsertPoint(InsertPt); 1776 1777 Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx), 1778 "Elt" + Twine(Idx)); 1779 Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), 1780 "Ptr" + Twine(Idx)); 1781 Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); 1782 1783 // Create "else" block, fill it in the next iteration 1784 BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); 1785 Builder.SetInsertPoint(InsertPt); 1786 Instruction *OldBr = IfBlock->getTerminator(); 1787 BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); 1788 OldBr->eraseFromParent(); 1789 IfBlock = NewIfBlock; 1790 } 1791 CI->eraseFromParent(); 1792 } 1793 1794 /// If counting leading or trailing zeros is an expensive operation and a zero 1795 /// input is defined, add a check for zero to avoid calling the intrinsic. 1796 /// 1797 /// We want to transform: 1798 /// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false) 1799 /// 1800 /// into: 1801 /// entry: 1802 /// %cmpz = icmp eq i64 %A, 0 1803 /// br i1 %cmpz, label %cond.end, label %cond.false 1804 /// cond.false: 1805 /// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true) 1806 /// br label %cond.end 1807 /// cond.end: 1808 /// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ] 1809 /// 1810 /// If the transform is performed, return true and set ModifiedDT to true. 1811 static bool despeculateCountZeros(IntrinsicInst *CountZeros, 1812 const TargetLowering *TLI, 1813 const DataLayout *DL, 1814 bool &ModifiedDT) { 1815 if (!TLI || !DL) 1816 return false; 1817 1818 // If a zero input is undefined, it doesn't make sense to despeculate that. 1819 if (match(CountZeros->getOperand(1), m_One())) 1820 return false; 1821 1822 // If it's cheap to speculate, there's nothing to do. 1823 auto IntrinsicID = CountZeros->getIntrinsicID(); 1824 if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz()) || 1825 (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz())) 1826 return false; 1827 1828 // Only handle legal scalar cases. Anything else requires too much work. 
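  // For example, on a typical 64-bit target (illustrative) this rejects
  // vector cttz/ctlz as well as scalar types wider than i64, since
  // getLargestLegalIntTypeSizeInBits() would return 64 there.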
1829 Type *Ty = CountZeros->getType(); 1830 unsigned SizeInBits = Ty->getPrimitiveSizeInBits(); 1831 if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits()) 1832 return false; 1833 1834 // The intrinsic will be sunk behind a compare against zero and branch. 1835 BasicBlock *StartBlock = CountZeros->getParent(); 1836 BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false"); 1837 1838 // Create another block after the count zero intrinsic. A PHI will be added 1839 // in this block to select the result of the intrinsic or the bit-width 1840 // constant if the input to the intrinsic is zero. 1841 BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(CountZeros)); 1842 BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end"); 1843 1844 // Set up a builder to create a compare, conditional branch, and PHI. 1845 IRBuilder<> Builder(CountZeros->getContext()); 1846 Builder.SetInsertPoint(StartBlock->getTerminator()); 1847 Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc()); 1848 1849 // Replace the unconditional branch that was created by the first split with 1850 // a compare against zero and a conditional branch. 1851 Value *Zero = Constant::getNullValue(Ty); 1852 Value *Cmp = Builder.CreateICmpEQ(CountZeros->getOperand(0), Zero, "cmpz"); 1853 Builder.CreateCondBr(Cmp, EndBlock, CallBlock); 1854 StartBlock->getTerminator()->eraseFromParent(); 1855 1856 // Create a PHI in the end block to select either the output of the intrinsic 1857 // or the bit width of the operand. 1858 Builder.SetInsertPoint(&EndBlock->front()); 1859 PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz"); 1860 CountZeros->replaceAllUsesWith(PN); 1861 Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits)); 1862 PN->addIncoming(BitWidth, StartBlock); 1863 PN->addIncoming(CountZeros, CallBlock); 1864 1865 // We are explicitly handling the zero case, so we can set the intrinsic's 1866 // undefined zero argument to 'true'. This will also prevent reprocessing the 1867 // intrinsic; we only despeculate when a zero input is defined. 1868 CountZeros->setArgOperand(1, Builder.getTrue()); 1869 ModifiedDT = true; 1870 return true; 1871 } 1872 1873 bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) { 1874 BasicBlock *BB = CI->getParent(); 1875 1876 // Lower inline assembly if we can. 1877 // If we found an inline asm expession, and if the target knows how to 1878 // lower it to normal LLVM code, do so now. 1879 if (TLI && isa<InlineAsm>(CI->getCalledValue())) { 1880 if (TLI->ExpandInlineAsm(CI)) { 1881 // Avoid invalidating the iterator. 1882 CurInstIterator = BB->begin(); 1883 // Avoid processing instructions out of order, which could cause 1884 // reuse before a value is defined. 1885 SunkAddrs.clear(); 1886 return true; 1887 } 1888 // Sink address computing for memory operands into the block. 1889 if (optimizeInlineAsmInst(CI)) 1890 return true; 1891 } 1892 1893 // Align the pointer arguments to this call if the target thinks it's a good 1894 // idea 1895 unsigned MinSize, PrefAlign; 1896 if (TLI && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) { 1897 for (auto &Arg : CI->arg_operands()) { 1898 // We want to align both objects whose address is used directly and 1899 // objects whose address is used in casts and GEPs, though it only makes 1900 // sense for GEPs if the offset is a multiple of the desired alignment and 1901 // if size - offset meets the size threshold. 
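      // Worked example with illustrative values: if PrefAlign is 16 and
      // MinSize is 32, a GEP at constant offset 16 into a 64-byte alloca
      // qualifies, because 16 is a multiple of 16 and 64 >= 32 + 16; an
      // offset of 8 would be rejected by the alignment check below.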
1902 if (!Arg->getType()->isPointerTy()) 1903 continue; 1904 APInt Offset(DL->getPointerSizeInBits( 1905 cast<PointerType>(Arg->getType())->getAddressSpace()), 1906 0); 1907 Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset); 1908 uint64_t Offset2 = Offset.getLimitedValue(); 1909 if ((Offset2 & (PrefAlign-1)) != 0) 1910 continue; 1911 AllocaInst *AI; 1912 if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign && 1913 DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2) 1914 AI->setAlignment(PrefAlign); 1915 // Global variables can only be aligned if they are defined in this 1916 // object (i.e. they are uniquely initialized in this object), and 1917 // over-aligning global variables that have an explicit section is 1918 // forbidden. 1919 GlobalVariable *GV; 1920 if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() && 1921 GV->getPointerAlignment(*DL) < PrefAlign && 1922 DL->getTypeAllocSize(GV->getValueType()) >= 1923 MinSize + Offset2) 1924 GV->setAlignment(PrefAlign); 1925 } 1926 // If this is a memcpy (or similar) then we may be able to improve the 1927 // alignment 1928 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) { 1929 unsigned Align = getKnownAlignment(MI->getDest(), *DL); 1930 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) 1931 Align = std::min(Align, getKnownAlignment(MTI->getSource(), *DL)); 1932 if (Align > MI->getAlignment()) 1933 MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Align)); 1934 } 1935 } 1936 1937 // If we have a cold call site, try to sink addressing computation into the 1938 // cold block. This interacts with our handling for loads and stores to 1939 // ensure that we can fold all uses of a potential addressing computation 1940 // into their uses. TODO: generalize this to work over profiling data 1941 if (!OptSize && CI->hasFnAttr(Attribute::Cold)) 1942 for (auto &Arg : CI->arg_operands()) { 1943 if (!Arg->getType()->isPointerTy()) 1944 continue; 1945 unsigned AS = Arg->getType()->getPointerAddressSpace(); 1946 return optimizeMemoryInst(CI, Arg, Arg->getType(), AS); 1947 } 1948 1949 IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); 1950 if (II) { 1951 switch (II->getIntrinsicID()) { 1952 default: break; 1953 case Intrinsic::objectsize: { 1954 // Lower all uses of llvm.objectsize.* 1955 ConstantInt *RetVal = 1956 lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true); 1957 // Substituting this can cause recursive simplifications, which can 1958 // invalidate our iterator. Use a WeakVH to hold onto it in case this 1959 // happens. 1960 Value *CurValue = &*CurInstIterator; 1961 WeakVH IterHandle(CurValue); 1962 1963 replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr); 1964 1965 // If the iterator instruction was recursively deleted, start over at the 1966 // start of the block. 
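      // (A WeakVH tracks its value and goes null when the value is deleted,
      // so a mismatch below means the instruction CurInstIterator pointed to
      // was replaced or deleted during the recursive simplification.)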
1967 if (IterHandle != CurValue) { 1968 CurInstIterator = BB->begin(); 1969 SunkAddrs.clear(); 1970 } 1971 return true; 1972 } 1973 case Intrinsic::masked_load: { 1974 // Scalarize unsupported vector masked load 1975 if (!TTI->isLegalMaskedLoad(CI->getType())) { 1976 scalarizeMaskedLoad(CI); 1977 ModifiedDT = true; 1978 return true; 1979 } 1980 return false; 1981 } 1982 case Intrinsic::masked_store: { 1983 if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) { 1984 scalarizeMaskedStore(CI); 1985 ModifiedDT = true; 1986 return true; 1987 } 1988 return false; 1989 } 1990 case Intrinsic::masked_gather: { 1991 if (!TTI->isLegalMaskedGather(CI->getType())) { 1992 scalarizeMaskedGather(CI); 1993 ModifiedDT = true; 1994 return true; 1995 } 1996 return false; 1997 } 1998 case Intrinsic::masked_scatter: { 1999 if (!TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) { 2000 scalarizeMaskedScatter(CI); 2001 ModifiedDT = true; 2002 return true; 2003 } 2004 return false; 2005 } 2006 case Intrinsic::aarch64_stlxr: 2007 case Intrinsic::aarch64_stxr: { 2008 ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0)); 2009 if (!ExtVal || !ExtVal->hasOneUse() || 2010 ExtVal->getParent() == CI->getParent()) 2011 return false; 2012 // Sink a zext feeding stlxr/stxr before it, so it can be folded into it. 2013 ExtVal->moveBefore(CI); 2014 // Mark this instruction as "inserted by CGP", so that other 2015 // optimizations don't touch it. 2016 InsertedInsts.insert(ExtVal); 2017 return true; 2018 } 2019 case Intrinsic::invariant_group_barrier: 2020 II->replaceAllUsesWith(II->getArgOperand(0)); 2021 II->eraseFromParent(); 2022 return true; 2023 2024 case Intrinsic::cttz: 2025 case Intrinsic::ctlz: 2026 // If counting zeros is expensive, try to avoid it. 2027 return despeculateCountZeros(II, TLI, DL, ModifiedDT); 2028 } 2029 2030 if (TLI) { 2031 // Unknown address space. 2032 // TODO: Target hook to pick which address space the intrinsic cares 2033 // about? 2034 unsigned AddrSpace = ~0u; 2035 SmallVector<Value*, 2> PtrOps; 2036 Type *AccessTy; 2037 if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy, AddrSpace)) 2038 while (!PtrOps.empty()) 2039 if (optimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy, AddrSpace)) 2040 return true; 2041 } 2042 } 2043 2044 // From here on out we're working with named functions. 2045 if (!CI->getCalledFunction()) return false; 2046 2047 // Lower all default uses of _chk calls. This is very similar 2048 // to what InstCombineCalls does, but here we are only lowering calls 2049 // to fortified library functions (e.g. __memcpy_chk) that have the default 2050 // "don't know" as the objectsize. Anything else should be left alone. 2051 FortifiedLibCallSimplifier Simplifier(TLInfo, true); 2052 if (Value *V = Simplifier.optimizeCall(CI)) { 2053 CI->replaceAllUsesWith(V); 2054 CI->eraseFromParent(); 2055 return true; 2056 } 2057 return false; 2058 } 2059 2060 /// Look for opportunities to duplicate return instructions to the predecessor 2061 /// to enable tail call optimizations. 
The case it is currently looking for is: 2062 /// @code 2063 /// bb0: 2064 /// %tmp0 = tail call i32 @f0() 2065 /// br label %return 2066 /// bb1: 2067 /// %tmp1 = tail call i32 @f1() 2068 /// br label %return 2069 /// bb2: 2070 /// %tmp2 = tail call i32 @f2() 2071 /// br label %return 2072 /// return: 2073 /// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ] 2074 /// ret i32 %retval 2075 /// @endcode 2076 /// 2077 /// => 2078 /// 2079 /// @code 2080 /// bb0: 2081 /// %tmp0 = tail call i32 @f0() 2082 /// ret i32 %tmp0 2083 /// bb1: 2084 /// %tmp1 = tail call i32 @f1() 2085 /// ret i32 %tmp1 2086 /// bb2: 2087 /// %tmp2 = tail call i32 @f2() 2088 /// ret i32 %tmp2 2089 /// @endcode 2090 bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) { 2091 if (!TLI) 2092 return false; 2093 2094 ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator()); 2095 if (!RetI) 2096 return false; 2097 2098 PHINode *PN = nullptr; 2099 BitCastInst *BCI = nullptr; 2100 Value *V = RetI->getReturnValue(); 2101 if (V) { 2102 BCI = dyn_cast<BitCastInst>(V); 2103 if (BCI) 2104 V = BCI->getOperand(0); 2105 2106 PN = dyn_cast<PHINode>(V); 2107 if (!PN) 2108 return false; 2109 } 2110 2111 if (PN && PN->getParent() != BB) 2112 return false; 2113 2114 // Make sure there are no instructions between the PHI and return, or that the 2115 // return is the first instruction in the block. 2116 if (PN) { 2117 BasicBlock::iterator BI = BB->begin(); 2118 do { ++BI; } while (isa<DbgInfoIntrinsic>(BI)); 2119 if (&*BI == BCI) 2120 // Also skip over the bitcast. 2121 ++BI; 2122 if (&*BI != RetI) 2123 return false; 2124 } else { 2125 BasicBlock::iterator BI = BB->begin(); 2126 while (isa<DbgInfoIntrinsic>(BI)) ++BI; 2127 if (&*BI != RetI) 2128 return false; 2129 } 2130 2131 /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail 2132 /// call. 2133 const Function *F = BB->getParent(); 2134 SmallVector<CallInst*, 4> TailCalls; 2135 if (PN) { 2136 for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { 2137 CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I)); 2138 // Make sure the phi value is indeed produced by the tail call. 2139 if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) && 2140 TLI->mayBeEmittedAsTailCall(CI) && 2141 attributesPermitTailCall(F, CI, RetI, *TLI)) 2142 TailCalls.push_back(CI); 2143 } 2144 } else { 2145 SmallPtrSet<BasicBlock*, 4> VisitedBBs; 2146 for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { 2147 if (!VisitedBBs.insert(*PI).second) 2148 continue; 2149 2150 BasicBlock::InstListType &InstList = (*PI)->getInstList(); 2151 BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin(); 2152 BasicBlock::InstListType::reverse_iterator RE = InstList.rend(); 2153 do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI)); 2154 if (RI == RE) 2155 continue; 2156 2157 CallInst *CI = dyn_cast<CallInst>(&*RI); 2158 if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) && 2159 attributesPermitTailCall(F, CI, RetI, *TLI)) 2160 TailCalls.push_back(CI); 2161 } 2162 } 2163 2164 bool Changed = false; 2165 for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) { 2166 CallInst *CI = TailCalls[i]; 2167 CallSite CS(CI); 2168 2169 // Conservatively require the attributes of the call to match those of the 2170 // return. Ignore noalias because it doesn't affect the call sequence. 2171 AttributeSet CalleeAttrs = CS.getAttributes(); 2172 if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex). 
2173 removeAttribute(Attribute::NoAlias) != 2174 AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex). 2175 removeAttribute(Attribute::NoAlias)) 2176 continue; 2177 2178 // Make sure the call instruction is followed by an unconditional branch to 2179 // the return block. 2180 BasicBlock *CallBB = CI->getParent(); 2181 BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator()); 2182 if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB) 2183 continue; 2184 2185 // Duplicate the return into CallBB. 2186 (void)FoldReturnIntoUncondBranch(RetI, BB, CallBB); 2187 ModifiedDT = Changed = true; 2188 ++NumRetsDup; 2189 } 2190 2191 // If we eliminated all predecessors of the block, delete the block now. 2192 if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB)) 2193 BB->eraseFromParent(); 2194 2195 return Changed; 2196 } 2197 2198 //===----------------------------------------------------------------------===// 2199 // Memory Optimization 2200 //===----------------------------------------------------------------------===// 2201 2202 namespace { 2203 2204 /// This is an extended version of TargetLowering::AddrMode 2205 /// which holds actual Value*'s for register values. 2206 struct ExtAddrMode : public TargetLowering::AddrMode { 2207 Value *BaseReg; 2208 Value *ScaledReg; 2209 ExtAddrMode() : BaseReg(nullptr), ScaledReg(nullptr) {} 2210 void print(raw_ostream &OS) const; 2211 void dump() const; 2212 2213 bool operator==(const ExtAddrMode& O) const { 2214 return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) && 2215 (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) && 2216 (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale); 2217 } 2218 }; 2219 2220 #ifndef NDEBUG 2221 static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) { 2222 AM.print(OS); 2223 return OS; 2224 } 2225 #endif 2226 2227 void ExtAddrMode::print(raw_ostream &OS) const { 2228 bool NeedPlus = false; 2229 OS << "["; 2230 if (BaseGV) { 2231 OS << (NeedPlus ? " + " : "") 2232 << "GV:"; 2233 BaseGV->printAsOperand(OS, /*PrintType=*/false); 2234 NeedPlus = true; 2235 } 2236 2237 if (BaseOffs) { 2238 OS << (NeedPlus ? " + " : "") 2239 << BaseOffs; 2240 NeedPlus = true; 2241 } 2242 2243 if (BaseReg) { 2244 OS << (NeedPlus ? " + " : "") 2245 << "Base:"; 2246 BaseReg->printAsOperand(OS, /*PrintType=*/false); 2247 NeedPlus = true; 2248 } 2249 if (Scale) { 2250 OS << (NeedPlus ? " + " : "") 2251 << Scale << "*"; 2252 ScaledReg->printAsOperand(OS, /*PrintType=*/false); 2253 } 2254 2255 OS << ']'; 2256 } 2257 2258 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2259 LLVM_DUMP_METHOD void ExtAddrMode::dump() const { 2260 print(dbgs()); 2261 dbgs() << '\n'; 2262 } 2263 #endif 2264 2265 /// \brief This class provides transaction based operation on the IR. 2266 /// Every change made through this class is recorded in the internal state and 2267 /// can be undone (rollback) until commit is called. 2268 class TypePromotionTransaction { 2269 2270 /// \brief This represents the common interface of the individual transaction. 2271 /// Each class implements the logic for doing one specific modification on 2272 /// the IR via the TypePromotionTransaction. 2273 class TypePromotionAction { 2274 protected: 2275 /// The Instruction modified. 2276 Instruction *Inst; 2277 2278 public: 2279 /// \brief Constructor of the action. 2280 /// The constructor performs the related action on the IR. 
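    /// Subclasses follow this command-style pattern: their constructor
    /// applies the change immediately, undo() reverts it, and commit()
    /// finalizes it (e.g. InstructionRemover::commit deletes the removed
    /// instruction).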
2281 TypePromotionAction(Instruction *Inst) : Inst(Inst) {} 2282 2283 virtual ~TypePromotionAction() {} 2284 2285 /// \brief Undo the modification done by this action. 2286 /// When this method is called, the IR must be in the same state as it was 2287 /// before this action was applied. 2288 /// \pre Undoing the action works if and only if the IR is in the exact same 2289 /// state as it was directly after this action was applied. 2290 virtual void undo() = 0; 2291 2292 /// \brief Advocate every change made by this action. 2293 /// When the results on the IR of the action are to be kept, it is important 2294 /// to call this function, otherwise hidden information may be kept forever. 2295 virtual void commit() { 2296 // Nothing to be done, this action is not doing anything. 2297 } 2298 }; 2299 2300 /// \brief Utility to remember the position of an instruction. 2301 class InsertionHandler { 2302 /// Position of an instruction. 2303 /// Either an instruction: 2304 /// - Is the first in a basic block: BB is used. 2305 /// - Has a previous instructon: PrevInst is used. 2306 union { 2307 Instruction *PrevInst; 2308 BasicBlock *BB; 2309 } Point; 2310 /// Remember whether or not the instruction had a previous instruction. 2311 bool HasPrevInstruction; 2312 2313 public: 2314 /// \brief Record the position of \p Inst. 2315 InsertionHandler(Instruction *Inst) { 2316 BasicBlock::iterator It = Inst->getIterator(); 2317 HasPrevInstruction = (It != (Inst->getParent()->begin())); 2318 if (HasPrevInstruction) 2319 Point.PrevInst = &*--It; 2320 else 2321 Point.BB = Inst->getParent(); 2322 } 2323 2324 /// \brief Insert \p Inst at the recorded position. 2325 void insert(Instruction *Inst) { 2326 if (HasPrevInstruction) { 2327 if (Inst->getParent()) 2328 Inst->removeFromParent(); 2329 Inst->insertAfter(Point.PrevInst); 2330 } else { 2331 Instruction *Position = &*Point.BB->getFirstInsertionPt(); 2332 if (Inst->getParent()) 2333 Inst->moveBefore(Position); 2334 else 2335 Inst->insertBefore(Position); 2336 } 2337 } 2338 }; 2339 2340 /// \brief Move an instruction before another. 2341 class InstructionMoveBefore : public TypePromotionAction { 2342 /// Original position of the instruction. 2343 InsertionHandler Position; 2344 2345 public: 2346 /// \brief Move \p Inst before \p Before. 2347 InstructionMoveBefore(Instruction *Inst, Instruction *Before) 2348 : TypePromotionAction(Inst), Position(Inst) { 2349 DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before << "\n"); 2350 Inst->moveBefore(Before); 2351 } 2352 2353 /// \brief Move the instruction back to its original position. 2354 void undo() override { 2355 DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n"); 2356 Position.insert(Inst); 2357 } 2358 }; 2359 2360 /// \brief Set the operand of an instruction with a new value. 2361 class OperandSetter : public TypePromotionAction { 2362 /// Original operand of the instruction. 2363 Value *Origin; 2364 /// Index of the modified instruction. 2365 unsigned Idx; 2366 2367 public: 2368 /// \brief Set \p Idx operand of \p Inst with \p NewVal. 2369 OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal) 2370 : TypePromotionAction(Inst), Idx(Idx) { 2371 DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n" 2372 << "for:" << *Inst << "\n" 2373 << "with:" << *NewVal << "\n"); 2374 Origin = Inst->getOperand(Idx); 2375 Inst->setOperand(Idx, NewVal); 2376 } 2377 2378 /// \brief Restore the original value of the instruction. 
2379 void undo() override { 2380 DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n" 2381 << "for: " << *Inst << "\n" 2382 << "with: " << *Origin << "\n"); 2383 Inst->setOperand(Idx, Origin); 2384 } 2385 }; 2386 2387 /// \brief Hide the operands of an instruction. 2388 /// Do as if this instruction was not using any of its operands. 2389 class OperandsHider : public TypePromotionAction { 2390 /// The list of original operands. 2391 SmallVector<Value *, 4> OriginalValues; 2392 2393 public: 2394 /// \brief Remove \p Inst from the uses of the operands of \p Inst. 2395 OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) { 2396 DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n"); 2397 unsigned NumOpnds = Inst->getNumOperands(); 2398 OriginalValues.reserve(NumOpnds); 2399 for (unsigned It = 0; It < NumOpnds; ++It) { 2400 // Save the current operand. 2401 Value *Val = Inst->getOperand(It); 2402 OriginalValues.push_back(Val); 2403 // Set a dummy one. 2404 // We could use OperandSetter here, but that would imply an overhead 2405 // that we are not willing to pay. 2406 Inst->setOperand(It, UndefValue::get(Val->getType())); 2407 } 2408 } 2409 2410 /// \brief Restore the original list of uses. 2411 void undo() override { 2412 DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n"); 2413 for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It) 2414 Inst->setOperand(It, OriginalValues[It]); 2415 } 2416 }; 2417 2418 /// \brief Build a truncate instruction. 2419 class TruncBuilder : public TypePromotionAction { 2420 Value *Val; 2421 public: 2422 /// \brief Build a truncate instruction of \p Opnd producing a \p Ty 2423 /// result. 2424 /// trunc Opnd to Ty. 2425 TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) { 2426 IRBuilder<> Builder(Opnd); 2427 Val = Builder.CreateTrunc(Opnd, Ty, "promoted"); 2428 DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n"); 2429 } 2430 2431 /// \brief Get the built value. 2432 Value *getBuiltValue() { return Val; } 2433 2434 /// \brief Remove the built instruction. 2435 void undo() override { 2436 DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n"); 2437 if (Instruction *IVal = dyn_cast<Instruction>(Val)) 2438 IVal->eraseFromParent(); 2439 } 2440 }; 2441 2442 /// \brief Build a sign extension instruction. 2443 class SExtBuilder : public TypePromotionAction { 2444 Value *Val; 2445 public: 2446 /// \brief Build a sign extension instruction of \p Opnd producing a \p Ty 2447 /// result. 2448 /// sext Opnd to Ty. 2449 SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty) 2450 : TypePromotionAction(InsertPt) { 2451 IRBuilder<> Builder(InsertPt); 2452 Val = Builder.CreateSExt(Opnd, Ty, "promoted"); 2453 DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n"); 2454 } 2455 2456 /// \brief Get the built value. 2457 Value *getBuiltValue() { return Val; } 2458 2459 /// \brief Remove the built instruction. 2460 void undo() override { 2461 DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n"); 2462 if (Instruction *IVal = dyn_cast<Instruction>(Val)) 2463 IVal->eraseFromParent(); 2464 } 2465 }; 2466 2467 /// \brief Build a zero extension instruction. 2468 class ZExtBuilder : public TypePromotionAction { 2469 Value *Val; 2470 public: 2471 /// \brief Build a zero extension instruction of \p Opnd producing a \p Ty 2472 /// result. 2473 /// zext Opnd to Ty. 
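    /// Note that the insertion point is passed separately from \p Opnd: the
    /// zext is materialized at \p InsertPt, which need not be adjacent to the
    /// definition of \p Opnd.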
2474 ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty) 2475 : TypePromotionAction(InsertPt) { 2476 IRBuilder<> Builder(InsertPt); 2477 Val = Builder.CreateZExt(Opnd, Ty, "promoted"); 2478 DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n"); 2479 } 2480 2481 /// \brief Get the built value. 2482 Value *getBuiltValue() { return Val; } 2483 2484 /// \brief Remove the built instruction. 2485 void undo() override { 2486 DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n"); 2487 if (Instruction *IVal = dyn_cast<Instruction>(Val)) 2488 IVal->eraseFromParent(); 2489 } 2490 }; 2491 2492 /// \brief Mutate an instruction to another type. 2493 class TypeMutator : public TypePromotionAction { 2494 /// Record the original type. 2495 Type *OrigTy; 2496 2497 public: 2498 /// \brief Mutate the type of \p Inst into \p NewTy. 2499 TypeMutator(Instruction *Inst, Type *NewTy) 2500 : TypePromotionAction(Inst), OrigTy(Inst->getType()) { 2501 DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy 2502 << "\n"); 2503 Inst->mutateType(NewTy); 2504 } 2505 2506 /// \brief Mutate the instruction back to its original type. 2507 void undo() override { 2508 DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy 2509 << "\n"); 2510 Inst->mutateType(OrigTy); 2511 } 2512 }; 2513 2514 /// \brief Replace the uses of an instruction by another instruction. 2515 class UsesReplacer : public TypePromotionAction { 2516 /// Helper structure to keep track of the replaced uses. 2517 struct InstructionAndIdx { 2518 /// The instruction using the instruction. 2519 Instruction *Inst; 2520 /// The index where this instruction is used for Inst. 2521 unsigned Idx; 2522 InstructionAndIdx(Instruction *Inst, unsigned Idx) 2523 : Inst(Inst), Idx(Idx) {} 2524 }; 2525 2526 /// Keep track of the original uses (pair Instruction, Index). 2527 SmallVector<InstructionAndIdx, 4> OriginalUses; 2528 typedef SmallVectorImpl<InstructionAndIdx>::iterator use_iterator; 2529 2530 public: 2531 /// \brief Replace all the use of \p Inst by \p New. 2532 UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) { 2533 DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New 2534 << "\n"); 2535 // Record the original uses. 2536 for (Use &U : Inst->uses()) { 2537 Instruction *UserI = cast<Instruction>(U.getUser()); 2538 OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo())); 2539 } 2540 // Now, we can replace the uses. 2541 Inst->replaceAllUsesWith(New); 2542 } 2543 2544 /// \brief Reassign the original uses of Inst to Inst. 2545 void undo() override { 2546 DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n"); 2547 for (use_iterator UseIt = OriginalUses.begin(), 2548 EndIt = OriginalUses.end(); 2549 UseIt != EndIt; ++UseIt) { 2550 UseIt->Inst->setOperand(UseIt->Idx, Inst); 2551 } 2552 } 2553 }; 2554 2555 /// \brief Remove an instruction from the IR. 2556 class InstructionRemover : public TypePromotionAction { 2557 /// Original position of the instruction. 2558 InsertionHandler Inserter; 2559 /// Helper structure to hide all the link to the instruction. In other 2560 /// words, this helps to do as if the instruction was removed. 2561 OperandsHider Hider; 2562 /// Keep track of the uses replaced, if any. 2563 UsesReplacer *Replacer; 2564 2565 public: 2566 /// \brief Remove all reference of \p Inst and optinally replace all its 2567 /// uses with New. 
2568     /// \pre If !Inst->use_empty(), then New != nullptr
2569     InstructionRemover(Instruction *Inst, Value *New = nullptr)
2570         : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
2571           Replacer(nullptr) {
2572       if (New)
2573         Replacer = new UsesReplacer(Inst, New);
2574       DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
2575       Inst->removeFromParent();
2576     }
2577
2578     ~InstructionRemover() override { delete Replacer; }
2579
2580     /// \brief Really remove the instruction.
2581     void commit() override { delete Inst; }
2582
2583     /// \brief Resurrect the instruction and reassign it to the proper uses if
2584     /// a new value was provided when building this action.
2585     void undo() override {
2586       DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
2587       Inserter.insert(Inst);
2588       if (Replacer)
2589         Replacer->undo();
2590       Hider.undo();
2591     }
2592   };
2593
2594 public:
2595   /// Restoration point.
2596   /// The restoration point is a pointer to an action instead of an iterator
2597   /// because the iterator may be invalidated but not the pointer.
2598   typedef const TypePromotionAction *ConstRestorationPt;
2599   /// Commit every change made in this transaction.
2600   void commit();
2601   /// Undo all the changes made after the given point.
2602   void rollback(ConstRestorationPt Point);
2603   /// Get the current restoration point.
2604   ConstRestorationPt getRestorationPoint() const;
2605
2606   /// \name API for IR modification with state keeping to support rollback.
2607   /// @{
2608   /// Same as Instruction::setOperand.
2609   void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
2610   /// Same as Instruction::eraseFromParent.
2611   void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
2612   /// Same as Value::replaceAllUsesWith.
2613   void replaceAllUsesWith(Instruction *Inst, Value *New);
2614   /// Same as Value::mutateType.
2615   void mutateType(Instruction *Inst, Type *NewTy);
2616   /// Same as IRBuilder::createTrunc.
2617   Value *createTrunc(Instruction *Opnd, Type *Ty);
2618   /// Same as IRBuilder::createSExt.
2619   Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
2620   /// Same as IRBuilder::createZExt.
2621   Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
2622   /// Same as Instruction::moveBefore.
2623   void moveBefore(Instruction *Inst, Instruction *Before);
2624   /// @}
2625
2626 private:
2627   /// The ordered list of actions made so far.
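  /// rollback() pops and undoes these in reverse (LIFO) order; commit()
  /// finalizes each action in order and then clears the list.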
2628 SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions; 2629 typedef SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator CommitPt; 2630 }; 2631 2632 void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx, 2633 Value *NewVal) { 2634 Actions.push_back( 2635 make_unique<TypePromotionTransaction::OperandSetter>(Inst, Idx, NewVal)); 2636 } 2637 2638 void TypePromotionTransaction::eraseInstruction(Instruction *Inst, 2639 Value *NewVal) { 2640 Actions.push_back( 2641 make_unique<TypePromotionTransaction::InstructionRemover>(Inst, NewVal)); 2642 } 2643 2644 void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst, 2645 Value *New) { 2646 Actions.push_back(make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New)); 2647 } 2648 2649 void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) { 2650 Actions.push_back(make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy)); 2651 } 2652 2653 Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, 2654 Type *Ty) { 2655 std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty)); 2656 Value *Val = Ptr->getBuiltValue(); 2657 Actions.push_back(std::move(Ptr)); 2658 return Val; 2659 } 2660 2661 Value *TypePromotionTransaction::createSExt(Instruction *Inst, 2662 Value *Opnd, Type *Ty) { 2663 std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty)); 2664 Value *Val = Ptr->getBuiltValue(); 2665 Actions.push_back(std::move(Ptr)); 2666 return Val; 2667 } 2668 2669 Value *TypePromotionTransaction::createZExt(Instruction *Inst, 2670 Value *Opnd, Type *Ty) { 2671 std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty)); 2672 Value *Val = Ptr->getBuiltValue(); 2673 Actions.push_back(std::move(Ptr)); 2674 return Val; 2675 } 2676 2677 void TypePromotionTransaction::moveBefore(Instruction *Inst, 2678 Instruction *Before) { 2679 Actions.push_back( 2680 make_unique<TypePromotionTransaction::InstructionMoveBefore>(Inst, Before)); 2681 } 2682 2683 TypePromotionTransaction::ConstRestorationPt 2684 TypePromotionTransaction::getRestorationPoint() const { 2685 return !Actions.empty() ? Actions.back().get() : nullptr; 2686 } 2687 2688 void TypePromotionTransaction::commit() { 2689 for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt; 2690 ++It) 2691 (*It)->commit(); 2692 Actions.clear(); 2693 } 2694 2695 void TypePromotionTransaction::rollback( 2696 TypePromotionTransaction::ConstRestorationPt Point) { 2697 while (!Actions.empty() && Point != Actions.back().get()) { 2698 std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val(); 2699 Curr->undo(); 2700 } 2701 } 2702 2703 /// \brief A helper class for matching addressing modes. 2704 /// 2705 /// This encapsulates the logic for matching the target-legal addressing modes. 2706 class AddressingModeMatcher { 2707 SmallVectorImpl<Instruction*> &AddrModeInsts; 2708 const TargetMachine &TM; 2709 const TargetLowering &TLI; 2710 const DataLayout &DL; 2711 2712 /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and 2713 /// the memory instruction that we're computing this address for. 2714 Type *AccessTy; 2715 unsigned AddrSpace; 2716 Instruction *MemoryInst; 2717 2718 /// This is the addressing mode that we're building up. This is 2719 /// part of the return value of this addressing mode matching stuff. 2720 ExtAddrMode &AddrMode; 2721 2722 /// The instructions inserted by other CodeGenPrepare optimizations. 
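  /// Matching must not promote these, otherwise CodeGenPrepare could keep
  /// re-creating what it just removed and loop forever.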
2723 const SetOfInstrs &InsertedInsts; 2724 /// A map from the instructions to their type before promotion. 2725 InstrToOrigTy &PromotedInsts; 2726 /// The ongoing transaction where every action should be registered. 2727 TypePromotionTransaction &TPT; 2728 2729 /// This is set to true when we should not do profitability checks. 2730 /// When true, IsProfitableToFoldIntoAddressingMode always returns true. 2731 bool IgnoreProfitability; 2732 2733 AddressingModeMatcher(SmallVectorImpl<Instruction *> &AMI, 2734 const TargetMachine &TM, Type *AT, unsigned AS, 2735 Instruction *MI, ExtAddrMode &AM, 2736 const SetOfInstrs &InsertedInsts, 2737 InstrToOrigTy &PromotedInsts, 2738 TypePromotionTransaction &TPT) 2739 : AddrModeInsts(AMI), TM(TM), 2740 TLI(*TM.getSubtargetImpl(*MI->getParent()->getParent()) 2741 ->getTargetLowering()), 2742 DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS), 2743 MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts), 2744 PromotedInsts(PromotedInsts), TPT(TPT) { 2745 IgnoreProfitability = false; 2746 } 2747 public: 2748 2749 /// Find the maximal addressing mode that a load/store of V can fold, 2750 /// give an access type of AccessTy. This returns a list of involved 2751 /// instructions in AddrModeInsts. 2752 /// \p InsertedInsts The instructions inserted by other CodeGenPrepare 2753 /// optimizations. 2754 /// \p PromotedInsts maps the instructions to their type before promotion. 2755 /// \p The ongoing transaction where every action should be registered. 2756 static ExtAddrMode Match(Value *V, Type *AccessTy, unsigned AS, 2757 Instruction *MemoryInst, 2758 SmallVectorImpl<Instruction*> &AddrModeInsts, 2759 const TargetMachine &TM, 2760 const SetOfInstrs &InsertedInsts, 2761 InstrToOrigTy &PromotedInsts, 2762 TypePromotionTransaction &TPT) { 2763 ExtAddrMode Result; 2764 2765 bool Success = AddressingModeMatcher(AddrModeInsts, TM, AccessTy, AS, 2766 MemoryInst, Result, InsertedInsts, 2767 PromotedInsts, TPT).matchAddr(V, 0); 2768 (void)Success; assert(Success && "Couldn't select *anything*?"); 2769 return Result; 2770 } 2771 private: 2772 bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth); 2773 bool matchAddr(Value *V, unsigned Depth); 2774 bool matchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth, 2775 bool *MovedAway = nullptr); 2776 bool isProfitableToFoldIntoAddressingMode(Instruction *I, 2777 ExtAddrMode &AMBefore, 2778 ExtAddrMode &AMAfter); 2779 bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2); 2780 bool isPromotionProfitable(unsigned NewCost, unsigned OldCost, 2781 Value *PromotedOperand) const; 2782 }; 2783 2784 /// Try adding ScaleReg*Scale to the current addressing mode. 2785 /// Return true and update AddrMode if this addr mode is legal for the target, 2786 /// false if not. 2787 bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale, 2788 unsigned Depth) { 2789 // If Scale is 1, then this is the same as adding ScaleReg to the addressing 2790 // mode. Just process that directly. 2791 if (Scale == 1) 2792 return matchAddr(ScaleReg, Depth); 2793 2794 // If the scale is 0, it takes nothing to add this. 2795 if (Scale == 0) 2796 return true; 2797 2798 // If we already have a scale of this value, we can add to it, otherwise, we 2799 // need an available scale field. 2800 if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg) 2801 return false; 2802 2803 ExtAddrMode TestAddrMode = AddrMode; 2804 2805 // Add scale to turn X*4+X*3 -> X*7. 
This could also do things like 2806 // [A+B + A*7] -> [B+A*8]. 2807 TestAddrMode.Scale += Scale; 2808 TestAddrMode.ScaledReg = ScaleReg; 2809 2810 // If the new address isn't legal, bail out. 2811 if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) 2812 return false; 2813 2814 // It was legal, so commit it. 2815 AddrMode = TestAddrMode; 2816 2817 // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now 2818 // to see if ScaleReg is actually X+C. If so, we can turn this into adding 2819 // X*Scale + C*Scale to addr mode. 2820 ConstantInt *CI = nullptr; Value *AddLHS = nullptr; 2821 if (isa<Instruction>(ScaleReg) && // not a constant expr. 2822 match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) { 2823 TestAddrMode.ScaledReg = AddLHS; 2824 TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale; 2825 2826 // If this addressing mode is legal, commit it and remember that we folded 2827 // this instruction. 2828 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) { 2829 AddrModeInsts.push_back(cast<Instruction>(ScaleReg)); 2830 AddrMode = TestAddrMode; 2831 return true; 2832 } 2833 } 2834 2835 // Otherwise, not (x+c)*scale, just return what we have. 2836 return true; 2837 } 2838 2839 /// This is a little filter, which returns true if an addressing computation 2840 /// involving I might be folded into a load/store accessing it. 2841 /// This doesn't need to be perfect, but needs to accept at least 2842 /// the set of instructions that MatchOperationAddr can. 2843 static bool MightBeFoldableInst(Instruction *I) { 2844 switch (I->getOpcode()) { 2845 case Instruction::BitCast: 2846 case Instruction::AddrSpaceCast: 2847 // Don't touch identity bitcasts. 2848 if (I->getType() == I->getOperand(0)->getType()) 2849 return false; 2850 return I->getType()->isPointerTy() || I->getType()->isIntegerTy(); 2851 case Instruction::PtrToInt: 2852 // PtrToInt is always a noop, as we know that the int type is pointer sized. 2853 return true; 2854 case Instruction::IntToPtr: 2855 // We know the input is intptr_t, so this is foldable. 2856 return true; 2857 case Instruction::Add: 2858 return true; 2859 case Instruction::Mul: 2860 case Instruction::Shl: 2861 // Can only handle X*C and X << C. 2862 return isa<ConstantInt>(I->getOperand(1)); 2863 case Instruction::GetElementPtr: 2864 return true; 2865 default: 2866 return false; 2867 } 2868 } 2869 2870 /// \brief Check whether or not \p Val is a legal instruction for \p TLI. 2871 /// \note \p Val is assumed to be the product of some type promotion. 2872 /// Therefore if \p Val has an undefined state in \p TLI, this is assumed 2873 /// to be legal, as the non-promoted value would have had the same state. 2874 static bool isPromotedInstructionLegal(const TargetLowering &TLI, 2875 const DataLayout &DL, Value *Val) { 2876 Instruction *PromotedInst = dyn_cast<Instruction>(Val); 2877 if (!PromotedInst) 2878 return false; 2879 int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode()); 2880 // If the ISDOpcode is undefined, it was undefined before the promotion. 2881 if (!ISDOpcode) 2882 return true; 2883 // Otherwise, check if the promoted instruction is legal or not. 2884 return TLI.isOperationLegalOrCustom( 2885 ISDOpcode, TLI.getValueType(DL, PromotedInst->getType())); 2886 } 2887 2888 /// \brief Hepler class to perform type promotion. 
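/// Illustrative example of the transformation this enables (i32 -> i64):
///   %add = add nsw i32 %a, 1
///   %ext = sext i32 %add to i64
/// can be rewritten, under the nsw guarantee, as
///   %pa  = sext i32 %a to i64
///   %ext = add nsw i64 %pa, 1
/// so that the extension is moved next to %a and may be folded away.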
2889 class TypePromotionHelper { 2890 /// \brief Utility function to check whether or not a sign or zero extension 2891 /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by 2892 /// either using the operands of \p Inst or promoting \p Inst. 2893 /// The type of the extension is defined by \p IsSExt. 2894 /// In other words, check if: 2895 /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType. 2896 /// #1 Promotion applies: 2897 /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...). 2898 /// #2 Operand reuses: 2899 /// ext opnd1 to ConsideredExtType. 2900 /// \p PromotedInsts maps the instructions to their type before promotion. 2901 static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType, 2902 const InstrToOrigTy &PromotedInsts, bool IsSExt); 2903 2904 /// \brief Utility function to determine if \p OpIdx should be promoted when 2905 /// promoting \p Inst. 2906 static bool shouldExtOperand(const Instruction *Inst, int OpIdx) { 2907 return !(isa<SelectInst>(Inst) && OpIdx == 0); 2908 } 2909 2910 /// \brief Utility function to promote the operand of \p Ext when this 2911 /// operand is a promotable trunc or sext or zext. 2912 /// \p PromotedInsts maps the instructions to their type before promotion. 2913 /// \p CreatedInstsCost[out] contains the cost of all instructions 2914 /// created to promote the operand of Ext. 2915 /// Newly added extensions are inserted in \p Exts. 2916 /// Newly added truncates are inserted in \p Truncs. 2917 /// Should never be called directly. 2918 /// \return The promoted value which is used instead of Ext. 2919 static Value *promoteOperandForTruncAndAnyExt( 2920 Instruction *Ext, TypePromotionTransaction &TPT, 2921 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, 2922 SmallVectorImpl<Instruction *> *Exts, 2923 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI); 2924 2925 /// \brief Utility function to promote the operand of \p Ext when this 2926 /// operand is promotable and is not a supported trunc or sext. 2927 /// \p PromotedInsts maps the instructions to their type before promotion. 2928 /// \p CreatedInstsCost[out] contains the cost of all the instructions 2929 /// created to promote the operand of Ext. 2930 /// Newly added extensions are inserted in \p Exts. 2931 /// Newly added truncates are inserted in \p Truncs. 2932 /// Should never be called directly. 2933 /// \return The promoted value which is used instead of Ext. 2934 static Value *promoteOperandForOther(Instruction *Ext, 2935 TypePromotionTransaction &TPT, 2936 InstrToOrigTy &PromotedInsts, 2937 unsigned &CreatedInstsCost, 2938 SmallVectorImpl<Instruction *> *Exts, 2939 SmallVectorImpl<Instruction *> *Truncs, 2940 const TargetLowering &TLI, bool IsSExt); 2941 2942 /// \see promoteOperandForOther. 2943 static Value *signExtendOperandForOther( 2944 Instruction *Ext, TypePromotionTransaction &TPT, 2945 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, 2946 SmallVectorImpl<Instruction *> *Exts, 2947 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) { 2948 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost, 2949 Exts, Truncs, TLI, true); 2950 } 2951 2952 /// \see promoteOperandForOther. 
2953 static Value *zeroExtendOperandForOther( 2954 Instruction *Ext, TypePromotionTransaction &TPT, 2955 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, 2956 SmallVectorImpl<Instruction *> *Exts, 2957 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) { 2958 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost, 2959 Exts, Truncs, TLI, false); 2960 } 2961 2962 public: 2963 /// Type for the utility function that promotes the operand of Ext. 2964 typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT, 2965 InstrToOrigTy &PromotedInsts, 2966 unsigned &CreatedInstsCost, 2967 SmallVectorImpl<Instruction *> *Exts, 2968 SmallVectorImpl<Instruction *> *Truncs, 2969 const TargetLowering &TLI); 2970 /// \brief Given a sign/zero extend instruction \p Ext, return the approriate 2971 /// action to promote the operand of \p Ext instead of using Ext. 2972 /// \return NULL if no promotable action is possible with the current 2973 /// sign extension. 2974 /// \p InsertedInsts keeps track of all the instructions inserted by the 2975 /// other CodeGenPrepare optimizations. This information is important 2976 /// because we do not want to promote these instructions as CodeGenPrepare 2977 /// will reinsert them later. Thus creating an infinite loop: create/remove. 2978 /// \p PromotedInsts maps the instructions to their type before promotion. 2979 static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts, 2980 const TargetLowering &TLI, 2981 const InstrToOrigTy &PromotedInsts); 2982 }; 2983 2984 bool TypePromotionHelper::canGetThrough(const Instruction *Inst, 2985 Type *ConsideredExtType, 2986 const InstrToOrigTy &PromotedInsts, 2987 bool IsSExt) { 2988 // The promotion helper does not know how to deal with vector types yet. 2989 // To be able to fix that, we would need to fix the places where we 2990 // statically extend, e.g., constants and such. 2991 if (Inst->getType()->isVectorTy()) 2992 return false; 2993 2994 // We can always get through zext. 2995 if (isa<ZExtInst>(Inst)) 2996 return true; 2997 2998 // sext(sext) is ok too. 2999 if (IsSExt && isa<SExtInst>(Inst)) 3000 return true; 3001 3002 // We can get through binary operator, if it is legal. In other words, the 3003 // binary operator must have a nuw or nsw flag. 3004 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst); 3005 if (BinOp && isa<OverflowingBinaryOperator>(BinOp) && 3006 ((!IsSExt && BinOp->hasNoUnsignedWrap()) || 3007 (IsSExt && BinOp->hasNoSignedWrap()))) 3008 return true; 3009 3010 // Check if we can do the following simplification. 3011 // ext(trunc(opnd)) --> ext(opnd) 3012 if (!isa<TruncInst>(Inst)) 3013 return false; 3014 3015 Value *OpndVal = Inst->getOperand(0); 3016 // Check if we can use this operand in the extension. 3017 // If the type is larger than the result type of the extension, we cannot. 3018 if (!OpndVal->getType()->isIntegerTy() || 3019 OpndVal->getType()->getIntegerBitWidth() > 3020 ConsideredExtType->getIntegerBitWidth()) 3021 return false; 3022 3023 // If the operand of the truncate is not an instruction, we will not have 3024 // any information on the dropped bits. 3025 // (Actually we could for constant but it is not worth the extra logic). 3026 Instruction *Opnd = dyn_cast<Instruction>(OpndVal); 3027 if (!Opnd) 3028 return false; 3029 3030 // Check if the source of the type is narrow enough. 3031 // I.e., check that trunc just drops extended bits of the same kind of 3032 // the extension. 
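  // For instance (illustrative): given
  //   %s = sext i16 %a to i64
  //   %t = trunc i64 %s to i32
  // a further sext of %t to i64 can get through the truncate, because the
  // truncate only dropped bits that were sign-extension copies (the check
  // below sees 32 >= 16).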
  // #1 get the type of the operand and check the kind of the extended bits.
  const Type *OpndType;
  InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
  if (It != PromotedInsts.end() && It->second.getInt() == IsSExt)
    OpndType = It->second.getPointer();
  else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
    OpndType = Opnd->getOperand(0)->getType();
  else
    return false;

  // #2 check that the truncate just drops extended bits.
  return Inst->getType()->getIntegerBitWidth() >=
         OpndType->getIntegerBitWidth();
}

TypePromotionHelper::Action TypePromotionHelper::getAction(
    Instruction *Ext, const SetOfInstrs &InsertedInsts,
    const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
  assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
         "Unexpected instruction type");
  Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
  Type *ExtTy = Ext->getType();
  bool IsSExt = isa<SExtInst>(Ext);
  // If the operand of the extension is not an instruction, we cannot
  // get through.
  // If it is, check whether we can get through it.
  if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
    return nullptr;

  // Do not promote if the operand has been added by codegenprepare.
  // Otherwise, it means we are undoing an optimization that is likely to be
  // redone, thus causing a potential infinite loop.
  if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
    return nullptr;

  // SExt or Trunc instructions.
  // Return the related handler.
  if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
      isa<ZExtInst>(ExtOpnd))
    return promoteOperandForTruncAndAnyExt;

  // Regular instruction.
  // Abort early if we will have to insert non-free instructions.
  if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
    return nullptr;
  return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
}

Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
    llvm::Instruction *SExt, TypePromotionTransaction &TPT,
    InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
    SmallVectorImpl<Instruction *> *Exts,
    SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
  // By construction, the operand of SExt is an instruction. Otherwise we cannot
  // get through it and this method should not be called.
  Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
  Value *ExtVal = SExt;
  bool HasMergedNonFreeExt = false;
  if (isa<ZExtInst>(SExtOpnd)) {
    // Replace s|zext(zext(opnd))
    // => zext(opnd).
    HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
    Value *ZExt =
        TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
    TPT.replaceAllUsesWith(SExt, ZExt);
    TPT.eraseInstruction(SExt);
    ExtVal = ZExt;
  } else {
    // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
    // => z|sext(opnd).
    TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
  }
  CreatedInstsCost = 0;

  // Remove dead code.
  if (SExtOpnd->use_empty())
    TPT.eraseInstruction(SExtOpnd);

  // Check if the extension is still needed.
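  // For illustration (hypothetical IR): starting from
  //   %t = trunc i64 %x to i32
  //   %e = zext i32 %t to i64
  // the operand rewrite above makes %e extend %x, which already has the
  // destination type; the check below then recognizes the extension as
  // redundant, erases it, and rewires its uses to %x.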
3112 Instruction *ExtInst = dyn_cast<Instruction>(ExtVal); 3113 if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) { 3114 if (ExtInst) { 3115 if (Exts) 3116 Exts->push_back(ExtInst); 3117 CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt; 3118 } 3119 return ExtVal; 3120 } 3121 3122 // At this point we have: ext ty opnd to ty. 3123 // Reassign the uses of ExtInst to the opnd and remove ExtInst. 3124 Value *NextVal = ExtInst->getOperand(0); 3125 TPT.eraseInstruction(ExtInst, NextVal); 3126 return NextVal; 3127 } 3128 3129 Value *TypePromotionHelper::promoteOperandForOther( 3130 Instruction *Ext, TypePromotionTransaction &TPT, 3131 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, 3132 SmallVectorImpl<Instruction *> *Exts, 3133 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI, 3134 bool IsSExt) { 3135 // By construction, the operand of Ext is an instruction. Otherwise we cannot 3136 // get through it and this method should not be called. 3137 Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0)); 3138 CreatedInstsCost = 0; 3139 if (!ExtOpnd->hasOneUse()) { 3140 // ExtOpnd will be promoted. 3141 // All its uses, but Ext, will need to use a truncated value of the 3142 // promoted version. 3143 // Create the truncate now. 3144 Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType()); 3145 if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) { 3146 ITrunc->removeFromParent(); 3147 // Insert it just after the definition. 3148 ITrunc->insertAfter(ExtOpnd); 3149 if (Truncs) 3150 Truncs->push_back(ITrunc); 3151 } 3152 3153 TPT.replaceAllUsesWith(ExtOpnd, Trunc); 3154 // Restore the operand of Ext (which has been replaced by the previous call 3155 // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext. 3156 TPT.setOperand(Ext, 0, ExtOpnd); 3157 } 3158 3159 // Get through the Instruction: 3160 // 1. Update its type. 3161 // 2. Replace the uses of Ext by Inst. 3162 // 3. Extend each operand that needs to be extended. 3163 3164 // Remember the original type of the instruction before promotion. 3165 // This is useful to know that the high bits are sign extended bits. 3166 PromotedInsts.insert(std::pair<Instruction *, TypeIsSExt>( 3167 ExtOpnd, TypeIsSExt(ExtOpnd->getType(), IsSExt))); 3168 // Step #1. 3169 TPT.mutateType(ExtOpnd, Ext->getType()); 3170 // Step #2. 3171 TPT.replaceAllUsesWith(Ext, ExtOpnd); 3172 // Step #3. 3173 Instruction *ExtForOpnd = Ext; 3174 3175 DEBUG(dbgs() << "Propagate Ext to operands\n"); 3176 for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx; 3177 ++OpIdx) { 3178 DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n'); 3179 if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() || 3180 !shouldExtOperand(ExtOpnd, OpIdx)) { 3181 DEBUG(dbgs() << "No need to propagate\n"); 3182 continue; 3183 } 3184 // Check if we can statically extend the operand. 3185 Value *Opnd = ExtOpnd->getOperand(OpIdx); 3186 if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) { 3187 DEBUG(dbgs() << "Statically extend\n"); 3188 unsigned BitWidth = Ext->getType()->getIntegerBitWidth(); 3189 APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth) 3190 : Cst->getValue().zext(BitWidth); 3191 TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal)); 3192 continue; 3193 } 3194 // UndefValue are typed, so we have to statically sign extend them. 
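    // E.g., an i8 undef operand simply becomes an undef of the promoted type
    // (illustratively, i32 undef); no new instruction is needed for that.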
    if (isa<UndefValue>(Opnd)) {
      DEBUG(dbgs() << "Statically extend\n");
      TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
      continue;
    }

    // Otherwise we have to explicitly sign extend the operand.
    // Check if Ext was reused to extend an operand.
    if (!ExtForOpnd) {
      // If yes, create a new one.
      DEBUG(dbgs() << "More operands to ext\n");
      Value *ValForExtOpnd = IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType())
                                    : TPT.createZExt(Ext, Opnd, Ext->getType());
      if (!isa<Instruction>(ValForExtOpnd)) {
        TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
        continue;
      }
      ExtForOpnd = cast<Instruction>(ValForExtOpnd);
    }
    if (Exts)
      Exts->push_back(ExtForOpnd);
    TPT.setOperand(ExtForOpnd, 0, Opnd);

    // Move the sign extension before the insertion point.
    TPT.moveBefore(ExtForOpnd, ExtOpnd);
    TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd);
    CreatedInstsCost += !TLI.isExtFree(ExtForOpnd);
    // If more extensions are required, new instructions will have to be created.
    ExtForOpnd = nullptr;
  }
  if (ExtForOpnd == Ext) {
    DEBUG(dbgs() << "Extension is useless now\n");
    TPT.eraseInstruction(Ext);
  }
  return ExtOpnd;
}

/// Check whether or not promoting an instruction to a wider type is profitable.
/// \p NewCost gives the cost of extension instructions created by the
/// promotion.
/// \p OldCost gives the cost of extension instructions before the promotion
/// plus the number of instructions that have been
/// matched in the addressing mode during the promotion.
/// \p PromotedOperand is the value that has been promoted.
/// \return True if the promotion is profitable, false otherwise.
bool AddressingModeMatcher::isPromotionProfitable(
    unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
  DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost << '\n');
  // The cost of the new extensions is greater than the cost of the
  // old extension plus what we folded.
  // This is not profitable.
  if (NewCost > OldCost)
    return false;
  if (NewCost < OldCost)
    return true;
  // The promotion is neutral but it may help folding the sign extension in
  // loads for instance.
  // Check that we did not create an illegal instruction.
  return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
}

/// Given an instruction or constant expr, see if we can fold the operation
/// into the addressing mode. If so, update the addressing mode and return
/// true, otherwise return false without modifying AddrMode.
/// If \p MovedAway is not NULL, it contains the information of whether or
/// not AddrInst has to be folded into the addressing mode on success.
/// If \p MovedAway == true, \p AddrInst will not be part of the addressing
/// mode because it has been moved away.
/// Thus AddrInst must not be added in the matched instructions.
/// This state can happen when AddrInst is a sext, since it may be moved away.
/// Therefore, AddrInst may not be valid when MovedAway is true and it must
/// not be referenced anymore.
bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
                                               unsigned Depth,
                                               bool *MovedAway) {
  // Avoid exponential behavior on extremely deep expression trees.
3271 if (Depth >= 5) return false; 3272 3273 // By default, all matched instructions stay in place. 3274 if (MovedAway) 3275 *MovedAway = false; 3276 3277 switch (Opcode) { 3278 case Instruction::PtrToInt: 3279 // PtrToInt is always a noop, as we know that the int type is pointer sized. 3280 return matchAddr(AddrInst->getOperand(0), Depth); 3281 case Instruction::IntToPtr: { 3282 auto AS = AddrInst->getType()->getPointerAddressSpace(); 3283 auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS)); 3284 // This inttoptr is a no-op if the integer type is pointer sized. 3285 if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy) 3286 return matchAddr(AddrInst->getOperand(0), Depth); 3287 return false; 3288 } 3289 case Instruction::BitCast: 3290 // BitCast is always a noop, and we can handle it as long as it is 3291 // int->int or pointer->pointer (we don't want int<->fp or something). 3292 if ((AddrInst->getOperand(0)->getType()->isPointerTy() || 3293 AddrInst->getOperand(0)->getType()->isIntegerTy()) && 3294 // Don't touch identity bitcasts. These were probably put here by LSR, 3295 // and we don't want to mess around with them. Assume it knows what it 3296 // is doing. 3297 AddrInst->getOperand(0)->getType() != AddrInst->getType()) 3298 return matchAddr(AddrInst->getOperand(0), Depth); 3299 return false; 3300 case Instruction::AddrSpaceCast: { 3301 unsigned SrcAS 3302 = AddrInst->getOperand(0)->getType()->getPointerAddressSpace(); 3303 unsigned DestAS = AddrInst->getType()->getPointerAddressSpace(); 3304 if (TLI.isNoopAddrSpaceCast(SrcAS, DestAS)) 3305 return matchAddr(AddrInst->getOperand(0), Depth); 3306 return false; 3307 } 3308 case Instruction::Add: { 3309 // Check to see if we can merge in the RHS then the LHS. If so, we win. 3310 ExtAddrMode BackupAddrMode = AddrMode; 3311 unsigned OldSize = AddrModeInsts.size(); 3312 // Start a transaction at this point. 3313 // The LHS may match but not the RHS. 3314 // Therefore, we need a higher level restoration point to undo partially 3315 // matched operation. 3316 TypePromotionTransaction::ConstRestorationPt LastKnownGood = 3317 TPT.getRestorationPoint(); 3318 3319 if (matchAddr(AddrInst->getOperand(1), Depth+1) && 3320 matchAddr(AddrInst->getOperand(0), Depth+1)) 3321 return true; 3322 3323 // Restore the old addr mode info. 3324 AddrMode = BackupAddrMode; 3325 AddrModeInsts.resize(OldSize); 3326 TPT.rollback(LastKnownGood); 3327 3328 // Otherwise this was over-aggressive. Try merging in the LHS then the RHS. 3329 if (matchAddr(AddrInst->getOperand(0), Depth+1) && 3330 matchAddr(AddrInst->getOperand(1), Depth+1)) 3331 return true; 3332 3333 // Otherwise we definitely can't merge the ADD in. 3334 AddrMode = BackupAddrMode; 3335 AddrModeInsts.resize(OldSize); 3336 TPT.rollback(LastKnownGood); 3337 break; 3338 } 3339 //case Instruction::Or: 3340 // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD. 3341 //break; 3342 case Instruction::Mul: 3343 case Instruction::Shl: { 3344 // Can only handle X*C and X << C. 3345 ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1)); 3346 if (!RHS) 3347 return false; 3348 int64_t Scale = RHS->getSExtValue(); 3349 if (Opcode == Instruction::Shl) 3350 Scale = 1LL << Scale; 3351 3352 return matchScaledValue(AddrInst->getOperand(0), Scale, Depth); 3353 } 3354 case Instruction::GetElementPtr: { 3355 // Scan the GEP. We check it if it contains constant offsets and at most 3356 // one variable offset. 
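    // For illustration, with a hypothetical GEP such as
    //   getelementptr { i32, [8 x i16] }, { i32, [8 x i16] }* %base, i64 0, i32 1, i64 %i
    // the constant indices contribute a fixed offset (4 bytes here, assuming
    // the usual layout) and %i is the single variable index, scaled by the
    // alloc size of i16 (2 bytes).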
3357 int VariableOperand = -1; 3358 unsigned VariableScale = 0; 3359 3360 int64_t ConstantOffset = 0; 3361 gep_type_iterator GTI = gep_type_begin(AddrInst); 3362 for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) { 3363 if (StructType *STy = GTI.getStructTypeOrNull()) { 3364 const StructLayout *SL = DL.getStructLayout(STy); 3365 unsigned Idx = 3366 cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue(); 3367 ConstantOffset += SL->getElementOffset(Idx); 3368 } else { 3369 uint64_t TypeSize = DL.getTypeAllocSize(GTI.getIndexedType()); 3370 if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) { 3371 ConstantOffset += CI->getSExtValue()*TypeSize; 3372 } else if (TypeSize) { // Scales of zero don't do anything. 3373 // We only allow one variable index at the moment. 3374 if (VariableOperand != -1) 3375 return false; 3376 3377 // Remember the variable index. 3378 VariableOperand = i; 3379 VariableScale = TypeSize; 3380 } 3381 } 3382 } 3383 3384 // A common case is for the GEP to only do a constant offset. In this case, 3385 // just add it to the disp field and check validity. 3386 if (VariableOperand == -1) { 3387 AddrMode.BaseOffs += ConstantOffset; 3388 if (ConstantOffset == 0 || 3389 TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) { 3390 // Check to see if we can fold the base pointer in too. 3391 if (matchAddr(AddrInst->getOperand(0), Depth+1)) 3392 return true; 3393 } 3394 AddrMode.BaseOffs -= ConstantOffset; 3395 return false; 3396 } 3397 3398 // Save the valid addressing mode in case we can't match. 3399 ExtAddrMode BackupAddrMode = AddrMode; 3400 unsigned OldSize = AddrModeInsts.size(); 3401 3402 // See if the scale and offset amount is valid for this target. 3403 AddrMode.BaseOffs += ConstantOffset; 3404 3405 // Match the base operand of the GEP. 3406 if (!matchAddr(AddrInst->getOperand(0), Depth+1)) { 3407 // If it couldn't be matched, just stuff the value in a register. 3408 if (AddrMode.HasBaseReg) { 3409 AddrMode = BackupAddrMode; 3410 AddrModeInsts.resize(OldSize); 3411 return false; 3412 } 3413 AddrMode.HasBaseReg = true; 3414 AddrMode.BaseReg = AddrInst->getOperand(0); 3415 } 3416 3417 // Match the remaining variable portion of the GEP. 3418 if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale, 3419 Depth)) { 3420 // If it couldn't be matched, try stuffing the base into a register 3421 // instead of matching it, and retrying the match of the scale. 3422 AddrMode = BackupAddrMode; 3423 AddrModeInsts.resize(OldSize); 3424 if (AddrMode.HasBaseReg) 3425 return false; 3426 AddrMode.HasBaseReg = true; 3427 AddrMode.BaseReg = AddrInst->getOperand(0); 3428 AddrMode.BaseOffs += ConstantOffset; 3429 if (!matchScaledValue(AddrInst->getOperand(VariableOperand), 3430 VariableScale, Depth)) { 3431 // If even that didn't work, bail. 3432 AddrMode = BackupAddrMode; 3433 AddrModeInsts.resize(OldSize); 3434 return false; 3435 } 3436 } 3437 3438 return true; 3439 } 3440 case Instruction::SExt: 3441 case Instruction::ZExt: { 3442 Instruction *Ext = dyn_cast<Instruction>(AddrInst); 3443 if (!Ext) 3444 return false; 3445 3446 // Try to move this ext out of the way of the addressing mode. 3447 // Ask for a method for doing so. 
3448 TypePromotionHelper::Action TPH = 3449 TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts); 3450 if (!TPH) 3451 return false; 3452 3453 TypePromotionTransaction::ConstRestorationPt LastKnownGood = 3454 TPT.getRestorationPoint(); 3455 unsigned CreatedInstsCost = 0; 3456 unsigned ExtCost = !TLI.isExtFree(Ext); 3457 Value *PromotedOperand = 3458 TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI); 3459 // SExt has been moved away. 3460 // Thus either it will be rematched later in the recursive calls or it is 3461 // gone. Anyway, we must not fold it into the addressing mode at this point. 3462 // E.g., 3463 // op = add opnd, 1 3464 // idx = ext op 3465 // addr = gep base, idx 3466 // is now: 3467 // promotedOpnd = ext opnd <- no match here 3468 // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls) 3469 // addr = gep base, op <- match 3470 if (MovedAway) 3471 *MovedAway = true; 3472 3473 assert(PromotedOperand && 3474 "TypePromotionHelper should have filtered out those cases"); 3475 3476 ExtAddrMode BackupAddrMode = AddrMode; 3477 unsigned OldSize = AddrModeInsts.size(); 3478 3479 if (!matchAddr(PromotedOperand, Depth) || 3480 // The total of the new cost is equal to the cost of the created 3481 // instructions. 3482 // The total of the old cost is equal to the cost of the extension plus 3483 // what we have saved in the addressing mode. 3484 !isPromotionProfitable(CreatedInstsCost, 3485 ExtCost + (AddrModeInsts.size() - OldSize), 3486 PromotedOperand)) { 3487 AddrMode = BackupAddrMode; 3488 AddrModeInsts.resize(OldSize); 3489 DEBUG(dbgs() << "Sign extension does not pay off: rollback\n"); 3490 TPT.rollback(LastKnownGood); 3491 return false; 3492 } 3493 return true; 3494 } 3495 } 3496 return false; 3497 } 3498 3499 /// If we can, try to add the value of 'Addr' into the current addressing mode. 3500 /// If Addr can't be added to AddrMode this returns false and leaves AddrMode 3501 /// unmodified. This assumes that Addr is either a pointer type or intptr_t 3502 /// for the target. 3503 /// 3504 bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) { 3505 // Start a transaction at this point that we will rollback if the matching 3506 // fails. 3507 TypePromotionTransaction::ConstRestorationPt LastKnownGood = 3508 TPT.getRestorationPoint(); 3509 if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) { 3510 // Fold in immediates if legal for the target. 3511 AddrMode.BaseOffs += CI->getSExtValue(); 3512 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) 3513 return true; 3514 AddrMode.BaseOffs -= CI->getSExtValue(); 3515 } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) { 3516 // If this is a global variable, try to fold it into the addressing mode. 3517 if (!AddrMode.BaseGV) { 3518 AddrMode.BaseGV = GV; 3519 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) 3520 return true; 3521 AddrMode.BaseGV = nullptr; 3522 } 3523 } else if (Instruction *I = dyn_cast<Instruction>(Addr)) { 3524 ExtAddrMode BackupAddrMode = AddrMode; 3525 unsigned OldSize = AddrModeInsts.size(); 3526 3527 // Check to see if it is possible to fold this operation. 3528 bool MovedAway = false; 3529 if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) { 3530 // This instruction may have been moved away. If so, there is nothing 3531 // to check here. 3532 if (MovedAway) 3533 return true; 3534 // Okay, it's possible to fold this. Check to see if it is actually 3535 // *profitable* to do so. 
We use a simple cost model to avoid increasing 3536 // register pressure too much. 3537 if (I->hasOneUse() || 3538 isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) { 3539 AddrModeInsts.push_back(I); 3540 return true; 3541 } 3542 3543 // It isn't profitable to do this, roll back. 3544 //cerr << "NOT FOLDING: " << *I; 3545 AddrMode = BackupAddrMode; 3546 AddrModeInsts.resize(OldSize); 3547 TPT.rollback(LastKnownGood); 3548 } 3549 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) { 3550 if (matchOperationAddr(CE, CE->getOpcode(), Depth)) 3551 return true; 3552 TPT.rollback(LastKnownGood); 3553 } else if (isa<ConstantPointerNull>(Addr)) { 3554 // Null pointer gets folded without affecting the addressing mode. 3555 return true; 3556 } 3557 3558 // Worse case, the target should support [reg] addressing modes. :) 3559 if (!AddrMode.HasBaseReg) { 3560 AddrMode.HasBaseReg = true; 3561 AddrMode.BaseReg = Addr; 3562 // Still check for legality in case the target supports [imm] but not [i+r]. 3563 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) 3564 return true; 3565 AddrMode.HasBaseReg = false; 3566 AddrMode.BaseReg = nullptr; 3567 } 3568 3569 // If the base register is already taken, see if we can do [r+r]. 3570 if (AddrMode.Scale == 0) { 3571 AddrMode.Scale = 1; 3572 AddrMode.ScaledReg = Addr; 3573 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) 3574 return true; 3575 AddrMode.Scale = 0; 3576 AddrMode.ScaledReg = nullptr; 3577 } 3578 // Couldn't match. 3579 TPT.rollback(LastKnownGood); 3580 return false; 3581 } 3582 3583 /// Check to see if all uses of OpVal by the specified inline asm call are due 3584 /// to memory operands. If so, return true, otherwise return false. 3585 static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, 3586 const TargetMachine &TM) { 3587 const Function *F = CI->getParent()->getParent(); 3588 const TargetLowering *TLI = TM.getSubtargetImpl(*F)->getTargetLowering(); 3589 const TargetRegisterInfo *TRI = TM.getSubtargetImpl(*F)->getRegisterInfo(); 3590 TargetLowering::AsmOperandInfoVector TargetConstraints = 3591 TLI->ParseConstraints(F->getParent()->getDataLayout(), TRI, 3592 ImmutableCallSite(CI)); 3593 for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { 3594 TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; 3595 3596 // Compute the constraint code and ConstraintType to use. 3597 TLI->ComputeConstraintToUse(OpInfo, SDValue()); 3598 3599 // If this asm operand is our Value*, and if it isn't an indirect memory 3600 // operand, we can't fold it! 3601 if (OpInfo.CallOperandVal == OpVal && 3602 (OpInfo.ConstraintType != TargetLowering::C_Memory || 3603 !OpInfo.isIndirect)) 3604 return false; 3605 } 3606 3607 return true; 3608 } 3609 3610 /// Recursively walk all the uses of I until we find a memory use. 3611 /// If we find an obviously non-foldable instruction, return true. 3612 /// Add the ultimately found memory instructions to MemoryUses. 3613 static bool FindAllMemoryUses( 3614 Instruction *I, 3615 SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses, 3616 SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetMachine &TM) { 3617 // If we already considered this instruction, we're done. 3618 if (!ConsideredInsts.insert(I).second) 3619 return false; 3620 3621 // If this is an obviously unfoldable instruction, bail out. 
3622 if (!MightBeFoldableInst(I)) 3623 return true; 3624 3625 const bool OptSize = I->getFunction()->optForSize(); 3626 3627 // Loop over all the uses, recursively processing them. 3628 for (Use &U : I->uses()) { 3629 Instruction *UserI = cast<Instruction>(U.getUser()); 3630 3631 if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) { 3632 MemoryUses.push_back(std::make_pair(LI, U.getOperandNo())); 3633 continue; 3634 } 3635 3636 if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) { 3637 unsigned opNo = U.getOperandNo(); 3638 if (opNo == 0) return true; // Storing addr, not into addr. 3639 MemoryUses.push_back(std::make_pair(SI, opNo)); 3640 continue; 3641 } 3642 3643 if (CallInst *CI = dyn_cast<CallInst>(UserI)) { 3644 // If this is a cold call, we can sink the addressing calculation into 3645 // the cold path. See optimizeCallInst 3646 if (!OptSize && CI->hasFnAttr(Attribute::Cold)) 3647 continue; 3648 3649 InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue()); 3650 if (!IA) return true; 3651 3652 // If this is a memory operand, we're cool, otherwise bail out. 3653 if (!IsOperandAMemoryOperand(CI, IA, I, TM)) 3654 return true; 3655 continue; 3656 } 3657 3658 if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TM)) 3659 return true; 3660 } 3661 3662 return false; 3663 } 3664 3665 /// Return true if Val is already known to be live at the use site that we're 3666 /// folding it into. If so, there is no cost to include it in the addressing 3667 /// mode. KnownLive1 and KnownLive2 are two values that we know are live at the 3668 /// instruction already. 3669 bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,Value *KnownLive1, 3670 Value *KnownLive2) { 3671 // If Val is either of the known-live values, we know it is live! 3672 if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2) 3673 return true; 3674 3675 // All values other than instructions and arguments (e.g. constants) are live. 3676 if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true; 3677 3678 // If Val is a constant sized alloca in the entry block, it is live, this is 3679 // true because it is just a reference to the stack/frame pointer, which is 3680 // live for the whole function. 3681 if (AllocaInst *AI = dyn_cast<AllocaInst>(Val)) 3682 if (AI->isStaticAlloca()) 3683 return true; 3684 3685 // Check to see if this value is already used in the memory instruction's 3686 // block. If so, it's already live into the block at the very least, so we 3687 // can reasonably fold it. 3688 return Val->isUsedInBasicBlock(MemoryInst->getParent()); 3689 } 3690 3691 /// It is possible for the addressing mode of the machine to fold the specified 3692 /// instruction into a load or store that ultimately uses it. 3693 /// However, the specified instruction has multiple uses. 3694 /// Given this, it may actually increase register pressure to fold it 3695 /// into the load. For example, consider this code: 3696 /// 3697 /// X = ... 3698 /// Y = X+1 3699 /// use(Y) -> nonload/store 3700 /// Z = Y+1 3701 /// load Z 3702 /// 3703 /// In this case, Y has multiple uses, and can be folded into the load of Z 3704 /// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to 3705 /// be live at the use(Y) line. If we don't fold Y into load Z, we use one 3706 /// fewer register. Since Y can't be folded into "use(Y)" we don't increase the 3707 /// number of computations either. 3708 /// 3709 /// Note that this (like most of CodeGenPrepare) is just a rough heuristic. 
/// If X was live across 'load Z' for other reasons, we actually *would* want to
/// fold the addressing mode in the Z case. This would make Y die earlier.
bool AddressingModeMatcher::
isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
                                     ExtAddrMode &AMAfter) {
  if (IgnoreProfitability) return true;

  // AMBefore is the addressing mode before this instruction was folded into it,
  // and AMAfter is the addressing mode after the instruction was folded. Get
  // the set of registers referenced by AMAfter and subtract out those
  // referenced by AMBefore: this is the set of values which folding in this
  // address extends the lifetime of.
  //
  // Note that there are only two potential values being referenced here,
  // BaseReg and ScaleReg (global addresses are always available, as are any
  // folded immediates).
  Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;

  // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
  // lifetime wasn't extended by adding this instruction.
  if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
    BaseReg = nullptr;
  if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
    ScaledReg = nullptr;

  // If folding this instruction (and its subexprs) didn't extend any live
  // ranges, we're ok with it.
  if (!BaseReg && !ScaledReg)
    return true;

  // If all uses of this instruction can have the address mode sunk into them,
  // we can remove the addressing mode and effectively trade one live register
  // for another (at worst). In this context, folding an addressing mode into
  // the use is just a particularly nice way of sinking it.
  SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
  SmallPtrSet<Instruction*, 16> ConsideredInsts;
  if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TM))
    return false; // Has a non-memory, non-foldable use!

  // Now that we know that all uses of this instruction are part of a chain of
  // computation involving only operations that could theoretically be folded
  // into a memory use, loop over each of these memory operation uses and see
  // if they could *actually* fold the instruction. The assumption is that
  // addressing modes are cheap and that duplicating the computation involved
  // many times is worthwhile, even on a fastpath. For sinking candidates
  // (i.e. cold call sites), this serves as a way to prevent excessive code
  // growth since most architectures have some reasonably small and fast way to
  // compute an effective address (e.g., LEA on x86).
  SmallVector<Instruction*, 32> MatchedAddrModeInsts;
  for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
    Instruction *User = MemoryUses[i].first;
    unsigned OpNo = MemoryUses[i].second;

    // Get the access type of this use. If the use isn't a pointer, we don't
    // know what it accesses.
    Value *Address = User->getOperand(OpNo);
    PointerType *AddrTy = dyn_cast<PointerType>(Address->getType());
    if (!AddrTy)
      return false;
    Type *AddressAccessTy = AddrTy->getElementType();
    unsigned AS = AddrTy->getAddressSpace();

    // Do a match against the root of this address, ignoring profitability.
This 3773 // will tell us if the addressing mode for the memory operation will 3774 // *actually* cover the shared instruction. 3775 ExtAddrMode Result; 3776 TypePromotionTransaction::ConstRestorationPt LastKnownGood = 3777 TPT.getRestorationPoint(); 3778 AddressingModeMatcher Matcher(MatchedAddrModeInsts, TM, AddressAccessTy, AS, 3779 MemoryInst, Result, InsertedInsts, 3780 PromotedInsts, TPT); 3781 Matcher.IgnoreProfitability = true; 3782 bool Success = Matcher.matchAddr(Address, 0); 3783 (void)Success; assert(Success && "Couldn't select *anything*?"); 3784 3785 // The match was to check the profitability, the changes made are not 3786 // part of the original matcher. Therefore, they should be dropped 3787 // otherwise the original matcher will not present the right state. 3788 TPT.rollback(LastKnownGood); 3789 3790 // If the match didn't cover I, then it won't be shared by it. 3791 if (!is_contained(MatchedAddrModeInsts, I)) 3792 return false; 3793 3794 MatchedAddrModeInsts.clear(); 3795 } 3796 3797 return true; 3798 } 3799 3800 } // end anonymous namespace 3801 3802 /// Return true if the specified values are defined in a 3803 /// different basic block than BB. 3804 static bool IsNonLocalValue(Value *V, BasicBlock *BB) { 3805 if (Instruction *I = dyn_cast<Instruction>(V)) 3806 return I->getParent() != BB; 3807 return false; 3808 } 3809 3810 /// Sink addressing mode computation immediate before MemoryInst if doing so 3811 /// can be done without increasing register pressure. The need for the 3812 /// register pressure constraint means this can end up being an all or nothing 3813 /// decision for all uses of the same addressing computation. 3814 /// 3815 /// Load and Store Instructions often have addressing modes that can do 3816 /// significant amounts of computation. As such, instruction selection will try 3817 /// to get the load or store to do as much computation as possible for the 3818 /// program. The problem is that isel can only see within a single block. As 3819 /// such, we sink as much legal addressing mode work into the block as possible. 3820 /// 3821 /// This method is used to optimize both load/store and inline asms with memory 3822 /// operands. It's also used to sink addressing computations feeding into cold 3823 /// call sites into their (cold) basic block. 3824 /// 3825 /// The motivation for handling sinking into cold blocks is that doing so can 3826 /// both enable other address mode sinking (by satisfying the register pressure 3827 /// constraint above), and reduce register pressure globally (by removing the 3828 /// addressing mode computation from the fast path entirely.). 3829 bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, 3830 Type *AccessTy, unsigned AddrSpace) { 3831 Value *Repl = Addr; 3832 3833 // Try to collapse single-value PHI nodes. This is necessary to undo 3834 // unprofitable PRE transformations. 3835 SmallVector<Value*, 8> worklist; 3836 SmallPtrSet<Value*, 16> Visited; 3837 worklist.push_back(Addr); 3838 3839 // Use a worklist to iteratively look through PHI nodes, and ensure that 3840 // the addressing mode obtained from the non-PHI roots of the graph 3841 // are equivalent. 
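  // For illustration (hypothetical IR): with
  //   %a1 = getelementptr i8, i8* %base, i64 16
  //   %a2 = getelementptr i8, i8* %base, i64 16
  //   %p  = phi i8* [ %a1, %bb1 ], [ %a2, %bb2 ]
  // both roots yield the same {BaseReg = %base, BaseOffs = 16} mode, so the
  // PHI can be looked through; if the roots disagreed, no consensus would be
  // reached and we would give up below.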
3842 Value *Consensus = nullptr; 3843 unsigned NumUsesConsensus = 0; 3844 bool IsNumUsesConsensusValid = false; 3845 SmallVector<Instruction*, 16> AddrModeInsts; 3846 ExtAddrMode AddrMode; 3847 TypePromotionTransaction TPT; 3848 TypePromotionTransaction::ConstRestorationPt LastKnownGood = 3849 TPT.getRestorationPoint(); 3850 while (!worklist.empty()) { 3851 Value *V = worklist.back(); 3852 worklist.pop_back(); 3853 3854 // Break use-def graph loops. 3855 if (!Visited.insert(V).second) { 3856 Consensus = nullptr; 3857 break; 3858 } 3859 3860 // For a PHI node, push all of its incoming values. 3861 if (PHINode *P = dyn_cast<PHINode>(V)) { 3862 for (Value *IncValue : P->incoming_values()) 3863 worklist.push_back(IncValue); 3864 continue; 3865 } 3866 3867 // For non-PHIs, determine the addressing mode being computed. Note that 3868 // the result may differ depending on what other uses our candidate 3869 // addressing instructions might have. 3870 SmallVector<Instruction*, 16> NewAddrModeInsts; 3871 ExtAddrMode NewAddrMode = AddressingModeMatcher::Match( 3872 V, AccessTy, AddrSpace, MemoryInst, NewAddrModeInsts, *TM, 3873 InsertedInsts, PromotedInsts, TPT); 3874 3875 // This check is broken into two cases with very similar code to avoid using 3876 // getNumUses() as much as possible. Some values have a lot of uses, so 3877 // calling getNumUses() unconditionally caused a significant compile-time 3878 // regression. 3879 if (!Consensus) { 3880 Consensus = V; 3881 AddrMode = NewAddrMode; 3882 AddrModeInsts = NewAddrModeInsts; 3883 continue; 3884 } else if (NewAddrMode == AddrMode) { 3885 if (!IsNumUsesConsensusValid) { 3886 NumUsesConsensus = Consensus->getNumUses(); 3887 IsNumUsesConsensusValid = true; 3888 } 3889 3890 // Ensure that the obtained addressing mode is equivalent to that obtained 3891 // for all other roots of the PHI traversal. Also, when choosing one 3892 // such root as representative, select the one with the most uses in order 3893 // to keep the cost modeling heuristics in AddressingModeMatcher 3894 // applicable. 3895 unsigned NumUses = V->getNumUses(); 3896 if (NumUses > NumUsesConsensus) { 3897 Consensus = V; 3898 NumUsesConsensus = NumUses; 3899 AddrModeInsts = NewAddrModeInsts; 3900 } 3901 continue; 3902 } 3903 3904 Consensus = nullptr; 3905 break; 3906 } 3907 3908 // If the addressing mode couldn't be determined, or if multiple different 3909 // ones were determined, bail out now. 3910 if (!Consensus) { 3911 TPT.rollback(LastKnownGood); 3912 return false; 3913 } 3914 TPT.commit(); 3915 3916 // If all the instructions matched are already in this BB, don't do anything. 3917 if (none_of(AddrModeInsts, [&](Value *V) { 3918 return IsNonLocalValue(V, MemoryInst->getParent()); 3919 })) { 3920 DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n"); 3921 return false; 3922 } 3923 3924 // Insert this computation right after this user. Since our caller is 3925 // scanning from the top of the BB to the bottom, reuse of the expr are 3926 // guaranteed to happen later. 3927 IRBuilder<> Builder(MemoryInst); 3928 3929 // Now that we determined the addressing expression we want to use and know 3930 // that we have to sink it into this block. Check to see if we have already 3931 // done this for some other load/store instr in this block. If so, reuse the 3932 // computation. 
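  // For illustration: if two loads in this block use the same address value
  // and the first one already sank a "sunkaddr" computation for it, the
  // lookup below returns that value and we only add a bitcast when the
  // pointer types differ, instead of materializing the address a second time.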
3933 Value *&SunkAddr = SunkAddrs[Addr]; 3934 if (SunkAddr) { 3935 DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " 3936 << *MemoryInst << "\n"); 3937 if (SunkAddr->getType() != Addr->getType()) 3938 SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType()); 3939 } else if (AddrSinkUsingGEPs || 3940 (!AddrSinkUsingGEPs.getNumOccurrences() && TM && 3941 TM->getSubtargetImpl(*MemoryInst->getParent()->getParent()) 3942 ->useAA())) { 3943 // By default, we use the GEP-based method when AA is used later. This 3944 // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. 3945 DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " 3946 << *MemoryInst << "\n"); 3947 Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); 3948 Value *ResultPtr = nullptr, *ResultIndex = nullptr; 3949 3950 // First, find the pointer. 3951 if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) { 3952 ResultPtr = AddrMode.BaseReg; 3953 AddrMode.BaseReg = nullptr; 3954 } 3955 3956 if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) { 3957 // We can't add more than one pointer together, nor can we scale a 3958 // pointer (both of which seem meaningless). 3959 if (ResultPtr || AddrMode.Scale != 1) 3960 return false; 3961 3962 ResultPtr = AddrMode.ScaledReg; 3963 AddrMode.Scale = 0; 3964 } 3965 3966 if (AddrMode.BaseGV) { 3967 if (ResultPtr) 3968 return false; 3969 3970 ResultPtr = AddrMode.BaseGV; 3971 } 3972 3973 // If the real base value actually came from an inttoptr, then the matcher 3974 // will look through it and provide only the integer value. In that case, 3975 // use it here. 3976 if (!ResultPtr && AddrMode.BaseReg) { 3977 ResultPtr = 3978 Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), "sunkaddr"); 3979 AddrMode.BaseReg = nullptr; 3980 } else if (!ResultPtr && AddrMode.Scale == 1) { 3981 ResultPtr = 3982 Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), "sunkaddr"); 3983 AddrMode.Scale = 0; 3984 } 3985 3986 if (!ResultPtr && 3987 !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) { 3988 SunkAddr = Constant::getNullValue(Addr->getType()); 3989 } else if (!ResultPtr) { 3990 return false; 3991 } else { 3992 Type *I8PtrTy = 3993 Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace()); 3994 Type *I8Ty = Builder.getInt8Ty(); 3995 3996 // Start with the base register. Do this first so that subsequent address 3997 // matching finds it last, which will prevent it from trying to match it 3998 // as the scaled value in case it happens to be a mul. That would be 3999 // problematic if we've sunk a different mul for the scale, because then 4000 // we'd end up sinking both muls. 4001 if (AddrMode.BaseReg) { 4002 Value *V = AddrMode.BaseReg; 4003 if (V->getType() != IntPtrTy) 4004 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr"); 4005 4006 ResultIndex = V; 4007 } 4008 4009 // Add the scale value. 4010 if (AddrMode.Scale) { 4011 Value *V = AddrMode.ScaledReg; 4012 if (V->getType() == IntPtrTy) { 4013 // done. 4014 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() < 4015 cast<IntegerType>(V->getType())->getBitWidth()) { 4016 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); 4017 } else { 4018 // It is only safe to sign extend the BaseReg if we know that the math 4019 // required to create it did not overflow before we extend it. 
Since 4020 // the original IR value was tossed in favor of a constant back when 4021 // the AddrMode was created we need to bail out gracefully if widths 4022 // do not match instead of extending it. 4023 Instruction *I = dyn_cast_or_null<Instruction>(ResultIndex); 4024 if (I && (ResultIndex != AddrMode.BaseReg)) 4025 I->eraseFromParent(); 4026 return false; 4027 } 4028 4029 if (AddrMode.Scale != 1) 4030 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), 4031 "sunkaddr"); 4032 if (ResultIndex) 4033 ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr"); 4034 else 4035 ResultIndex = V; 4036 } 4037 4038 // Add in the Base Offset if present. 4039 if (AddrMode.BaseOffs) { 4040 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); 4041 if (ResultIndex) { 4042 // We need to add this separately from the scale above to help with 4043 // SDAG consecutive load/store merging. 4044 if (ResultPtr->getType() != I8PtrTy) 4045 ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); 4046 ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr"); 4047 } 4048 4049 ResultIndex = V; 4050 } 4051 4052 if (!ResultIndex) { 4053 SunkAddr = ResultPtr; 4054 } else { 4055 if (ResultPtr->getType() != I8PtrTy) 4056 ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); 4057 SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr"); 4058 } 4059 4060 if (SunkAddr->getType() != Addr->getType()) 4061 SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType()); 4062 } 4063 } else { 4064 DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " 4065 << *MemoryInst << "\n"); 4066 Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); 4067 Value *Result = nullptr; 4068 4069 // Start with the base register. Do this first so that subsequent address 4070 // matching finds it last, which will prevent it from trying to match it 4071 // as the scaled value in case it happens to be a mul. That would be 4072 // problematic if we've sunk a different mul for the scale, because then 4073 // we'd end up sinking both muls. 4074 if (AddrMode.BaseReg) { 4075 Value *V = AddrMode.BaseReg; 4076 if (V->getType()->isPointerTy()) 4077 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); 4078 if (V->getType() != IntPtrTy) 4079 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr"); 4080 Result = V; 4081 } 4082 4083 // Add the scale value. 4084 if (AddrMode.Scale) { 4085 Value *V = AddrMode.ScaledReg; 4086 if (V->getType() == IntPtrTy) { 4087 // done. 4088 } else if (V->getType()->isPointerTy()) { 4089 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); 4090 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() < 4091 cast<IntegerType>(V->getType())->getBitWidth()) { 4092 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); 4093 } else { 4094 // It is only safe to sign extend the BaseReg if we know that the math 4095 // required to create it did not overflow before we extend it. Since 4096 // the original IR value was tossed in favor of a constant back when 4097 // the AddrMode was created we need to bail out gracefully if widths 4098 // do not match instead of extending it. 
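        // Illustratively: with a 64-bit intptr_t and a 32-bit scaled value we
        // would need a sign extension whose overflow-safety we can no longer
        // prove, so we erase any index computation created above and give up
        // on sinking this address.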
4099 Instruction *I = dyn_cast_or_null<Instruction>(Result); 4100 if (I && (Result != AddrMode.BaseReg)) 4101 I->eraseFromParent(); 4102 return false; 4103 } 4104 if (AddrMode.Scale != 1) 4105 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), 4106 "sunkaddr"); 4107 if (Result) 4108 Result = Builder.CreateAdd(Result, V, "sunkaddr"); 4109 else 4110 Result = V; 4111 } 4112 4113 // Add in the BaseGV if present. 4114 if (AddrMode.BaseGV) { 4115 Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr"); 4116 if (Result) 4117 Result = Builder.CreateAdd(Result, V, "sunkaddr"); 4118 else 4119 Result = V; 4120 } 4121 4122 // Add in the Base Offset if present. 4123 if (AddrMode.BaseOffs) { 4124 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); 4125 if (Result) 4126 Result = Builder.CreateAdd(Result, V, "sunkaddr"); 4127 else 4128 Result = V; 4129 } 4130 4131 if (!Result) 4132 SunkAddr = Constant::getNullValue(Addr->getType()); 4133 else 4134 SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr"); 4135 } 4136 4137 MemoryInst->replaceUsesOfWith(Repl, SunkAddr); 4138 4139 // If we have no uses, recursively delete the value and all dead instructions 4140 // using it. 4141 if (Repl->use_empty()) { 4142 // This can cause recursive deletion, which can invalidate our iterator. 4143 // Use a WeakVH to hold onto it in case this happens. 4144 Value *CurValue = &*CurInstIterator; 4145 WeakVH IterHandle(CurValue); 4146 BasicBlock *BB = CurInstIterator->getParent(); 4147 4148 RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo); 4149 4150 if (IterHandle != CurValue) { 4151 // If the iterator instruction was recursively deleted, start over at the 4152 // start of the block. 4153 CurInstIterator = BB->begin(); 4154 SunkAddrs.clear(); 4155 } 4156 } 4157 ++NumMemoryInsts; 4158 return true; 4159 } 4160 4161 /// If there are any memory operands, use OptimizeMemoryInst to sink their 4162 /// address computing into the block when possible / profitable. 4163 bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) { 4164 bool MadeChange = false; 4165 4166 const TargetRegisterInfo *TRI = 4167 TM->getSubtargetImpl(*CS->getParent()->getParent())->getRegisterInfo(); 4168 TargetLowering::AsmOperandInfoVector TargetConstraints = 4169 TLI->ParseConstraints(*DL, TRI, CS); 4170 unsigned ArgNo = 0; 4171 for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { 4172 TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; 4173 4174 // Compute the constraint code and ConstraintType to use. 4175 TLI->ComputeConstraintToUse(OpInfo, SDValue()); 4176 4177 if (OpInfo.ConstraintType == TargetLowering::C_Memory && 4178 OpInfo.isIndirect) { 4179 Value *OpVal = CS->getArgOperand(ArgNo++); 4180 MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u); 4181 } else if (OpInfo.Type == InlineAsm::isInput) 4182 ArgNo++; 4183 } 4184 4185 return MadeChange; 4186 } 4187 4188 /// \brief Check if all the uses of \p Inst are equivalent (or free) zero or 4189 /// sign extensions. 
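/// For example (illustrative), uses such as
/// \code
///   %z1 = zext i32 %inst to i64
///   %z2 = zext i32 %inst to i64
/// \endcode
/// are equivalent. A mix of sext and zext users, or two sext users with
/// different destination types, is rejected; two zext users with different
/// destination types are accepted only if the target reports zero extension
/// from the narrower to the wider type as free.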
4190 static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) { 4191 assert(!Inst->use_empty() && "Input must have at least one use"); 4192 const Instruction *FirstUser = cast<Instruction>(*Inst->user_begin()); 4193 bool IsSExt = isa<SExtInst>(FirstUser); 4194 Type *ExtTy = FirstUser->getType(); 4195 for (const User *U : Inst->users()) { 4196 const Instruction *UI = cast<Instruction>(U); 4197 if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI))) 4198 return false; 4199 Type *CurTy = UI->getType(); 4200 // Same input and output types: Same instruction after CSE. 4201 if (CurTy == ExtTy) 4202 continue; 4203 4204 // If IsSExt is true, we are in this situation: 4205 // a = Inst 4206 // b = sext ty1 a to ty2 4207 // c = sext ty1 a to ty3 4208 // Assuming ty2 is shorter than ty3, this could be turned into: 4209 // a = Inst 4210 // b = sext ty1 a to ty2 4211 // c = sext ty2 b to ty3 4212 // However, the last sext is not free. 4213 if (IsSExt) 4214 return false; 4215 4216 // This is a ZExt, maybe this is free to extend from one type to another. 4217 // In that case, we would not account for a different use. 4218 Type *NarrowTy; 4219 Type *LargeTy; 4220 if (ExtTy->getScalarType()->getIntegerBitWidth() > 4221 CurTy->getScalarType()->getIntegerBitWidth()) { 4222 NarrowTy = CurTy; 4223 LargeTy = ExtTy; 4224 } else { 4225 NarrowTy = ExtTy; 4226 LargeTy = CurTy; 4227 } 4228 4229 if (!TLI.isZExtFree(NarrowTy, LargeTy)) 4230 return false; 4231 } 4232 // All uses are the same or can be derived from one another for free. 4233 return true; 4234 } 4235 4236 /// \brief Try to form ExtLd by promoting \p Exts until they reach a 4237 /// load instruction. 4238 /// If an ext(load) can be formed, it is returned via \p LI for the load 4239 /// and \p Inst for the extension. 4240 /// Otherwise LI == nullptr and Inst == nullptr. 4241 /// When some promotion happened, \p TPT contains the proper state to 4242 /// revert them. 4243 /// 4244 /// \return true when promoting was necessary to expose the ext(load) 4245 /// opportunity, false otherwise. 4246 /// 4247 /// Example: 4248 /// \code 4249 /// %ld = load i32* %addr 4250 /// %add = add nuw i32 %ld, 4 4251 /// %zext = zext i32 %add to i64 4252 /// \endcode 4253 /// => 4254 /// \code 4255 /// %ld = load i32* %addr 4256 /// %zext = zext i32 %ld to i64 4257 /// %add = add nuw i64 %zext, 4 4258 /// \encode 4259 /// Thanks to the promotion, we can match zext(load i32*) to i64. 4260 bool CodeGenPrepare::extLdPromotion(TypePromotionTransaction &TPT, 4261 LoadInst *&LI, Instruction *&Inst, 4262 const SmallVectorImpl<Instruction *> &Exts, 4263 unsigned CreatedInstsCost = 0) { 4264 // Iterate over all the extensions to see if one form an ext(load). 4265 for (auto I : Exts) { 4266 // Check if we directly have ext(load). 4267 if ((LI = dyn_cast<LoadInst>(I->getOperand(0)))) { 4268 Inst = I; 4269 // No promotion happened here. 4270 return false; 4271 } 4272 // Check whether or not we want to do any promotion. 4273 if (!TLI || !TLI->enableExtLdPromotion() || DisableExtLdPromotion) 4274 continue; 4275 // Get the action to perform the promotion. 4276 TypePromotionHelper::Action TPH = TypePromotionHelper::getAction( 4277 I, InsertedInsts, *TLI, PromotedInsts); 4278 // Check if we can promote. 4279 if (!TPH) 4280 continue; 4281 // Save the current state. 
4282 TypePromotionTransaction::ConstRestorationPt LastKnownGood = 4283 TPT.getRestorationPoint(); 4284 SmallVector<Instruction *, 4> NewExts; 4285 unsigned NewCreatedInstsCost = 0; 4286 unsigned ExtCost = !TLI->isExtFree(I); 4287 // Promote. 4288 Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost, 4289 &NewExts, nullptr, *TLI); 4290 assert(PromotedVal && 4291 "TypePromotionHelper should have filtered out those cases"); 4292 4293 // We would be able to merge only one extension in a load. 4294 // Therefore, if we have more than 1 new extension we heuristically 4295 // cut this search path, because it means we degrade the code quality. 4296 // With exactly 2, the transformation is neutral, because we will merge 4297 // one extension but leave one. However, we optimistically keep going, 4298 // because the new extension may be removed too. 4299 long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost; 4300 TotalCreatedInstsCost -= ExtCost; 4301 if (!StressExtLdPromotion && 4302 (TotalCreatedInstsCost > 1 || 4303 !isPromotedInstructionLegal(*TLI, *DL, PromotedVal))) { 4304 // The promotion is not profitable, rollback to the previous state. 4305 TPT.rollback(LastKnownGood); 4306 continue; 4307 } 4308 // The promotion is profitable. 4309 // Check if it exposes an ext(load). 4310 (void)extLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInstsCost); 4311 if (LI && (StressExtLdPromotion || NewCreatedInstsCost <= ExtCost || 4312 // If we have created a new extension, i.e., now we have two 4313 // extensions. We must make sure one of them is merged with 4314 // the load, otherwise we may degrade the code quality. 4315 (LI->hasOneUse() || hasSameExtUse(LI, *TLI)))) 4316 // Promotion happened. 4317 return true; 4318 // If this does not help to expose an ext(load) then, rollback. 4319 TPT.rollback(LastKnownGood); 4320 } 4321 // None of the extension can form an ext(load). 4322 LI = nullptr; 4323 Inst = nullptr; 4324 return false; 4325 } 4326 4327 /// Move a zext or sext fed by a load into the same basic block as the load, 4328 /// unless conditions are unfavorable. This allows SelectionDAG to fold the 4329 /// extend into the load. 4330 /// \p I[in/out] the extension may be modified during the process if some 4331 /// promotions apply. 4332 /// 4333 bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) { 4334 // ExtLoad formation infrastructure requires TLI to be effective. 4335 if (!TLI) 4336 return false; 4337 4338 // Try to promote a chain of computation if it allows to form 4339 // an extended load. 4340 TypePromotionTransaction TPT; 4341 TypePromotionTransaction::ConstRestorationPt LastKnownGood = 4342 TPT.getRestorationPoint(); 4343 SmallVector<Instruction *, 1> Exts; 4344 Exts.push_back(I); 4345 // Look for a load being extended. 4346 LoadInst *LI = nullptr; 4347 Instruction *OldExt = I; 4348 bool HasPromoted = extLdPromotion(TPT, LI, I, Exts); 4349 if (!LI || !I) { 4350 assert(!HasPromoted && !LI && "If we did not match any load instruction " 4351 "the code must remain the same"); 4352 I = OldExt; 4353 return false; 4354 } 4355 4356 // If they're already in the same block, there's nothing to do. 4357 // Make the cheap checks first if we did not promote. 4358 // If we promoted, we need to check if it is indeed profitable. 
4359 if (!HasPromoted && LI->getParent() == I->getParent()) 4360 return false; 4361 4362 EVT VT = TLI->getValueType(*DL, I->getType()); 4363 EVT LoadVT = TLI->getValueType(*DL, LI->getType()); 4364 4365 // If the load has other users and the truncate is not free, this probably 4366 // isn't worthwhile. 4367 if (!LI->hasOneUse() && 4368 (TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) && 4369 !TLI->isTruncateFree(I->getType(), LI->getType())) { 4370 I = OldExt; 4371 TPT.rollback(LastKnownGood); 4372 return false; 4373 } 4374 4375 // Check whether the target supports casts folded into loads. 4376 unsigned LType; 4377 if (isa<ZExtInst>(I)) 4378 LType = ISD::ZEXTLOAD; 4379 else { 4380 assert(isa<SExtInst>(I) && "Unexpected ext type!"); 4381 LType = ISD::SEXTLOAD; 4382 } 4383 if (!TLI->isLoadExtLegal(LType, VT, LoadVT)) { 4384 I = OldExt; 4385 TPT.rollback(LastKnownGood); 4386 return false; 4387 } 4388 4389 // Move the extend into the same block as the load, so that SelectionDAG 4390 // can fold it. 4391 TPT.commit(); 4392 I->removeFromParent(); 4393 I->insertAfter(LI); 4394 // CGP does not check if the zext would be speculatively executed when moved 4395 // to the same basic block as the load. Preserving its original location would 4396 // pessimize the debugging experience, as well as negatively impact the 4397 // quality of sample pgo. We don't want to use "line 0" as that has a 4398 // size cost in the line-table section and logically the zext can be seen as 4399 // part of the load. Therefore we conservatively reuse the same debug location 4400 // for the load and the zext. 4401 I->setDebugLoc(LI->getDebugLoc()); 4402 ++NumExtsMoved; 4403 return true; 4404 } 4405 4406 bool CodeGenPrepare::optimizeExtUses(Instruction *I) { 4407 BasicBlock *DefBB = I->getParent(); 4408 4409 // If the result of a {s|z}ext and its source are both live out, rewrite all 4410 // other uses of the source with result of extension. 4411 Value *Src = I->getOperand(0); 4412 if (Src->hasOneUse()) 4413 return false; 4414 4415 // Only do this xform if truncating is free. 4416 if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType())) 4417 return false; 4418 4419 // Only safe to perform the optimization if the source is also defined in 4420 // this block. 4421 if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent()) 4422 return false; 4423 4424 bool DefIsLiveOut = false; 4425 for (User *U : I->users()) { 4426 Instruction *UI = cast<Instruction>(U); 4427 4428 // Figure out which BB this ext is used in. 4429 BasicBlock *UserBB = UI->getParent(); 4430 if (UserBB == DefBB) continue; 4431 DefIsLiveOut = true; 4432 break; 4433 } 4434 if (!DefIsLiveOut) 4435 return false; 4436 4437 // Make sure none of the uses are PHI nodes. 4438 for (User *U : Src->users()) { 4439 Instruction *UI = cast<Instruction>(U); 4440 BasicBlock *UserBB = UI->getParent(); 4441 if (UserBB == DefBB) continue; 4442 // Be conservative. We don't want this xform to end up introducing 4443 // reloads just before load / store instructions. 4444 if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI)) 4445 return false; 4446 } 4447 4448 // InsertedTruncs - Only insert one trunc in each block once. 4449 DenseMap<BasicBlock*, Instruction*> InsertedTruncs; 4450 4451 bool MadeChange = false; 4452 for (Use &U : Src->uses()) { 4453 Instruction *User = cast<Instruction>(U.getUser()); 4454 4455 // Figure out which BB this ext is used in. 
4456 BasicBlock *UserBB = User->getParent(); 4457 if (UserBB == DefBB) continue; 4458 4459 // Both src and def are live in this block. Rewrite the use. 4460 Instruction *&InsertedTrunc = InsertedTruncs[UserBB]; 4461 4462 if (!InsertedTrunc) { 4463 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); 4464 assert(InsertPt != UserBB->end()); 4465 InsertedTrunc = new TruncInst(I, Src->getType(), "", &*InsertPt); 4466 InsertedInsts.insert(InsertedTrunc); 4467 } 4468 4469 // Replace a use of the {s|z}ext source with a use of the result. 4470 U = InsertedTrunc; 4471 ++NumExtUses; 4472 MadeChange = true; 4473 } 4474 4475 return MadeChange; 4476 } 4477 4478 // Find loads whose uses only use some of the loaded value's bits. Add an "and" 4479 // just after the load if the target can fold this into one extload instruction, 4480 // with the hope of eliminating some of the other later "and" instructions using 4481 // the loaded value. "and"s that are made trivially redundant by the insertion 4482 // of the new "and" are removed by this function, while others (e.g. those whose 4483 // path from the load goes through a phi) are left for isel to potentially 4484 // remove. 4485 // 4486 // For example: 4487 // 4488 // b0: 4489 // x = load i32 4490 // ... 4491 // b1: 4492 // y = and x, 0xff 4493 // z = use y 4494 // 4495 // becomes: 4496 // 4497 // b0: 4498 // x = load i32 4499 // x' = and x, 0xff 4500 // ... 4501 // b1: 4502 // z = use x' 4503 // 4504 // whereas: 4505 // 4506 // b0: 4507 // x1 = load i32 4508 // ... 4509 // b1: 4510 // x2 = load i32 4511 // ... 4512 // b2: 4513 // x = phi x1, x2 4514 // y = and x, 0xff 4515 // 4516 // becomes (after a call to optimizeLoadExt for each load): 4517 // 4518 // b0: 4519 // x1 = load i32 4520 // x1' = and x1, 0xff 4521 // ... 4522 // b1: 4523 // x2 = load i32 4524 // x2' = and x2, 0xff 4525 // ... 4526 // b2: 4527 // x = phi x1', x2' 4528 // y = and x, 0xff 4529 // 4530 4531 bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) { 4532 4533 if (!Load->isSimple() || 4534 !(Load->getType()->isIntegerTy() || Load->getType()->isPointerTy())) 4535 return false; 4536 4537 // Skip loads we've already transformed or have no reason to transform. 4538 if (Load->hasOneUse()) { 4539 User *LoadUser = *Load->user_begin(); 4540 if (cast<Instruction>(LoadUser)->getParent() == Load->getParent() && 4541 !dyn_cast<PHINode>(LoadUser)) 4542 return false; 4543 } 4544 4545 // Look at all uses of Load, looking through phis, to determine how many bits 4546 // of the loaded value are needed. 4547 SmallVector<Instruction *, 8> WorkList; 4548 SmallPtrSet<Instruction *, 16> Visited; 4549 SmallVector<Instruction *, 8> AndsToMaybeRemove; 4550 for (auto *U : Load->users()) 4551 WorkList.push_back(cast<Instruction>(U)); 4552 4553 EVT LoadResultVT = TLI->getValueType(*DL, Load->getType()); 4554 unsigned BitWidth = LoadResultVT.getSizeInBits(); 4555 APInt DemandBits(BitWidth, 0); 4556 APInt WidestAndBits(BitWidth, 0); 4557 4558 while (!WorkList.empty()) { 4559 Instruction *I = WorkList.back(); 4560 WorkList.pop_back(); 4561 4562 // Break use-def graph loops. 4563 if (!Visited.insert(I).second) 4564 continue; 4565 4566 // For a PHI node, push all of its users. 
4567 if (auto *Phi = dyn_cast<PHINode>(I)) { 4568 for (auto *U : Phi->users()) 4569 WorkList.push_back(cast<Instruction>(U)); 4570 continue; 4571 } 4572 4573 switch (I->getOpcode()) { 4574 case llvm::Instruction::And: { 4575 auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1)); 4576 if (!AndC) 4577 return false; 4578 APInt AndBits = AndC->getValue(); 4579 DemandBits |= AndBits; 4580 // Keep track of the widest and mask we see. 4581 if (AndBits.ugt(WidestAndBits)) 4582 WidestAndBits = AndBits; 4583 if (AndBits == WidestAndBits && I->getOperand(0) == Load) 4584 AndsToMaybeRemove.push_back(I); 4585 break; 4586 } 4587 4588 case llvm::Instruction::Shl: { 4589 auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1)); 4590 if (!ShlC) 4591 return false; 4592 uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1); 4593 auto ShlDemandBits = APInt::getAllOnesValue(BitWidth).lshr(ShiftAmt); 4594 DemandBits |= ShlDemandBits; 4595 break; 4596 } 4597 4598 case llvm::Instruction::Trunc: { 4599 EVT TruncVT = TLI->getValueType(*DL, I->getType()); 4600 unsigned TruncBitWidth = TruncVT.getSizeInBits(); 4601 auto TruncBits = APInt::getAllOnesValue(TruncBitWidth).zext(BitWidth); 4602 DemandBits |= TruncBits; 4603 break; 4604 } 4605 4606 default: 4607 return false; 4608 } 4609 } 4610 4611 uint32_t ActiveBits = DemandBits.getActiveBits(); 4612 // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the 4613 // target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example, 4614 // for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but 4615 // (and (load x) 1) is not matched as a single instruction, rather as a LDR 4616 // followed by an AND. 4617 // TODO: Look into removing this restriction by fixing backends to either 4618 // return false for isLoadExtLegal for i1 or have them select this pattern to 4619 // a single instruction. 4620 // 4621 // Also avoid hoisting if we didn't see any ands with the exact DemandBits 4622 // mask, since these are the only ands that will be removed by isel. 4623 if (ActiveBits <= 1 || !APIntOps::isMask(ActiveBits, DemandBits) || 4624 WidestAndBits != DemandBits) 4625 return false; 4626 4627 LLVMContext &Ctx = Load->getType()->getContext(); 4628 Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits); 4629 EVT TruncVT = TLI->getValueType(*DL, TruncTy); 4630 4631 // Reject cases that won't be matched as extloads. 4632 if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() || 4633 !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT)) 4634 return false; 4635 4636 IRBuilder<> Builder(Load->getNextNode()); 4637 auto *NewAnd = dyn_cast<Instruction>( 4638 Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits))); 4639 4640 // Replace all uses of load with new and (except for the use of load in the 4641 // new and itself). 4642 Load->replaceAllUsesWith(NewAnd); 4643 NewAnd->setOperand(0, Load); 4644 4645 // Remove any and instructions that are now redundant. 4646 for (auto *And : AndsToMaybeRemove) 4647 // Check that the and mask is the same as the one we decided to put on the 4648 // new and. 4649 if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) { 4650 And->replaceAllUsesWith(NewAnd); 4651 if (&*CurInstIterator == And) 4652 CurInstIterator = std::next(And->getIterator()); 4653 And->eraseFromParent(); 4654 ++NumAndUses; 4655 } 4656 4657 ++NumAndsAdded; 4658 return true; 4659 } 4660 4661 /// Check if V (an operand of a select instruction) is an expensive instruction 4662 /// that is only used once. 
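/// For example (an illustrative sketch), in
///   %div = fdiv double %a, %b
///   %sel = select i1 %cmp, double %div, double 0.0
/// the fdiv is expensive and has a single use, so it is a candidate for
/// sinking into the conditional block once the select is turned into a branch.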
4663 static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) { 4664 auto *I = dyn_cast<Instruction>(V); 4665 // If it's safe to speculatively execute, then it should not have side 4666 // effects; therefore, it's safe to sink and possibly *not* execute. 4667 return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) && 4668 TTI->getUserCost(I) >= TargetTransformInfo::TCC_Expensive; 4669 } 4670 4671 /// Returns true if a SelectInst should be turned into an explicit branch. 4672 static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, 4673 const TargetLowering *TLI, 4674 SelectInst *SI) { 4675 // If even a predictable select is cheap, then a branch can't be cheaper. 4676 if (!TLI->isPredictableSelectExpensive()) 4677 return false; 4678 4679 // FIXME: This should use the same heuristics as IfConversion to determine 4680 // whether a select is better represented as a branch. 4681 4682 // If metadata tells us that the select condition is obviously predictable, 4683 // then we want to replace the select with a branch. 4684 uint64_t TrueWeight, FalseWeight; 4685 if (SI->extractProfMetadata(TrueWeight, FalseWeight)) { 4686 uint64_t Max = std::max(TrueWeight, FalseWeight); 4687 uint64_t Sum = TrueWeight + FalseWeight; 4688 if (Sum != 0) { 4689 auto Probability = BranchProbability::getBranchProbability(Max, Sum); 4690 if (Probability > TLI->getPredictableBranchThreshold()) 4691 return true; 4692 } 4693 } 4694 4695 CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition()); 4696 4697 // If a branch is predictable, an out-of-order CPU can avoid blocking on its 4698 // comparison condition. If the compare has more than one use, there's 4699 // probably another cmov or setcc around, so it's not worth emitting a branch. 4700 if (!Cmp || !Cmp->hasOneUse()) 4701 return false; 4702 4703 // If either operand of the select is expensive and only needed on one side 4704 // of the select, we should form a branch. 4705 if (sinkSelectOperand(TTI, SI->getTrueValue()) || 4706 sinkSelectOperand(TTI, SI->getFalseValue())) 4707 return true; 4708 4709 return false; 4710 } 4711 4712 /// If \p isTrue is true, return the true value of \p SI, otherwise return 4713 /// false value of \p SI. If the true/false value of \p SI is defined by any 4714 /// select instructions in \p Selects, look through the defining select 4715 /// instruction until the true/false value is not defined in \p Selects. 4716 static Value *getTrueOrFalseValue( 4717 SelectInst *SI, bool isTrue, 4718 const SmallPtrSet<const Instruction *, 2> &Selects) { 4719 Value *V; 4720 4721 for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI); 4722 DefSI = dyn_cast<SelectInst>(V)) { 4723 assert(DefSI->getCondition() == SI->getCondition() && 4724 "The condition of DefSI does not match with SI"); 4725 V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue()); 4726 } 4727 return V; 4728 } 4729 4730 /// If we have a SelectInst that will likely profit from branch prediction, 4731 /// turn it into a branch. 4732 bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { 4733 // Find all consecutive select instructions that share the same condition. 
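  // For example (an illustrative sketch), the following two selects share a
  // condition and are collected and lowered together:
  //   %sel1 = select i1 %cmp, i32 %a, i32 %b
  //   %sel2 = select i1 %cmp, i32 %c, i32 %sel1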
4734 SmallVector<SelectInst *, 2> ASI; 4735 ASI.push_back(SI); 4736 for (BasicBlock::iterator It = ++BasicBlock::iterator(SI); 4737 It != SI->getParent()->end(); ++It) { 4738 SelectInst *I = dyn_cast<SelectInst>(&*It); 4739 if (I && SI->getCondition() == I->getCondition()) { 4740 ASI.push_back(I); 4741 } else { 4742 break; 4743 } 4744 } 4745 4746 SelectInst *LastSI = ASI.back(); 4747 // Increment the current iterator to skip all the rest of select instructions 4748 // because they will be either "not lowered" or "all lowered" to branch. 4749 CurInstIterator = std::next(LastSI->getIterator()); 4750 4751 bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); 4752 4753 // Can we convert the 'select' to CF ? 4754 if (DisableSelectToBranch || OptSize || !TLI || VectorCond || 4755 SI->getMetadata(LLVMContext::MD_unpredictable)) 4756 return false; 4757 4758 TargetLowering::SelectSupportKind SelectKind; 4759 if (VectorCond) 4760 SelectKind = TargetLowering::VectorMaskSelect; 4761 else if (SI->getType()->isVectorTy()) 4762 SelectKind = TargetLowering::ScalarCondVectorVal; 4763 else 4764 SelectKind = TargetLowering::ScalarValSelect; 4765 4766 if (TLI->isSelectSupported(SelectKind) && 4767 !isFormingBranchFromSelectProfitable(TTI, TLI, SI)) 4768 return false; 4769 4770 ModifiedDT = true; 4771 4772 // Transform a sequence like this: 4773 // start: 4774 // %cmp = cmp uge i32 %a, %b 4775 // %sel = select i1 %cmp, i32 %c, i32 %d 4776 // 4777 // Into: 4778 // start: 4779 // %cmp = cmp uge i32 %a, %b 4780 // br i1 %cmp, label %select.true, label %select.false 4781 // select.true: 4782 // br label %select.end 4783 // select.false: 4784 // br label %select.end 4785 // select.end: 4786 // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ] 4787 // 4788 // In addition, we may sink instructions that produce %c or %d from 4789 // the entry block into the destination(s) of the new branch. 4790 // If the true or false blocks do not contain a sunken instruction, that 4791 // block and its branch may be optimized away. In that case, one side of the 4792 // first branch will point directly to select.end, and the corresponding PHI 4793 // predecessor block will be the start block. 4794 4795 // First, we split the block containing the select into 2 blocks. 4796 BasicBlock *StartBlock = SI->getParent(); 4797 BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI)); 4798 BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); 4799 4800 // Delete the unconditional branch that was just created by the split. 4801 StartBlock->getTerminator()->eraseFromParent(); 4802 4803 // These are the new basic blocks for the conditional branch. 4804 // At least one will become an actual new basic block. 4805 BasicBlock *TrueBlock = nullptr; 4806 BasicBlock *FalseBlock = nullptr; 4807 BranchInst *TrueBranch = nullptr; 4808 BranchInst *FalseBranch = nullptr; 4809 4810 // Sink expensive instructions into the conditional blocks to avoid executing 4811 // them speculatively. 
4812 for (SelectInst *SI : ASI) { 4813 if (sinkSelectOperand(TTI, SI->getTrueValue())) { 4814 if (TrueBlock == nullptr) { 4815 TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink", 4816 EndBlock->getParent(), EndBlock); 4817 TrueBranch = BranchInst::Create(EndBlock, TrueBlock); 4818 } 4819 auto *TrueInst = cast<Instruction>(SI->getTrueValue()); 4820 TrueInst->moveBefore(TrueBranch); 4821 } 4822 if (sinkSelectOperand(TTI, SI->getFalseValue())) { 4823 if (FalseBlock == nullptr) { 4824 FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink", 4825 EndBlock->getParent(), EndBlock); 4826 FalseBranch = BranchInst::Create(EndBlock, FalseBlock); 4827 } 4828 auto *FalseInst = cast<Instruction>(SI->getFalseValue()); 4829 FalseInst->moveBefore(FalseBranch); 4830 } 4831 } 4832 4833 // If there was nothing to sink, then arbitrarily choose the 'false' side 4834 // for a new input value to the PHI. 4835 if (TrueBlock == FalseBlock) { 4836 assert(TrueBlock == nullptr && 4837 "Unexpected basic block transform while optimizing select"); 4838 4839 FalseBlock = BasicBlock::Create(SI->getContext(), "select.false", 4840 EndBlock->getParent(), EndBlock); 4841 BranchInst::Create(EndBlock, FalseBlock); 4842 } 4843 4844 // Insert the real conditional branch based on the original condition. 4845 // If we did not create a new block for one of the 'true' or 'false' paths 4846 // of the condition, it means that side of the branch goes to the end block 4847 // directly and the path originates from the start block from the point of 4848 // view of the new PHI. 4849 BasicBlock *TT, *FT; 4850 if (TrueBlock == nullptr) { 4851 TT = EndBlock; 4852 FT = FalseBlock; 4853 TrueBlock = StartBlock; 4854 } else if (FalseBlock == nullptr) { 4855 TT = TrueBlock; 4856 FT = EndBlock; 4857 FalseBlock = StartBlock; 4858 } else { 4859 TT = TrueBlock; 4860 FT = FalseBlock; 4861 } 4862 IRBuilder<>(SI).CreateCondBr(SI->getCondition(), TT, FT, SI); 4863 4864 SmallPtrSet<const Instruction *, 2> INS; 4865 INS.insert(ASI.begin(), ASI.end()); 4866 // Use reverse iterator because later select may use the value of the 4867 // earlier select, and we need to propagate value through earlier select 4868 // to get the PHI operand. 4869 for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) { 4870 SelectInst *SI = *It; 4871 // The select itself is replaced with a PHI Node. 4872 PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front()); 4873 PN->takeName(SI); 4874 PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock); 4875 PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock); 4876 4877 SI->replaceAllUsesWith(PN); 4878 SI->eraseFromParent(); 4879 INS.erase(SI); 4880 ++NumSelectsExpanded; 4881 } 4882 4883 // Instruct OptimizeBlock to skip to the next block. 4884 CurInstIterator = StartBlock->end(); 4885 return true; 4886 } 4887 4888 static bool isBroadcastShuffle(ShuffleVectorInst *SVI) { 4889 SmallVector<int, 16> Mask(SVI->getShuffleMask()); 4890 int SplatElem = -1; 4891 for (unsigned i = 0; i < Mask.size(); ++i) { 4892 if (SplatElem != -1 && Mask[i] != -1 && Mask[i] != SplatElem) 4893 return false; 4894 SplatElem = Mask[i]; 4895 } 4896 4897 return true; 4898 } 4899 4900 /// Some targets have expensive vector shifts if the lanes aren't all the same 4901 /// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases 4902 /// it's often worth sinking a shufflevector splat down to its use so that 4903 /// codegen can spot all lanes are identical. 
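/// For example (an illustrative sketch), a splat that feeds a shift in
/// another block:
///   %splat = shufflevector <4 x i32> %v, <4 x i32> undef,
///                          <4 x i32> zeroinitializer
///   ...
///   %shl = shl <4 x i32> %x, %splat   ; in a different basic block
/// Sinking the shufflevector next to the shl lets the backend recognise a
/// shift-by-splat when it selects the shift.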
4904 bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) { 4905 BasicBlock *DefBB = SVI->getParent(); 4906 4907 // Only do this xform if variable vector shifts are particularly expensive. 4908 if (!TLI || !TLI->isVectorShiftByScalarCheap(SVI->getType())) 4909 return false; 4910 4911 // We only expect better codegen by sinking a shuffle if we can recognise a 4912 // constant splat. 4913 if (!isBroadcastShuffle(SVI)) 4914 return false; 4915 4916 // InsertedShuffles - Only insert a shuffle in each block once. 4917 DenseMap<BasicBlock*, Instruction*> InsertedShuffles; 4918 4919 bool MadeChange = false; 4920 for (User *U : SVI->users()) { 4921 Instruction *UI = cast<Instruction>(U); 4922 4923 // Figure out which BB this ext is used in. 4924 BasicBlock *UserBB = UI->getParent(); 4925 if (UserBB == DefBB) continue; 4926 4927 // For now only apply this when the splat is used by a shift instruction. 4928 if (!UI->isShift()) continue; 4929 4930 // Everything checks out, sink the shuffle if the user's block doesn't 4931 // already have a copy. 4932 Instruction *&InsertedShuffle = InsertedShuffles[UserBB]; 4933 4934 if (!InsertedShuffle) { 4935 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); 4936 assert(InsertPt != UserBB->end()); 4937 InsertedShuffle = 4938 new ShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1), 4939 SVI->getOperand(2), "", &*InsertPt); 4940 } 4941 4942 UI->replaceUsesOfWith(SVI, InsertedShuffle); 4943 MadeChange = true; 4944 } 4945 4946 // If we removed all uses, nuke the shuffle. 4947 if (SVI->use_empty()) { 4948 SVI->eraseFromParent(); 4949 MadeChange = true; 4950 } 4951 4952 return MadeChange; 4953 } 4954 4955 bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) { 4956 if (!TLI || !DL) 4957 return false; 4958 4959 Value *Cond = SI->getCondition(); 4960 Type *OldType = Cond->getType(); 4961 LLVMContext &Context = Cond->getContext(); 4962 MVT RegType = TLI->getRegisterType(Context, TLI->getValueType(*DL, OldType)); 4963 unsigned RegWidth = RegType.getSizeInBits(); 4964 4965 if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth()) 4966 return false; 4967 4968 // If the register width is greater than the type width, expand the condition 4969 // of the switch instruction and each case constant to the width of the 4970 // register. By widening the type of the switch condition, subsequent 4971 // comparisons (for case comparisons) will not need to be extended to the 4972 // preferred register width, so we will potentially eliminate N-1 extends, 4973 // where N is the number of cases in the switch. 4974 auto *NewType = Type::getIntNTy(Context, RegWidth); 4975 4976 // Zero-extend the switch condition and case constants unless the switch 4977 // condition is a function argument that is already being sign-extended. 4978 // In that case, we can avoid an unnecessary mask/extension by sign-extending 4979 // everything instead. 4980 Instruction::CastOps ExtType = Instruction::ZExt; 4981 if (auto *Arg = dyn_cast<Argument>(Cond)) 4982 if (Arg->hasSExtAttr()) 4983 ExtType = Instruction::SExt; 4984 4985 auto *ExtInst = CastInst::Create(ExtType, Cond, NewType); 4986 ExtInst->insertBefore(SI); 4987 SI->setCondition(ExtInst); 4988 for (SwitchInst::CaseIt Case : SI->cases()) { 4989 APInt NarrowConst = Case.getCaseValue()->getValue(); 4990 APInt WideConst = (ExtType == Instruction::ZExt) ? 
4991 NarrowConst.zext(RegWidth) : NarrowConst.sext(RegWidth); 4992 Case.setValue(ConstantInt::get(Context, WideConst)); 4993 } 4994 4995 return true; 4996 } 4997 4998 namespace { 4999 /// \brief Helper class to promote a scalar operation to a vector one. 5000 /// This class is used to move downward extractelement transition. 5001 /// E.g., 5002 /// a = vector_op <2 x i32> 5003 /// b = extractelement <2 x i32> a, i32 0 5004 /// c = scalar_op b 5005 /// store c 5006 /// 5007 /// => 5008 /// a = vector_op <2 x i32> 5009 /// c = vector_op a (equivalent to scalar_op on the related lane) 5010 /// * d = extractelement <2 x i32> c, i32 0 5011 /// * store d 5012 /// Assuming both extractelement and store can be combine, we get rid of the 5013 /// transition. 5014 class VectorPromoteHelper { 5015 /// DataLayout associated with the current module. 5016 const DataLayout &DL; 5017 5018 /// Used to perform some checks on the legality of vector operations. 5019 const TargetLowering &TLI; 5020 5021 /// Used to estimated the cost of the promoted chain. 5022 const TargetTransformInfo &TTI; 5023 5024 /// The transition being moved downwards. 5025 Instruction *Transition; 5026 /// The sequence of instructions to be promoted. 5027 SmallVector<Instruction *, 4> InstsToBePromoted; 5028 /// Cost of combining a store and an extract. 5029 unsigned StoreExtractCombineCost; 5030 /// Instruction that will be combined with the transition. 5031 Instruction *CombineInst; 5032 5033 /// \brief The instruction that represents the current end of the transition. 5034 /// Since we are faking the promotion until we reach the end of the chain 5035 /// of computation, we need a way to get the current end of the transition. 5036 Instruction *getEndOfTransition() const { 5037 if (InstsToBePromoted.empty()) 5038 return Transition; 5039 return InstsToBePromoted.back(); 5040 } 5041 5042 /// \brief Return the index of the original value in the transition. 5043 /// E.g., for "extractelement <2 x i32> c, i32 1" the original value, 5044 /// c, is at index 0. 5045 unsigned getTransitionOriginalValueIdx() const { 5046 assert(isa<ExtractElementInst>(Transition) && 5047 "Other kind of transitions are not supported yet"); 5048 return 0; 5049 } 5050 5051 /// \brief Return the index of the index in the transition. 5052 /// E.g., for "extractelement <2 x i32> c, i32 0" the index 5053 /// is at index 1. 5054 unsigned getTransitionIdx() const { 5055 assert(isa<ExtractElementInst>(Transition) && 5056 "Other kind of transitions are not supported yet"); 5057 return 1; 5058 } 5059 5060 /// \brief Get the type of the transition. 5061 /// This is the type of the original value. 5062 /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the 5063 /// transition is <2 x i32>. 5064 Type *getTransitionType() const { 5065 return Transition->getOperand(getTransitionOriginalValueIdx())->getType(); 5066 } 5067 5068 /// \brief Promote \p ToBePromoted by moving \p Def downward through. 5069 /// I.e., we have the following sequence: 5070 /// Def = Transition <ty1> a to <ty2> 5071 /// b = ToBePromoted <ty2> Def, ... 5072 /// => 5073 /// b = ToBePromoted <ty1> a, ... 5074 /// Def = Transition <ty1> ToBePromoted to <ty2> 5075 void promoteImpl(Instruction *ToBePromoted); 5076 5077 /// \brief Check whether or not it is profitable to promote all the 5078 /// instructions enqueued to be promoted. 5079 bool isProfitableToPromote() { 5080 Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx()); 5081 unsigned Index = isa<ConstantInt>(ValIdx) 5082 ? 
cast<ConstantInt>(ValIdx)->getZExtValue() 5083 : -1; 5084 Type *PromotedType = getTransitionType(); 5085 5086 StoreInst *ST = cast<StoreInst>(CombineInst); 5087 unsigned AS = ST->getPointerAddressSpace(); 5088 unsigned Align = ST->getAlignment(); 5089 // Check if this store is supported. 5090 if (!TLI.allowsMisalignedMemoryAccesses( 5091 TLI.getValueType(DL, ST->getValueOperand()->getType()), AS, 5092 Align)) { 5093 // If this is not supported, there is no way we can combine 5094 // the extract with the store. 5095 return false; 5096 } 5097 5098 // The scalar chain of computation has to pay for the transition 5099 // scalar to vector. 5100 // The vector chain has to account for the combining cost. 5101 uint64_t ScalarCost = 5102 TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index); 5103 uint64_t VectorCost = StoreExtractCombineCost; 5104 for (const auto &Inst : InstsToBePromoted) { 5105 // Compute the cost. 5106 // By construction, all instructions being promoted are arithmetic ones. 5107 // Moreover, one argument is a constant that can be viewed as a splat 5108 // constant. 5109 Value *Arg0 = Inst->getOperand(0); 5110 bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) || 5111 isa<ConstantFP>(Arg0); 5112 TargetTransformInfo::OperandValueKind Arg0OVK = 5113 IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue 5114 : TargetTransformInfo::OK_AnyValue; 5115 TargetTransformInfo::OperandValueKind Arg1OVK = 5116 !IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue 5117 : TargetTransformInfo::OK_AnyValue; 5118 ScalarCost += TTI.getArithmeticInstrCost( 5119 Inst->getOpcode(), Inst->getType(), Arg0OVK, Arg1OVK); 5120 VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType, 5121 Arg0OVK, Arg1OVK); 5122 } 5123 DEBUG(dbgs() << "Estimated cost of computation to be promoted:\nScalar: " 5124 << ScalarCost << "\nVector: " << VectorCost << '\n'); 5125 return ScalarCost > VectorCost; 5126 } 5127 5128 /// \brief Generate a constant vector with \p Val with the same 5129 /// number of elements as the transition. 5130 /// \p UseSplat defines whether or not \p Val should be replicated 5131 /// across the whole vector. 5132 /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>, 5133 /// otherwise we generate a vector with as many undef as possible: 5134 /// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only 5135 /// used at the index of the extract. 5136 Value *getConstantVector(Constant *Val, bool UseSplat) const { 5137 unsigned ExtractIdx = UINT_MAX; 5138 if (!UseSplat) { 5139 // If we cannot determine where the constant must be, we have to 5140 // use a splat constant. 5141 Value *ValExtractIdx = Transition->getOperand(getTransitionIdx()); 5142 if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx)) 5143 ExtractIdx = CstVal->getSExtValue(); 5144 else 5145 UseSplat = true; 5146 } 5147 5148 unsigned End = getTransitionType()->getVectorNumElements(); 5149 if (UseSplat) 5150 return ConstantVector::getSplat(End, Val); 5151 5152 SmallVector<Constant *, 4> ConstVec; 5153 UndefValue *UndefVal = UndefValue::get(Val->getType()); 5154 for (unsigned Idx = 0; Idx != End; ++Idx) { 5155 if (Idx == ExtractIdx) 5156 ConstVec.push_back(Val); 5157 else 5158 ConstVec.push_back(UndefVal); 5159 } 5160 return ConstantVector::get(ConstVec); 5161 } 5162 5163 /// \brief Check if promoting to a vector type an operand at \p OperandIdx 5164 /// in \p Use can trigger undefined behavior. 
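/// For example (an illustrative sketch), promoting
///   %e = extractelement <2 x i32> %v, i32 0
///   %d = udiv i32 7, %e
/// would turn the divisor into a full vector, and the lane not covered by the
/// extract could be zero or undef, making the vector udiv undefined. Such uses
/// are rejected, and constant divisors are splatted rather than padded with
/// undef.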
5165 static bool canCauseUndefinedBehavior(const Instruction *Use, 5166 unsigned OperandIdx) { 5167 // This is not safe to introduce undef when the operand is on 5168 // the right hand side of a division-like instruction. 5169 if (OperandIdx != 1) 5170 return false; 5171 switch (Use->getOpcode()) { 5172 default: 5173 return false; 5174 case Instruction::SDiv: 5175 case Instruction::UDiv: 5176 case Instruction::SRem: 5177 case Instruction::URem: 5178 return true; 5179 case Instruction::FDiv: 5180 case Instruction::FRem: 5181 return !Use->hasNoNaNs(); 5182 } 5183 llvm_unreachable(nullptr); 5184 } 5185 5186 public: 5187 VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI, 5188 const TargetTransformInfo &TTI, Instruction *Transition, 5189 unsigned CombineCost) 5190 : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition), 5191 StoreExtractCombineCost(CombineCost), CombineInst(nullptr) { 5192 assert(Transition && "Do not know how to promote null"); 5193 } 5194 5195 /// \brief Check if we can promote \p ToBePromoted to \p Type. 5196 bool canPromote(const Instruction *ToBePromoted) const { 5197 // We could support CastInst too. 5198 return isa<BinaryOperator>(ToBePromoted); 5199 } 5200 5201 /// \brief Check if it is profitable to promote \p ToBePromoted 5202 /// by moving downward the transition through. 5203 bool shouldPromote(const Instruction *ToBePromoted) const { 5204 // Promote only if all the operands can be statically expanded. 5205 // Indeed, we do not want to introduce any new kind of transitions. 5206 for (const Use &U : ToBePromoted->operands()) { 5207 const Value *Val = U.get(); 5208 if (Val == getEndOfTransition()) { 5209 // If the use is a division and the transition is on the rhs, 5210 // we cannot promote the operation, otherwise we may create a 5211 // division by zero. 5212 if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo())) 5213 return false; 5214 continue; 5215 } 5216 if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) && 5217 !isa<ConstantFP>(Val)) 5218 return false; 5219 } 5220 // Check that the resulting operation is legal. 5221 int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode()); 5222 if (!ISDOpcode) 5223 return false; 5224 return StressStoreExtract || 5225 TLI.isOperationLegalOrCustom( 5226 ISDOpcode, TLI.getValueType(DL, getTransitionType(), true)); 5227 } 5228 5229 /// \brief Check whether or not \p Use can be combined 5230 /// with the transition. 5231 /// I.e., is it possible to do Use(Transition) => AnotherUse? 5232 bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); } 5233 5234 /// \brief Record \p ToBePromoted as part of the chain to be promoted. 5235 void enqueueForPromotion(Instruction *ToBePromoted) { 5236 InstsToBePromoted.push_back(ToBePromoted); 5237 } 5238 5239 /// \brief Set the instruction that will be combined with the transition. 5240 void recordCombineInstruction(Instruction *ToBeCombined) { 5241 assert(canCombine(ToBeCombined) && "Unsupported instruction to combine"); 5242 CombineInst = ToBeCombined; 5243 } 5244 5245 /// \brief Promote all the instructions enqueued for promotion if it is 5246 /// is profitable. 5247 /// \return True if the promotion happened, false otherwise. 5248 bool promote() { 5249 // Check if there is something to promote. 5250 // Right now, if we do not have anything to combine with, 5251 // we assume the promotion is not profitable. 5252 if (InstsToBePromoted.empty() || !CombineInst) 5253 return false; 5254 5255 // Check cost. 
5256 if (!StressStoreExtract && !isProfitableToPromote()) 5257 return false; 5258 5259 // Promote. 5260 for (auto &ToBePromoted : InstsToBePromoted) 5261 promoteImpl(ToBePromoted); 5262 InstsToBePromoted.clear(); 5263 return true; 5264 } 5265 }; 5266 } // End of anonymous namespace. 5267 5268 void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) { 5269 // At this point, we know that all the operands of ToBePromoted but Def 5270 // can be statically promoted. 5271 // For Def, we need to use its parameter in ToBePromoted: 5272 // b = ToBePromoted ty1 a 5273 // Def = Transition ty1 b to ty2 5274 // Move the transition down. 5275 // 1. Replace all uses of the promoted operation by the transition. 5276 // = ... b => = ... Def. 5277 assert(ToBePromoted->getType() == Transition->getType() && 5278 "The type of the result of the transition does not match " 5279 "the final type"); 5280 ToBePromoted->replaceAllUsesWith(Transition); 5281 // 2. Update the type of the uses. 5282 // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def. 5283 Type *TransitionTy = getTransitionType(); 5284 ToBePromoted->mutateType(TransitionTy); 5285 // 3. Update all the operands of the promoted operation with promoted 5286 // operands. 5287 // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a. 5288 for (Use &U : ToBePromoted->operands()) { 5289 Value *Val = U.get(); 5290 Value *NewVal = nullptr; 5291 if (Val == Transition) 5292 NewVal = Transition->getOperand(getTransitionOriginalValueIdx()); 5293 else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) || 5294 isa<ConstantFP>(Val)) { 5295 // Use a splat constant if it is not safe to use undef. 5296 NewVal = getConstantVector( 5297 cast<Constant>(Val), 5298 isa<UndefValue>(Val) || 5299 canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo())); 5300 } else 5301 llvm_unreachable("Did you modified shouldPromote and forgot to update " 5302 "this?"); 5303 ToBePromoted->setOperand(U.getOperandNo(), NewVal); 5304 } 5305 Transition->removeFromParent(); 5306 Transition->insertAfter(ToBePromoted); 5307 Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted); 5308 } 5309 5310 /// Some targets can do store(extractelement) with one instruction. 5311 /// Try to push the extractelement towards the stores when the target 5312 /// has this feature and this is profitable. 5313 bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) { 5314 unsigned CombineCost = UINT_MAX; 5315 if (DisableStoreExtract || !TLI || 5316 (!StressStoreExtract && 5317 !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(), 5318 Inst->getOperand(1), CombineCost))) 5319 return false; 5320 5321 // At this point we know that Inst is a vector to scalar transition. 5322 // Try to move it down the def-use chain, until: 5323 // - We can combine the transition with its single use 5324 // => we got rid of the transition. 5325 // - We escape the current basic block 5326 // => we would need to check that we are moving it at a cheaper place and 5327 // we do not do that for now. 5328 BasicBlock *Parent = Inst->getParent(); 5329 DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n'); 5330 VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost); 5331 // If the transition has more than one use, assume this is not going to be 5332 // beneficial. 
5333   while (Inst->hasOneUse()) {
5334     Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
5335     DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
5336 
5337     if (ToBePromoted->getParent() != Parent) {
5338       DEBUG(dbgs() << "Instruction to promote is in a different block ("
5339                    << ToBePromoted->getParent()->getName()
5340                    << ") than the transition (" << Parent->getName() << ").\n");
5341       return false;
5342     }
5343 
5344     if (VPH.canCombine(ToBePromoted)) {
5345       DEBUG(dbgs() << "Assume " << *Inst << '\n'
5346                    << "will be combined with: " << *ToBePromoted << '\n');
5347       VPH.recordCombineInstruction(ToBePromoted);
5348       bool Changed = VPH.promote();
5349       NumStoreExtractExposed += Changed;
5350       return Changed;
5351     }
5352 
5353     DEBUG(dbgs() << "Try promoting.\n");
5354     if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
5355       return false;
5356 
5357     DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
5358 
5359     VPH.enqueueForPromotion(ToBePromoted);
5360     Inst = ToBePromoted;
5361   }
5362   return false;
5363 }
5364 
5365 /// In the store sequence below, the F and I values are bundled together as
5366 /// an i64 value before being stored to memory.
5367 /// Sometimes it is more efficient to generate separate stores for F and I,
5368 /// which can remove the bitwise instructions or sink them to colder places.
5369 ///
5370 /// (store (or (zext (bitcast F to i32) to i64),
5371 ///            (shl (zext I to i64), 32)), addr)  -->
5372 /// (store F, addr) and (store I, addr+4)
5373 ///
5374 /// Similarly, splitting other merged stores can also be beneficial, like:
5375 /// For pair of {i32, i32}, i64 store --> two i32 stores.
5376 /// For pair of {i32, i16}, i64 store --> two i32 stores.
5377 /// For pair of {i16, i16}, i32 store --> two i16 stores.
5378 /// For pair of {i16, i8},  i32 store --> two i16 stores.
5379 /// For pair of {i8, i8},   i16 store --> two i8 stores.
5380 ///
5381 /// We allow each target to determine specifically which kind of splitting is
5382 /// supported.
5383 ///
5384 /// The store patterns are commonly seen from the simple code snippet below
5385 /// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
5386 /// void goo(const std::pair<int, float> &);
5387 /// hoo() {
5388 ///   ...
5389 ///   goo(std::make_pair(tmp, ftmp));
5390 ///   ...
5391 /// }
5392 ///
5393 /// Although we already have similar splitting in DAG Combine, we duplicate
5394 /// it in CodeGenPrepare to catch the case in which the pattern spans
5395 /// multiple BBs. The logic in DAG Combine is kept to catch cases generated
5396 /// during code expansion.
5397 static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
5398                                 const TargetLowering &TLI) {
5399   // Handle simple but common cases only.
5400   Type *StoreType = SI.getValueOperand()->getType();
5401   if (DL.getTypeStoreSizeInBits(StoreType) != DL.getTypeSizeInBits(StoreType) ||
5402       DL.getTypeSizeInBits(StoreType) == 0)
5403     return false;
5404 
5405   unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;
5406   Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);
5407   if (DL.getTypeStoreSizeInBits(SplitStoreType) !=
5408       DL.getTypeSizeInBits(SplitStoreType))
5409     return false;
5410 
5411   // Match the following patterns:
5412   // (store (or (zext LValue to i64),
5413   //            (shl (zext HValue to i64), HalfValBitSize)), addr)
5414   // or
5415   // (store (or (shl (zext HValue to i64), HalfValBitSize),
5416   //            (zext LValue to i64)), addr)
5417   // Expect both operands of OR and the first operand of SHL to have only
5418   // one use.
5419   Value *LValue, *HValue;
5420   if (!match(SI.getValueOperand(),
5421              m_c_Or(m_OneUse(m_ZExt(m_Value(LValue))),
5422                     m_OneUse(m_Shl(m_OneUse(m_ZExt(m_Value(HValue))),
5423                                    m_SpecificInt(HalfValBitSize))))))
5424     return false;
5425 
5426   // Check that LValue and HValue are integers no wider than HalfValBitSize.
5427   if (!LValue->getType()->isIntegerTy() ||
5428       DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize ||
5429       !HValue->getType()->isIntegerTy() ||
5430       DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize)
5431     return false;
5432 
5433   // If LValue/HValue is a bitcast instruction, use the EVT before the bitcast
5434   // as the input of the target query.
5435   auto *LBC = dyn_cast<BitCastInst>(LValue);
5436   auto *HBC = dyn_cast<BitCastInst>(HValue);
5437   EVT LowTy = LBC ? EVT::getEVT(LBC->getOperand(0)->getType())
5438                   : EVT::getEVT(LValue->getType());
5439   EVT HighTy = HBC ? EVT::getEVT(HBC->getOperand(0)->getType())
5440                    : EVT::getEVT(HValue->getType());
5441   if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
5442     return false;
5443 
5444   // Start to split the store.
5445   IRBuilder<> Builder(SI.getContext());
5446   Builder.SetInsertPoint(&SI);
5447 
5448   // If LValue/HValue is a bitcast in another BB, create a new one in the
5449   // current BB so it may be merged with the split stores by the DAG combiner.
5450   if (LBC && LBC->getParent() != SI.getParent())
5451     LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType());
5452   if (HBC && HBC->getParent() != SI.getParent())
5453     HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType());
5454 
5455   auto CreateSplitStore = [&](Value *V, bool Upper) {
5456     V = Builder.CreateZExtOrBitCast(V, SplitStoreType);
5457     Value *Addr = Builder.CreateBitCast(
5458         SI.getOperand(1),
5459         SplitStoreType->getPointerTo(SI.getPointerAddressSpace()));
5460     if (Upper)
5461       Addr = Builder.CreateGEP(
5462           SplitStoreType, Addr,
5463           ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1));
5464     Builder.CreateAlignedStore(
5465         V, Addr, Upper ? SI.getAlignment() / 2 : SI.getAlignment());
5466   };
5467 
5468   CreateSplitStore(LValue, false);
5469   CreateSplitStore(HValue, true);
5470 
5471   // Delete the old store.
5472   SI.eraseFromParent();
5473   return true;
5474 }
5475 
5476 bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
5477   // Bail out if we inserted the instruction to prevent optimizations from
5478   // stepping on each other's toes.
5479   if (InsertedInsts.count(I))
5480     return false;
5481 
5482   if (PHINode *P = dyn_cast<PHINode>(I)) {
5483     // It is possible for very late stage optimizations (such as SimplifyCFG)
5484     // to introduce PHI nodes too late to be cleaned up.
If we detect such a 5485 // trivial PHI, go ahead and zap it here. 5486 if (Value *V = SimplifyInstruction(P, *DL, TLInfo, nullptr)) { 5487 P->replaceAllUsesWith(V); 5488 P->eraseFromParent(); 5489 ++NumPHIsElim; 5490 return true; 5491 } 5492 return false; 5493 } 5494 5495 if (CastInst *CI = dyn_cast<CastInst>(I)) { 5496 // If the source of the cast is a constant, then this should have 5497 // already been constant folded. The only reason NOT to constant fold 5498 // it is if something (e.g. LSR) was careful to place the constant 5499 // evaluation in a block other than then one that uses it (e.g. to hoist 5500 // the address of globals out of a loop). If this is the case, we don't 5501 // want to forward-subst the cast. 5502 if (isa<Constant>(CI->getOperand(0))) 5503 return false; 5504 5505 if (TLI && OptimizeNoopCopyExpression(CI, *TLI, *DL)) 5506 return true; 5507 5508 if (isa<ZExtInst>(I) || isa<SExtInst>(I)) { 5509 /// Sink a zext or sext into its user blocks if the target type doesn't 5510 /// fit in one register 5511 if (TLI && 5512 TLI->getTypeAction(CI->getContext(), 5513 TLI->getValueType(*DL, CI->getType())) == 5514 TargetLowering::TypeExpandInteger) { 5515 return SinkCast(CI); 5516 } else { 5517 bool MadeChange = moveExtToFormExtLoad(I); 5518 return MadeChange | optimizeExtUses(I); 5519 } 5520 } 5521 return false; 5522 } 5523 5524 if (CmpInst *CI = dyn_cast<CmpInst>(I)) 5525 if (!TLI || !TLI->hasMultipleConditionRegisters()) 5526 return OptimizeCmpExpression(CI, TLI); 5527 5528 if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 5529 LI->setMetadata(LLVMContext::MD_invariant_group, nullptr); 5530 if (TLI) { 5531 bool Modified = optimizeLoadExt(LI); 5532 unsigned AS = LI->getPointerAddressSpace(); 5533 Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS); 5534 return Modified; 5535 } 5536 return false; 5537 } 5538 5539 if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 5540 if (TLI && splitMergedValStore(*SI, *DL, *TLI)) 5541 return true; 5542 SI->setMetadata(LLVMContext::MD_invariant_group, nullptr); 5543 if (TLI) { 5544 unsigned AS = SI->getPointerAddressSpace(); 5545 return optimizeMemoryInst(I, SI->getOperand(1), 5546 SI->getOperand(0)->getType(), AS); 5547 } 5548 return false; 5549 } 5550 5551 BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I); 5552 5553 if (BinOp && (BinOp->getOpcode() == Instruction::AShr || 5554 BinOp->getOpcode() == Instruction::LShr)) { 5555 ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1)); 5556 if (TLI && CI && TLI->hasExtractBitsInsn()) 5557 return OptimizeExtractBits(BinOp, CI, *TLI, *DL); 5558 5559 return false; 5560 } 5561 5562 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) { 5563 if (GEPI->hasAllZeroIndices()) { 5564 /// The GEP operand must be a pointer, so must its result -> BitCast 5565 Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(), 5566 GEPI->getName(), GEPI); 5567 GEPI->replaceAllUsesWith(NC); 5568 GEPI->eraseFromParent(); 5569 ++NumGEPsElim; 5570 optimizeInst(NC, ModifiedDT); 5571 return true; 5572 } 5573 return false; 5574 } 5575 5576 if (CallInst *CI = dyn_cast<CallInst>(I)) 5577 return optimizeCallInst(CI, ModifiedDT); 5578 5579 if (SelectInst *SI = dyn_cast<SelectInst>(I)) 5580 return optimizeSelectInst(SI); 5581 5582 if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) 5583 return optimizeShuffleVectorInst(SVI); 5584 5585 if (auto *Switch = dyn_cast<SwitchInst>(I)) 5586 return optimizeSwitchInst(Switch); 5587 5588 if (isa<ExtractElementInst>(I)) 5589 return 
optimizeExtractElementInst(I);
5590 
5591   return false;
5592 }
5593 
5594 /// Given an OR instruction, check to see if this is a bitreverse
5595 /// idiom. If so, insert the new intrinsic and return true.
5596 static bool makeBitReverse(Instruction &I, const DataLayout &DL,
5597                            const TargetLowering &TLI) {
5598   if (!I.getType()->isIntegerTy() ||
5599       !TLI.isOperationLegalOrCustom(ISD::BITREVERSE,
5600                                     TLI.getValueType(DL, I.getType(), true)))
5601     return false;
5602 
5603   SmallVector<Instruction*, 4> Insts;
5604   if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts))
5605     return false;
5606   Instruction *LastInst = Insts.back();
5607   I.replaceAllUsesWith(LastInst);
5608   RecursivelyDeleteTriviallyDeadInstructions(&I);
5609   return true;
5610 }
5611 
5612 // In this pass we look for GEP and cast instructions that are used
5613 // across basic blocks and rewrite them to improve basic-block-at-a-time
5614 // selection.
5615 bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool& ModifiedDT) {
5616   SunkAddrs.clear();
5617   bool MadeChange = false;
5618 
5619   CurInstIterator = BB.begin();
5620   while (CurInstIterator != BB.end()) {
5621     MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
5622     if (ModifiedDT)
5623       return true;
5624   }
5625 
5626   bool MadeBitReverse = true;
5627   while (TLI && MadeBitReverse) {
5628     MadeBitReverse = false;
5629     for (auto &I : reverse(BB)) {
5630       if (makeBitReverse(I, *DL, *TLI)) {
5631         MadeBitReverse = MadeChange = true;
5632         ModifiedDT = true;
5633         break;
5634       }
5635     }
5636   }
5637   MadeChange |= dupRetToEnableTailCallOpts(&BB);
5638 
5639   return MadeChange;
5640 }
5641 
5642 // If an llvm.dbg.value is far away from the value it describes, ISel may not
5643 // be able to handle it properly: ISel will drop the llvm.dbg.value if it
5644 // cannot find a node corresponding to the value.
5645 bool CodeGenPrepare::placeDbgValues(Function &F) {
5646   bool MadeChange = false;
5647   for (BasicBlock &BB : F) {
5648     Instruction *PrevNonDbgInst = nullptr;
5649     for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
5650       Instruction *Insn = &*BI++;
5651       DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
5652       // Leave dbg.values that refer to an alloca alone. These
5653       // intrinsics describe the address of a variable (= the alloca)
5654       // being taken. They should not be moved next to the alloca
5655       // (and to the beginning of the scope), but rather stay close to
5656       // where said address is used.
5657       if (!DVI || (DVI->getValue() && isa<AllocaInst>(DVI->getValue()))) {
5658         PrevNonDbgInst = Insn;
5659         continue;
5660       }
5661 
5662       Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
5663       if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
5664         // If VI is a phi in a block with an EHPad terminator, we can't insert
5665         // after it.
5666         if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
5667           continue;
5668         DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
5669         DVI->removeFromParent();
5670         if (isa<PHINode>(VI))
5671           DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt());
5672         else
5673           DVI->insertAfter(VI);
5674         MadeChange = true;
5675         ++NumDbgValueMoved;
5676       }
5677     }
5678   }
5679   return MadeChange;
5680 }
5681 
5682 // If there is a sequence that branches based on comparing a single bit
5683 // against zero, and the target supports folding the mask and compare
5684 // into the branch as a single instruction, sink the
5685 // mask and compare into the branch uses.
Do this before OptimizeBlock -> 5686 // OptimizeInst -> OptimizeCmpExpression, which perturbs the pattern being 5687 // searched for. 5688 bool CodeGenPrepare::sinkAndCmp(Function &F) { 5689 if (!EnableAndCmpSinking) 5690 return false; 5691 if (!TLI || !TLI->isMaskAndBranchFoldingLegal()) 5692 return false; 5693 bool MadeChange = false; 5694 for (BasicBlock &BB : F) { 5695 // Does this BB end with the following? 5696 // %andVal = and %val, #single-bit-set 5697 // %icmpVal = icmp %andResult, 0 5698 // br i1 %cmpVal label %dest1, label %dest2" 5699 BranchInst *Brcc = dyn_cast<BranchInst>(BB.getTerminator()); 5700 if (!Brcc || !Brcc->isConditional()) 5701 continue; 5702 ICmpInst *Cmp = dyn_cast<ICmpInst>(Brcc->getOperand(0)); 5703 if (!Cmp || Cmp->getParent() != &BB) 5704 continue; 5705 ConstantInt *Zero = dyn_cast<ConstantInt>(Cmp->getOperand(1)); 5706 if (!Zero || !Zero->isZero()) 5707 continue; 5708 Instruction *And = dyn_cast<Instruction>(Cmp->getOperand(0)); 5709 if (!And || And->getOpcode() != Instruction::And || And->getParent() != &BB) 5710 continue; 5711 ConstantInt* Mask = dyn_cast<ConstantInt>(And->getOperand(1)); 5712 if (!Mask || !Mask->getUniqueInteger().isPowerOf2()) 5713 continue; 5714 DEBUG(dbgs() << "found and; icmp ?,0; brcc\n"); DEBUG(BB.dump()); 5715 5716 // Push the "and; icmp" for any users that are conditional branches. 5717 // Since there can only be one branch use per BB, we don't need to keep 5718 // track of which BBs we insert into. 5719 for (Use &TheUse : Cmp->uses()) { 5720 // Find brcc use. 5721 BranchInst *BrccUser = dyn_cast<BranchInst>(TheUse); 5722 if (!BrccUser || !BrccUser->isConditional()) 5723 continue; 5724 BasicBlock *UserBB = BrccUser->getParent(); 5725 if (UserBB == &BB) continue; 5726 DEBUG(dbgs() << "found Brcc use\n"); 5727 5728 // Sink the "and; icmp" to use. 5729 MadeChange = true; 5730 BinaryOperator *NewAnd = 5731 BinaryOperator::CreateAnd(And->getOperand(0), And->getOperand(1), "", 5732 BrccUser); 5733 CmpInst *NewCmp = 5734 CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(), NewAnd, Zero, 5735 "", BrccUser); 5736 TheUse = NewCmp; 5737 ++NumAndCmpsMoved; 5738 DEBUG(BrccUser->getParent()->dump()); 5739 } 5740 } 5741 return MadeChange; 5742 } 5743 5744 /// \brief Scale down both weights to fit into uint32_t. 5745 static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) { 5746 uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse; 5747 uint32_t Scale = (NewMax / UINT32_MAX) + 1; 5748 NewTrue = NewTrue / Scale; 5749 NewFalse = NewFalse / Scale; 5750 } 5751 5752 /// \brief Some targets prefer to split a conditional branch like: 5753 /// \code 5754 /// %0 = icmp ne i32 %a, 0 5755 /// %1 = icmp ne i32 %b, 0 5756 /// %or.cond = or i1 %0, %1 5757 /// br i1 %or.cond, label %TrueBB, label %FalseBB 5758 /// \endcode 5759 /// into multiple branch instructions like: 5760 /// \code 5761 /// bb1: 5762 /// %0 = icmp ne i32 %a, 0 5763 /// br i1 %0, label %TrueBB, label %bb2 5764 /// bb2: 5765 /// %1 = icmp ne i32 %b, 0 5766 /// br i1 %1, label %TrueBB, label %FalseBB 5767 /// \endcode 5768 /// This usually allows instruction selection to do even further optimizations 5769 /// and combine the compare with the branch instruction. Currently this is 5770 /// applied for targets which have "cheap" jump instructions. 5771 /// 5772 /// FIXME: Remove the (equivalent?) implementation in SelectionDAG. 
5773 ///
5774 bool CodeGenPrepare::splitBranchCondition(Function &F) {
5775   if (!TM || !TM->Options.EnableFastISel || !TLI || TLI->isJumpExpensive())
5776     return false;
5777 
5778   bool MadeChange = false;
5779   for (auto &BB : F) {
5780     // Does this BB end with the following?
5781     //   %cond1 = icmp|fcmp|binary instruction ...
5782     //   %cond2 = icmp|fcmp|binary instruction ...
5783     //   %cond.or = or|and i1 %cond1, %cond2
5784     //   br i1 %cond.or label %dest1, label %dest2
5785     BinaryOperator *LogicOp;
5786     BasicBlock *TBB, *FBB;
5787     if (!match(BB.getTerminator(), m_Br(m_OneUse(m_BinOp(LogicOp)), TBB, FBB)))
5788       continue;
5789 
5790     auto *Br1 = cast<BranchInst>(BB.getTerminator());
5791     if (Br1->getMetadata(LLVMContext::MD_unpredictable))
5792       continue;
5793 
5794     unsigned Opc;
5795     Value *Cond1, *Cond2;
5796     if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)),
5797                              m_OneUse(m_Value(Cond2)))))
5798       Opc = Instruction::And;
5799     else if (match(LogicOp, m_Or(m_OneUse(m_Value(Cond1)),
5800                                  m_OneUse(m_Value(Cond2)))))
5801       Opc = Instruction::Or;
5802     else
5803       continue;
5804 
5805     if (!match(Cond1, m_CombineOr(m_Cmp(), m_BinOp())) ||
5806         !match(Cond2, m_CombineOr(m_Cmp(), m_BinOp())))
5807       continue;
5808 
5809     DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
5810 
5811     // Create a new BB.
5812     auto TmpBB =
5813         BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split",
5814                            BB.getParent(), BB.getNextNode());
5815 
5816     // Update the original basic block: have the branch instruction use the
5817     // first condition directly and remove the no-longer-needed and/or instruction.
5818     Br1->setCondition(Cond1);
5819     LogicOp->eraseFromParent();
5820 
5821     // Depending on the condition we have to either replace the true or the
5822     // false successor of the original branch instruction.
5823     if (Opc == Instruction::And)
5824       Br1->setSuccessor(0, TmpBB);
5825     else
5826       Br1->setSuccessor(1, TmpBB);
5827 
5828     // Fill in the new basic block.
5829     auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
5830     if (auto *I = dyn_cast<Instruction>(Cond2)) {
5831       I->removeFromParent();
5832       I->insertBefore(Br2);
5833     }
5834 
5835     // Update PHI nodes in both successors. The original BB needs to be
5836     // replaced in one successor's PHI nodes, because the branch now comes from
5837     // the newly generated BB (TmpBB). In the other successor we need to add one
5838     // incoming edge to the PHI nodes, because both branch instructions now
5839     // target the same successor. Depending on the original branch condition
5840     // (and/or) we have to swap the successors (TrueDest, FalseDest), so that
5841     // we perform the correct update for the PHI nodes.
5842     // This doesn't change the successor order of the just created branch
5843     // instruction (or any other instruction).
5844     if (Opc == Instruction::Or)
5845       std::swap(TBB, FBB);
5846 
5847     // Replace the old BB with the new BB.
5848     for (auto &I : *TBB) {
5849       PHINode *PN = dyn_cast<PHINode>(&I);
5850       if (!PN)
5851         break;
5852       int i;
5853       while ((i = PN->getBasicBlockIndex(&BB)) >= 0)
5854         PN->setIncomingBlock(i, TmpBB);
5855     }
5856 
5857     // Add another incoming edge from the new BB.
5858     for (auto &I : *FBB) {
5859       PHINode *PN = dyn_cast<PHINode>(&I);
5860       if (!PN)
5861         break;
5862       auto *Val = PN->getIncomingValueForBlock(&BB);
5863       PN->addIncoming(Val, TmpBB);
5864     }
5865 
5866     // Update the branch weights (from SelectionDAGBuilder::
5867     // FindMergedConditions).
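    // Worked example (illustrative, for the "or" case below): with original
    // weights A = 30 (true) and B = 10 (false), Br1 gets weights 30:50 and
    // Br2 gets weights 30:20, so
    //   P(TBB) = 30/80 + (50/80) * (30/50) = 0.75 = A / (A + B),
    // i.e. the probability of reaching TBB is preserved.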
5868     if (Opc == Instruction::Or) {
5869       // Codegen X | Y as:
5870       // BB1:
5871       //   jmp_if_X TBB
5872       //   jmp TmpBB
5873       // TmpBB:
5874       //   jmp_if_Y TBB
5875       //   jmp FBB
5876       //
5877 
5878       // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
5879       // The requirement is that
5880       //   TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
5881       //     = TrueProb for original BB.
5882       // Assuming the original weights are A and B, one choice is to set BB1's
5883       // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
5884       // assumes that
5885       //   TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
5886       // Another choice is to assume TrueProb for BB1 equals TrueProb for
5887       // TmpBB, but the math is more complicated.
5888       uint64_t TrueWeight, FalseWeight;
5889       if (Br1->extractProfMetadata(TrueWeight, FalseWeight)) {
5890         uint64_t NewTrueWeight = TrueWeight;
5891         uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
5892         scaleWeights(NewTrueWeight, NewFalseWeight);
5893         Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
5894                          .createBranchWeights(NewTrueWeight, NewFalseWeight));
5895 
5896         NewTrueWeight = TrueWeight;
5897         NewFalseWeight = 2 * FalseWeight;
5898         scaleWeights(NewTrueWeight, NewFalseWeight);
5899         Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
5900                          .createBranchWeights(NewTrueWeight, NewFalseWeight));
5901       }
5902     } else {
5903       // Codegen X & Y as:
5904       // BB1:
5905       //   jmp_if_X TmpBB
5906       //   jmp FBB
5907       // TmpBB:
5908       //   jmp_if_Y TBB
5909       //   jmp FBB
5910       //
5911       // This requires creation of TmpBB after CurBB.
5912 
5913       // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
5914       // The requirement is that
5915       //   FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
5916       //     = FalseProb for original BB.
5917       // Assuming the original weights are A and B, one choice is to set BB1's
5918       // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
5919       // assumes that
5920       //   FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
5921       uint64_t TrueWeight, FalseWeight;
5922       if (Br1->extractProfMetadata(TrueWeight, FalseWeight)) {
5923         uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
5924         uint64_t NewFalseWeight = FalseWeight;
5925         scaleWeights(NewTrueWeight, NewFalseWeight);
5926         Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
5927                          .createBranchWeights(NewTrueWeight, NewFalseWeight));
5928 
5929         NewTrueWeight = 2 * TrueWeight;
5930         NewFalseWeight = FalseWeight;
5931         scaleWeights(NewTrueWeight, NewFalseWeight);
5932         Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
5933                          .createBranchWeights(NewTrueWeight, NewFalseWeight));
5934       }
5935     }
5936 
5937     // Note: No point in getting fancy here, since the DT info is never
5938     // available to CodeGenPrepare.
5939     ModifiedDT = true;
5940 
5941     MadeChange = true;
5942 
5943     DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
5944           TmpBB->dump());
5945   }
5946   return MadeChange;
5947 }
5948