//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach. It should eventually be removed.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "codegenprepare"

STATISTIC(NumBlocksElim, "Number of blocks eliminated");
STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
                      "sunken Cmps");
STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
                       "of sunken Casts");
STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
                          "computations were sunk");
STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
STATISTIC(NumRetsDup, "Number of return instructions duplicated");
STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
STATISTIC(NumAndCmpsMoved, "Number of and/cmp's pushed into branches");
STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");

static cl::opt<bool> DisableBranchOpts(
    "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
    cl::desc("Disable branch optimizations in CodeGenPrepare"));

static cl::opt<bool> DisableSelectToBranch(
    "disable-cgp-select2branch", cl::Hidden, cl::init(false),
    cl::desc("Disable select to branch conversion."));

static cl::opt<bool> AddrSinkUsingGEPs(
    "addr-sink-using-gep", cl::Hidden, cl::init(false),
    cl::desc("Address sinking in CGP using GEPs."));

static cl::opt<bool> EnableAndCmpSinking(
    "enable-andcmp-sinking", cl::Hidden, cl::init(true),
    cl::desc("Enable sinking and/cmp into branches."));

static cl::opt<bool> DisableStoreExtract(
    "disable-cgp-store-extract", cl::Hidden, cl::init(false),
    cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));

static cl::opt<bool> StressStoreExtract(
    "stress-cgp-store-extract", cl::Hidden, cl::init(false),
    cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));

static cl::opt<bool> DisableExtLdPromotion(
    "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
    cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
             "CodeGenPrepare"));

static cl::opt<bool> StressExtLdPromotion(
    "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
    cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
             "optimization in CodeGenPrepare"));

namespace {
typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
struct TypeIsSExt {
  Type *Ty;
  bool IsSExt;
  TypeIsSExt(Type *Ty, bool IsSExt) : Ty(Ty), IsSExt(IsSExt) {}
};
typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
class TypePromotionTransaction;

class CodeGenPrepare : public FunctionPass {
  /// TLI - Keep a pointer to a TargetLowering to consult for determining
  /// transformation profitability.
  const TargetMachine *TM;
  const TargetLowering *TLI;
  const TargetTransformInfo *TTI;
  const TargetLibraryInfo *TLInfo;
  DominatorTree *DT;

  /// CurInstIterator - As we scan instructions optimizing them, this is the
  /// next instruction to optimize. Transforms that can invalidate this should
  /// update it.
  BasicBlock::iterator CurInstIterator;

  /// Keeps track of non-local addresses that have been sunk into a block.
  /// This allows us to avoid inserting duplicate code for blocks with
  /// multiple load/stores of the same address.
  ValueMap<Value*, Value*> SunkAddrs;

  /// Keeps track of all truncates inserted for the current function.
  SetOfInstrs InsertedTruncsSet;
  /// Keeps track of the original type of each instruction that was promoted
  /// for the current function.
  InstrToOrigTy PromotedInsts;

  /// ModifiedDT - If the CFG is modified in any way, the dominator tree may
  /// need to be updated.
  bool ModifiedDT;

  /// OptSize - True if optimizing for size.
144 bool OptSize; 145 146 public: 147 static char ID; // Pass identification, replacement for typeid 148 explicit CodeGenPrepare(const TargetMachine *TM = nullptr) 149 : FunctionPass(ID), TM(TM), TLI(nullptr), TTI(nullptr) { 150 initializeCodeGenPreparePass(*PassRegistry::getPassRegistry()); 151 } 152 bool runOnFunction(Function &F) override; 153 154 const char *getPassName() const override { return "CodeGen Prepare"; } 155 156 void getAnalysisUsage(AnalysisUsage &AU) const override { 157 AU.addPreserved<DominatorTreeWrapperPass>(); 158 AU.addRequired<TargetLibraryInfo>(); 159 AU.addRequired<TargetTransformInfo>(); 160 } 161 162 private: 163 bool EliminateFallThrough(Function &F); 164 bool EliminateMostlyEmptyBlocks(Function &F); 165 bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; 166 void EliminateMostlyEmptyBlock(BasicBlock *BB); 167 bool OptimizeBlock(BasicBlock &BB); 168 bool OptimizeInst(Instruction *I); 169 bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy); 170 bool OptimizeInlineAsmInst(CallInst *CS); 171 bool OptimizeCallInst(CallInst *CI); 172 bool MoveExtToFormExtLoad(Instruction *&I); 173 bool OptimizeExtUses(Instruction *I); 174 bool OptimizeSelectInst(SelectInst *SI); 175 bool OptimizeShuffleVectorInst(ShuffleVectorInst *SI); 176 bool OptimizeExtractElementInst(Instruction *Inst); 177 bool DupRetToEnableTailCallOpts(BasicBlock *BB); 178 bool PlaceDbgValues(Function &F); 179 bool sinkAndCmp(Function &F); 180 bool ExtLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI, 181 Instruction *&Inst, 182 const SmallVectorImpl<Instruction *> &Exts, 183 unsigned CreatedInst); 184 bool splitBranchCondition(Function &F); 185 }; 186 } 187 188 char CodeGenPrepare::ID = 0; 189 INITIALIZE_TM_PASS(CodeGenPrepare, "codegenprepare", 190 "Optimize for code generation", false, false) 191 192 FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) { 193 return new CodeGenPrepare(TM); 194 } 195 196 bool CodeGenPrepare::runOnFunction(Function &F) { 197 if (skipOptnoneFunction(F)) 198 return false; 199 200 bool EverMadeChange = false; 201 // Clear per function information. 202 InsertedTruncsSet.clear(); 203 PromotedInsts.clear(); 204 205 ModifiedDT = false; 206 if (TM) 207 TLI = TM->getSubtargetImpl()->getTargetLowering(); 208 TLInfo = &getAnalysis<TargetLibraryInfo>(); 209 TTI = &getAnalysis<TargetTransformInfo>(); 210 DominatorTreeWrapperPass *DTWP = 211 getAnalysisIfAvailable<DominatorTreeWrapperPass>(); 212 DT = DTWP ? &DTWP->getDomTree() : nullptr; 213 OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, 214 Attribute::OptimizeForSize); 215 216 /// This optimization identifies DIV instructions that can be 217 /// profitably bypassed and carried out with a shorter, faster divide. 218 if (!OptSize && TLI && TLI->isSlowDivBypassed()) { 219 const DenseMap<unsigned int, unsigned int> &BypassWidths = 220 TLI->getBypassSlowDivWidths(); 221 for (Function::iterator I = F.begin(); I != F.end(); I++) 222 EverMadeChange |= bypassSlowDivision(F, I, BypassWidths); 223 } 224 225 // Eliminate blocks that contain only PHI nodes and an 226 // unconditional branch. 227 EverMadeChange |= EliminateMostlyEmptyBlocks(F); 228 229 // llvm.dbg.value is far away from the value then iSel may not be able 230 // handle it properly. iSel will drop llvm.dbg.value if it can not 231 // find a node corresponding to the value. 
232 EverMadeChange |= PlaceDbgValues(F); 233 234 // If there is a mask, compare against zero, and branch that can be combined 235 // into a single target instruction, push the mask and compare into branch 236 // users. Do this before OptimizeBlock -> OptimizeInst -> 237 // OptimizeCmpExpression, which perturbs the pattern being searched for. 238 if (!DisableBranchOpts) { 239 EverMadeChange |= sinkAndCmp(F); 240 EverMadeChange |= splitBranchCondition(F); 241 } 242 243 bool MadeChange = true; 244 while (MadeChange) { 245 MadeChange = false; 246 for (Function::iterator I = F.begin(); I != F.end(); ) { 247 BasicBlock *BB = I++; 248 MadeChange |= OptimizeBlock(*BB); 249 } 250 EverMadeChange |= MadeChange; 251 } 252 253 SunkAddrs.clear(); 254 255 if (!DisableBranchOpts) { 256 MadeChange = false; 257 SmallPtrSet<BasicBlock*, 8> WorkList; 258 for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { 259 SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB)); 260 MadeChange |= ConstantFoldTerminator(BB, true); 261 if (!MadeChange) continue; 262 263 for (SmallVectorImpl<BasicBlock*>::iterator 264 II = Successors.begin(), IE = Successors.end(); II != IE; ++II) 265 if (pred_begin(*II) == pred_end(*II)) 266 WorkList.insert(*II); 267 } 268 269 // Delete the dead blocks and any of their dead successors. 270 MadeChange |= !WorkList.empty(); 271 while (!WorkList.empty()) { 272 BasicBlock *BB = *WorkList.begin(); 273 WorkList.erase(BB); 274 SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB)); 275 276 DeleteDeadBlock(BB); 277 278 for (SmallVectorImpl<BasicBlock*>::iterator 279 II = Successors.begin(), IE = Successors.end(); II != IE; ++II) 280 if (pred_begin(*II) == pred_end(*II)) 281 WorkList.insert(*II); 282 } 283 284 // Merge pairs of basic blocks with unconditional branches, connected by 285 // a single edge. 286 if (EverMadeChange || MadeChange) 287 MadeChange |= EliminateFallThrough(F); 288 289 if (MadeChange) 290 ModifiedDT = true; 291 EverMadeChange |= MadeChange; 292 } 293 294 if (ModifiedDT && DT) 295 DT->recalculate(F); 296 297 return EverMadeChange; 298 } 299 300 /// EliminateFallThrough - Merge basic blocks which are connected 301 /// by a single edge, where one of the basic blocks has a single successor 302 /// pointing to the other basic block, which has a single predecessor. 303 bool CodeGenPrepare::EliminateFallThrough(Function &F) { 304 bool Changed = false; 305 // Scan all of the blocks in the function, except for the entry block. 306 for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { 307 BasicBlock *BB = I++; 308 // If the destination block has a single pred, then this is a trivial 309 // edge, just collapse it. 310 BasicBlock *SinglePred = BB->getSinglePredecessor(); 311 312 // Don't merge if BB's address is taken. 313 if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue; 314 315 BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator()); 316 if (Term && !Term->isConditional()) { 317 Changed = true; 318 DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n"); 319 // Remember if SinglePred was the entry block of the function. 320 // If so, we will need to move BB back to the entry position. 321 bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); 322 MergeBasicBlockIntoOnlyPred(BB, this); 323 324 if (isEntry && BB != &BB->getParent()->getEntryBlock()) 325 BB->moveBefore(&BB->getParent()->getEntryBlock()); 326 327 // We have erased a block. Update the iterator. 
328 I = BB; 329 } 330 } 331 return Changed; 332 } 333 334 /// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes, 335 /// debug info directives, and an unconditional branch. Passes before isel 336 /// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for 337 /// isel. Start by eliminating these blocks so we can split them the way we 338 /// want them. 339 bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) { 340 bool MadeChange = false; 341 // Note that this intentionally skips the entry block. 342 for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { 343 BasicBlock *BB = I++; 344 345 // If this block doesn't end with an uncond branch, ignore it. 346 BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); 347 if (!BI || !BI->isUnconditional()) 348 continue; 349 350 // If the instruction before the branch (skipping debug info) isn't a phi 351 // node, then other stuff is happening here. 352 BasicBlock::iterator BBI = BI; 353 if (BBI != BB->begin()) { 354 --BBI; 355 while (isa<DbgInfoIntrinsic>(BBI)) { 356 if (BBI == BB->begin()) 357 break; 358 --BBI; 359 } 360 if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI)) 361 continue; 362 } 363 364 // Do not break infinite loops. 365 BasicBlock *DestBB = BI->getSuccessor(0); 366 if (DestBB == BB) 367 continue; 368 369 if (!CanMergeBlocks(BB, DestBB)) 370 continue; 371 372 EliminateMostlyEmptyBlock(BB); 373 MadeChange = true; 374 } 375 return MadeChange; 376 } 377 378 /// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a 379 /// single uncond branch between them, and BB contains no other non-phi 380 /// instructions. 381 bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB, 382 const BasicBlock *DestBB) const { 383 // We only want to eliminate blocks whose phi nodes are used by phi nodes in 384 // the successor. If there are more complex condition (e.g. preheaders), 385 // don't mess around with them. 386 BasicBlock::const_iterator BBI = BB->begin(); 387 while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) { 388 for (const User *U : PN->users()) { 389 const Instruction *UI = cast<Instruction>(U); 390 if (UI->getParent() != DestBB || !isa<PHINode>(UI)) 391 return false; 392 // If User is inside DestBB block and it is a PHINode then check 393 // incoming value. If incoming value is not from BB then this is 394 // a complex condition (e.g. preheaders) we want to avoid here. 395 if (UI->getParent() == DestBB) { 396 if (const PHINode *UPN = dyn_cast<PHINode>(UI)) 397 for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) { 398 Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I)); 399 if (Insn && Insn->getParent() == BB && 400 Insn->getParent() != UPN->getIncomingBlock(I)) 401 return false; 402 } 403 } 404 } 405 } 406 407 // If BB and DestBB contain any common predecessors, then the phi nodes in BB 408 // and DestBB may have conflicting incoming values for the block. If so, we 409 // can't merge the block. 410 const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin()); 411 if (!DestBBPN) return true; // no conflict. 412 413 // Collect the preds of BB. 414 SmallPtrSet<const BasicBlock*, 16> BBPreds; 415 if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) { 416 // It is faster to get preds from a PHI than with pred_iterator. 
417 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i) 418 BBPreds.insert(BBPN->getIncomingBlock(i)); 419 } else { 420 BBPreds.insert(pred_begin(BB), pred_end(BB)); 421 } 422 423 // Walk the preds of DestBB. 424 for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) { 425 BasicBlock *Pred = DestBBPN->getIncomingBlock(i); 426 if (BBPreds.count(Pred)) { // Common predecessor? 427 BBI = DestBB->begin(); 428 while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) { 429 const Value *V1 = PN->getIncomingValueForBlock(Pred); 430 const Value *V2 = PN->getIncomingValueForBlock(BB); 431 432 // If V2 is a phi node in BB, look up what the mapped value will be. 433 if (const PHINode *V2PN = dyn_cast<PHINode>(V2)) 434 if (V2PN->getParent() == BB) 435 V2 = V2PN->getIncomingValueForBlock(Pred); 436 437 // If there is a conflict, bail out. 438 if (V1 != V2) return false; 439 } 440 } 441 } 442 443 return true; 444 } 445 446 447 /// EliminateMostlyEmptyBlock - Eliminate a basic block that have only phi's and 448 /// an unconditional branch in it. 449 void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { 450 BranchInst *BI = cast<BranchInst>(BB->getTerminator()); 451 BasicBlock *DestBB = BI->getSuccessor(0); 452 453 DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB); 454 455 // If the destination block has a single pred, then this is a trivial edge, 456 // just collapse it. 457 if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) { 458 if (SinglePred != DestBB) { 459 // Remember if SinglePred was the entry block of the function. If so, we 460 // will need to move BB back to the entry position. 461 bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); 462 MergeBasicBlockIntoOnlyPred(DestBB, this); 463 464 if (isEntry && BB != &BB->getParent()->getEntryBlock()) 465 BB->moveBefore(&BB->getParent()->getEntryBlock()); 466 467 DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); 468 return; 469 } 470 } 471 472 // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB 473 // to handle the new incoming edges it is about to have. 474 PHINode *PN; 475 for (BasicBlock::iterator BBI = DestBB->begin(); 476 (PN = dyn_cast<PHINode>(BBI)); ++BBI) { 477 // Remove the incoming value for BB, and remember it. 478 Value *InVal = PN->removeIncomingValue(BB, false); 479 480 // Two options: either the InVal is a phi node defined in BB or it is some 481 // value that dominates BB. 482 PHINode *InValPhi = dyn_cast<PHINode>(InVal); 483 if (InValPhi && InValPhi->getParent() == BB) { 484 // Add all of the input values of the input PHI as inputs of this phi. 485 for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i) 486 PN->addIncoming(InValPhi->getIncomingValue(i), 487 InValPhi->getIncomingBlock(i)); 488 } else { 489 // Otherwise, add one instance of the dominating value for each edge that 490 // we will be adding. 491 if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) { 492 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i) 493 PN->addIncoming(InVal, BBPN->getIncomingBlock(i)); 494 } else { 495 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) 496 PN->addIncoming(InVal, *PI); 497 } 498 } 499 } 500 501 // The PHIs are now updated, change everything that refers to BB to use 502 // DestBB and remove BB. 
503 BB->replaceAllUsesWith(DestBB); 504 if (DT && !ModifiedDT) { 505 BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock(); 506 BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock(); 507 BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom); 508 DT->changeImmediateDominator(DestBB, NewIDom); 509 DT->eraseNode(BB); 510 } 511 BB->eraseFromParent(); 512 ++NumBlocksElim; 513 514 DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); 515 } 516 517 /// SinkCast - Sink the specified cast instruction into its user blocks 518 static bool SinkCast(CastInst *CI) { 519 BasicBlock *DefBB = CI->getParent(); 520 521 /// InsertedCasts - Only insert a cast in each block once. 522 DenseMap<BasicBlock*, CastInst*> InsertedCasts; 523 524 bool MadeChange = false; 525 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end(); 526 UI != E; ) { 527 Use &TheUse = UI.getUse(); 528 Instruction *User = cast<Instruction>(*UI); 529 530 // Figure out which BB this cast is used in. For PHI's this is the 531 // appropriate predecessor block. 532 BasicBlock *UserBB = User->getParent(); 533 if (PHINode *PN = dyn_cast<PHINode>(User)) { 534 UserBB = PN->getIncomingBlock(TheUse); 535 } 536 537 // Preincrement use iterator so we don't invalidate it. 538 ++UI; 539 540 // If this user is in the same block as the cast, don't change the cast. 541 if (UserBB == DefBB) continue; 542 543 // If we have already inserted a cast into this block, use it. 544 CastInst *&InsertedCast = InsertedCasts[UserBB]; 545 546 if (!InsertedCast) { 547 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); 548 InsertedCast = 549 CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "", 550 InsertPt); 551 MadeChange = true; 552 } 553 554 // Replace a use of the cast with a use of the new cast. 555 TheUse = InsertedCast; 556 ++NumCastUses; 557 } 558 559 // If we removed all uses, nuke the cast. 560 if (CI->use_empty()) { 561 CI->eraseFromParent(); 562 MadeChange = true; 563 } 564 565 return MadeChange; 566 } 567 568 /// OptimizeNoopCopyExpression - If the specified cast instruction is a noop 569 /// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC), 570 /// sink it into user blocks to reduce the number of virtual 571 /// registers that must be created and coalesced. 572 /// 573 /// Return true if any changes are made. 574 /// 575 static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){ 576 // If this is a noop copy, 577 EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType()); 578 EVT DstVT = TLI.getValueType(CI->getType()); 579 580 // This is an fp<->int conversion? 581 if (SrcVT.isInteger() != DstVT.isInteger()) 582 return false; 583 584 // If this is an extension, it will be a zero or sign extension, which 585 // isn't a noop. 586 if (SrcVT.bitsLT(DstVT)) return false; 587 588 // If these values will be promoted, find out what they will be promoted 589 // to. This helps us consider truncates on PPC as noop copies when they 590 // are. 591 if (TLI.getTypeAction(CI->getContext(), SrcVT) == 592 TargetLowering::TypePromoteInteger) 593 SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT); 594 if (TLI.getTypeAction(CI->getContext(), DstVT) == 595 TargetLowering::TypePromoteInteger) 596 DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT); 597 598 // If, after promotion, these are the same types, this is a noop copy. 
599 if (SrcVT != DstVT) 600 return false; 601 602 return SinkCast(CI); 603 } 604 605 /// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce 606 /// the number of virtual registers that must be created and coalesced. This is 607 /// a clear win except on targets with multiple condition code registers 608 /// (PowerPC), where it might lose; some adjustment may be wanted there. 609 /// 610 /// Return true if any changes are made. 611 static bool OptimizeCmpExpression(CmpInst *CI) { 612 BasicBlock *DefBB = CI->getParent(); 613 614 /// InsertedCmp - Only insert a cmp in each block once. 615 DenseMap<BasicBlock*, CmpInst*> InsertedCmps; 616 617 bool MadeChange = false; 618 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end(); 619 UI != E; ) { 620 Use &TheUse = UI.getUse(); 621 Instruction *User = cast<Instruction>(*UI); 622 623 // Preincrement use iterator so we don't invalidate it. 624 ++UI; 625 626 // Don't bother for PHI nodes. 627 if (isa<PHINode>(User)) 628 continue; 629 630 // Figure out which BB this cmp is used in. 631 BasicBlock *UserBB = User->getParent(); 632 633 // If this user is in the same block as the cmp, don't change the cmp. 634 if (UserBB == DefBB) continue; 635 636 // If we have already inserted a cmp into this block, use it. 637 CmpInst *&InsertedCmp = InsertedCmps[UserBB]; 638 639 if (!InsertedCmp) { 640 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); 641 InsertedCmp = 642 CmpInst::Create(CI->getOpcode(), 643 CI->getPredicate(), CI->getOperand(0), 644 CI->getOperand(1), "", InsertPt); 645 MadeChange = true; 646 } 647 648 // Replace a use of the cmp with a use of the new cmp. 649 TheUse = InsertedCmp; 650 ++NumCmpUses; 651 } 652 653 // If we removed all uses, nuke the cmp. 654 if (CI->use_empty()) 655 CI->eraseFromParent(); 656 657 return MadeChange; 658 } 659 660 /// isExtractBitsCandidateUse - Check if the candidates could 661 /// be combined with shift instruction, which includes: 662 /// 1. Truncate instruction 663 /// 2. And instruction and the imm is a mask of the low bits: 664 /// imm & (imm+1) == 0 665 static bool isExtractBitsCandidateUse(Instruction *User) { 666 if (!isa<TruncInst>(User)) { 667 if (User->getOpcode() != Instruction::And || 668 !isa<ConstantInt>(User->getOperand(1))) 669 return false; 670 671 const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue(); 672 673 if ((Cimm & (Cimm + 1)).getBoolValue()) 674 return false; 675 } 676 return true; 677 } 678 679 /// SinkShiftAndTruncate - sink both shift and truncate instruction 680 /// to the use of truncate's BB. 681 static bool 682 SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, 683 DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts, 684 const TargetLowering &TLI) { 685 BasicBlock *UserBB = User->getParent(); 686 DenseMap<BasicBlock *, CastInst *> InsertedTruncs; 687 TruncInst *TruncI = dyn_cast<TruncInst>(User); 688 bool MadeChange = false; 689 690 for (Value::user_iterator TruncUI = TruncI->user_begin(), 691 TruncE = TruncI->user_end(); 692 TruncUI != TruncE;) { 693 694 Use &TruncTheUse = TruncUI.getUse(); 695 Instruction *TruncUser = cast<Instruction>(*TruncUI); 696 // Preincrement use iterator so we don't invalidate it. 697 698 ++TruncUI; 699 700 int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode()); 701 if (!ISDOpcode) 702 continue; 703 704 // If the use is actually a legal node, there will not be an 705 // implicit truncate. 
706 // FIXME: always querying the result type is just an 707 // approximation; some nodes' legality is determined by the 708 // operand or other means. There's no good way to find out though. 709 if (TLI.isOperationLegalOrCustom( 710 ISDOpcode, TLI.getValueType(TruncUser->getType(), true))) 711 continue; 712 713 // Don't bother for PHI nodes. 714 if (isa<PHINode>(TruncUser)) 715 continue; 716 717 BasicBlock *TruncUserBB = TruncUser->getParent(); 718 719 if (UserBB == TruncUserBB) 720 continue; 721 722 BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB]; 723 CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB]; 724 725 if (!InsertedShift && !InsertedTrunc) { 726 BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt(); 727 // Sink the shift 728 if (ShiftI->getOpcode() == Instruction::AShr) 729 InsertedShift = 730 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt); 731 else 732 InsertedShift = 733 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt); 734 735 // Sink the trunc 736 BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt(); 737 TruncInsertPt++; 738 739 InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift, 740 TruncI->getType(), "", TruncInsertPt); 741 742 MadeChange = true; 743 744 TruncTheUse = InsertedTrunc; 745 } 746 } 747 return MadeChange; 748 } 749 750 /// OptimizeExtractBits - sink the shift *right* instruction into user blocks if 751 /// the uses could potentially be combined with this shift instruction and 752 /// generate BitExtract instruction. It will only be applied if the architecture 753 /// supports BitExtract instruction. Here is an example: 754 /// BB1: 755 /// %x.extract.shift = lshr i64 %arg1, 32 756 /// BB2: 757 /// %x.extract.trunc = trunc i64 %x.extract.shift to i16 758 /// ==> 759 /// 760 /// BB2: 761 /// %x.extract.shift.1 = lshr i64 %arg1, 32 762 /// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16 763 /// 764 /// CodeGen will recoginze the pattern in BB2 and generate BitExtract 765 /// instruction. 766 /// Return true if any changes are made. 767 static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, 768 const TargetLowering &TLI) { 769 BasicBlock *DefBB = ShiftI->getParent(); 770 771 /// Only insert instructions in each block once. 772 DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts; 773 774 bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(ShiftI->getType())); 775 776 bool MadeChange = false; 777 for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end(); 778 UI != E;) { 779 Use &TheUse = UI.getUse(); 780 Instruction *User = cast<Instruction>(*UI); 781 // Preincrement use iterator so we don't invalidate it. 782 ++UI; 783 784 // Don't bother for PHI nodes. 785 if (isa<PHINode>(User)) 786 continue; 787 788 if (!isExtractBitsCandidateUse(User)) 789 continue; 790 791 BasicBlock *UserBB = User->getParent(); 792 793 if (UserBB == DefBB) { 794 // If the shift and truncate instruction are in the same BB. The use of 795 // the truncate(TruncUse) may still introduce another truncate if not 796 // legal. In this case, we would like to sink both shift and truncate 797 // instruction to the BB of TruncUse. 798 // for example: 799 // BB1: 800 // i64 shift.result = lshr i64 opnd, imm 801 // trunc.result = trunc shift.result to i16 802 // 803 // BB2: 804 // ----> We will have an implicit truncate here if the architecture does 805 // not have i16 compare. 
      // cmp i16 trunc.result, opnd2
      //
      if (isa<TruncInst>(User) && shiftIsLegal
          // If the type of the truncate is legal, no truncate will be
          // introduced in other basic blocks.
          && (!TLI.isTypeLegal(TLI.getValueType(User->getType()))))
        MadeChange =
            SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI);

      continue;
    }
    // If we have already inserted a shift into this block, use it.
    BinaryOperator *&InsertedShift = InsertedShifts[UserBB];

    if (!InsertedShift) {
      BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();

      if (ShiftI->getOpcode() == Instruction::AShr)
        InsertedShift =
            BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt);
      else
        InsertedShift =
            BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt);

      MadeChange = true;
    }

    // Replace a use of the shift with a use of the new shift.
    TheUse = InsertedShift;
  }

  // If we removed all uses, nuke the shift.
  if (ShiftI->use_empty())
    ShiftI->eraseFromParent();

  return MadeChange;
}

namespace {
class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls {
protected:
  void replaceCall(Value *With) override {
    CI->replaceAllUsesWith(With);
    CI->eraseFromParent();
  }
  bool isFoldable(unsigned SizeCIOp, unsigned, bool) const override {
    if (ConstantInt *SizeCI =
            dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp)))
      return SizeCI->isAllOnesValue();
    return false;
  }
};
} // end anonymous namespace

bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
  BasicBlock *BB = CI->getParent();

  // Lower inline assembly if we can.
  // If we found an inline asm expression, and if the target knows how to
  // lower it to normal LLVM code, do so now.
  if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
    if (TLI->ExpandInlineAsm(CI)) {
      // Avoid invalidating the iterator.
      CurInstIterator = BB->begin();
      // Avoid processing instructions out of order, which could cause
      // reuse before a value is defined.
      SunkAddrs.clear();
      return true;
    }
    // Sink address computing for memory operands into the block.
    if (OptimizeInlineAsmInst(CI))
      return true;
  }

  // Lower all uses of llvm.objectsize.*
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
  if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
    bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
    Type *ReturnTy = CI->getType();
    Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);

    // Substituting this can cause recursive simplifications, which can
    // invalidate our iterator. Use a WeakVH to hold onto it in case this
    // happens.
    WeakVH IterHandle(CurInstIterator);

    replaceAndRecursivelySimplify(CI, RetVal,
                                  TLI ? TLI->getDataLayout() : nullptr,
                                  TLInfo, ModifiedDT ? nullptr : DT);

    // If the iterator instruction was recursively deleted, start over at the
    // start of the block.
    if (IterHandle != CurInstIterator) {
      CurInstIterator = BB->begin();
      SunkAddrs.clear();
    }
    return true;
  }

  if (II && TLI) {
    SmallVector<Value*, 2> PtrOps;
    Type *AccessTy;
    if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy))
      while (!PtrOps.empty())
        if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy))
          return true;
  }

  // From here on out we're working with named functions.
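  // (getCalledFunction() is null for indirect calls, and the fortified
  // libcall folding below needs a known callee, so bail out for those.)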
915 if (!CI->getCalledFunction()) return false; 916 917 // We'll need DataLayout from here on out. 918 const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr; 919 if (!TD) return false; 920 921 // Lower all default uses of _chk calls. This is very similar 922 // to what InstCombineCalls does, but here we are only lowering calls 923 // that have the default "don't know" as the objectsize. Anything else 924 // should be left alone. 925 CodeGenPrepareFortifiedLibCalls Simplifier; 926 return Simplifier.fold(CI, TD, TLInfo); 927 } 928 929 /// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return 930 /// instructions to the predecessor to enable tail call optimizations. The 931 /// case it is currently looking for is: 932 /// @code 933 /// bb0: 934 /// %tmp0 = tail call i32 @f0() 935 /// br label %return 936 /// bb1: 937 /// %tmp1 = tail call i32 @f1() 938 /// br label %return 939 /// bb2: 940 /// %tmp2 = tail call i32 @f2() 941 /// br label %return 942 /// return: 943 /// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ] 944 /// ret i32 %retval 945 /// @endcode 946 /// 947 /// => 948 /// 949 /// @code 950 /// bb0: 951 /// %tmp0 = tail call i32 @f0() 952 /// ret i32 %tmp0 953 /// bb1: 954 /// %tmp1 = tail call i32 @f1() 955 /// ret i32 %tmp1 956 /// bb2: 957 /// %tmp2 = tail call i32 @f2() 958 /// ret i32 %tmp2 959 /// @endcode 960 bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) { 961 if (!TLI) 962 return false; 963 964 ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()); 965 if (!RI) 966 return false; 967 968 PHINode *PN = nullptr; 969 BitCastInst *BCI = nullptr; 970 Value *V = RI->getReturnValue(); 971 if (V) { 972 BCI = dyn_cast<BitCastInst>(V); 973 if (BCI) 974 V = BCI->getOperand(0); 975 976 PN = dyn_cast<PHINode>(V); 977 if (!PN) 978 return false; 979 } 980 981 if (PN && PN->getParent() != BB) 982 return false; 983 984 // It's not safe to eliminate the sign / zero extension of the return value. 985 // See llvm::isInTailCallPosition(). 986 const Function *F = BB->getParent(); 987 AttributeSet CallerAttrs = F->getAttributes(); 988 if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) || 989 CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) 990 return false; 991 992 // Make sure there are no instructions between the PHI and return, or that the 993 // return is the first instruction in the block. 994 if (PN) { 995 BasicBlock::iterator BI = BB->begin(); 996 do { ++BI; } while (isa<DbgInfoIntrinsic>(BI)); 997 if (&*BI == BCI) 998 // Also skip over the bitcast. 999 ++BI; 1000 if (&*BI != RI) 1001 return false; 1002 } else { 1003 BasicBlock::iterator BI = BB->begin(); 1004 while (isa<DbgInfoIntrinsic>(BI)) ++BI; 1005 if (&*BI != RI) 1006 return false; 1007 } 1008 1009 /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail 1010 /// call. 1011 SmallVector<CallInst*, 4> TailCalls; 1012 if (PN) { 1013 for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { 1014 CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I)); 1015 // Make sure the phi value is indeed produced by the tail call. 
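      // (The call's result must have the PHI as its only use and the call must
      // live in the corresponding predecessor block; otherwise duplicating the
      // return there would not expose a tail call.)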
1016 if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) && 1017 TLI->mayBeEmittedAsTailCall(CI)) 1018 TailCalls.push_back(CI); 1019 } 1020 } else { 1021 SmallPtrSet<BasicBlock*, 4> VisitedBBs; 1022 for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { 1023 if (!VisitedBBs.insert(*PI).second) 1024 continue; 1025 1026 BasicBlock::InstListType &InstList = (*PI)->getInstList(); 1027 BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin(); 1028 BasicBlock::InstListType::reverse_iterator RE = InstList.rend(); 1029 do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI)); 1030 if (RI == RE) 1031 continue; 1032 1033 CallInst *CI = dyn_cast<CallInst>(&*RI); 1034 if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI)) 1035 TailCalls.push_back(CI); 1036 } 1037 } 1038 1039 bool Changed = false; 1040 for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) { 1041 CallInst *CI = TailCalls[i]; 1042 CallSite CS(CI); 1043 1044 // Conservatively require the attributes of the call to match those of the 1045 // return. Ignore noalias because it doesn't affect the call sequence. 1046 AttributeSet CalleeAttrs = CS.getAttributes(); 1047 if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex). 1048 removeAttribute(Attribute::NoAlias) != 1049 AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex). 1050 removeAttribute(Attribute::NoAlias)) 1051 continue; 1052 1053 // Make sure the call instruction is followed by an unconditional branch to 1054 // the return block. 1055 BasicBlock *CallBB = CI->getParent(); 1056 BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator()); 1057 if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB) 1058 continue; 1059 1060 // Duplicate the return into CallBB. 1061 (void)FoldReturnIntoUncondBranch(RI, BB, CallBB); 1062 ModifiedDT = Changed = true; 1063 ++NumRetsDup; 1064 } 1065 1066 // If we eliminated all predecessors of the block, delete the block now. 1067 if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB)) 1068 BB->eraseFromParent(); 1069 1070 return Changed; 1071 } 1072 1073 //===----------------------------------------------------------------------===// 1074 // Memory Optimization 1075 //===----------------------------------------------------------------------===// 1076 1077 namespace { 1078 1079 /// ExtAddrMode - This is an extended version of TargetLowering::AddrMode 1080 /// which holds actual Value*'s for register values. 1081 struct ExtAddrMode : public TargetLowering::AddrMode { 1082 Value *BaseReg; 1083 Value *ScaledReg; 1084 ExtAddrMode() : BaseReg(nullptr), ScaledReg(nullptr) {} 1085 void print(raw_ostream &OS) const; 1086 void dump() const; 1087 1088 bool operator==(const ExtAddrMode& O) const { 1089 return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) && 1090 (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) && 1091 (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale); 1092 } 1093 }; 1094 1095 #ifndef NDEBUG 1096 static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) { 1097 AM.print(OS); 1098 return OS; 1099 } 1100 #endif 1101 1102 void ExtAddrMode::print(raw_ostream &OS) const { 1103 bool NeedPlus = false; 1104 OS << "["; 1105 if (BaseGV) { 1106 OS << (NeedPlus ? " + " : "") 1107 << "GV:"; 1108 BaseGV->printAsOperand(OS, /*PrintType=*/false); 1109 NeedPlus = true; 1110 } 1111 1112 if (BaseOffs) { 1113 OS << (NeedPlus ? " + " : "") 1114 << BaseOffs; 1115 NeedPlus = true; 1116 } 1117 1118 if (BaseReg) { 1119 OS << (NeedPlus ? 
" + " : "") 1120 << "Base:"; 1121 BaseReg->printAsOperand(OS, /*PrintType=*/false); 1122 NeedPlus = true; 1123 } 1124 if (Scale) { 1125 OS << (NeedPlus ? " + " : "") 1126 << Scale << "*"; 1127 ScaledReg->printAsOperand(OS, /*PrintType=*/false); 1128 } 1129 1130 OS << ']'; 1131 } 1132 1133 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1134 void ExtAddrMode::dump() const { 1135 print(dbgs()); 1136 dbgs() << '\n'; 1137 } 1138 #endif 1139 1140 /// \brief This class provides transaction based operation on the IR. 1141 /// Every change made through this class is recorded in the internal state and 1142 /// can be undone (rollback) until commit is called. 1143 class TypePromotionTransaction { 1144 1145 /// \brief This represents the common interface of the individual transaction. 1146 /// Each class implements the logic for doing one specific modification on 1147 /// the IR via the TypePromotionTransaction. 1148 class TypePromotionAction { 1149 protected: 1150 /// The Instruction modified. 1151 Instruction *Inst; 1152 1153 public: 1154 /// \brief Constructor of the action. 1155 /// The constructor performs the related action on the IR. 1156 TypePromotionAction(Instruction *Inst) : Inst(Inst) {} 1157 1158 virtual ~TypePromotionAction() {} 1159 1160 /// \brief Undo the modification done by this action. 1161 /// When this method is called, the IR must be in the same state as it was 1162 /// before this action was applied. 1163 /// \pre Undoing the action works if and only if the IR is in the exact same 1164 /// state as it was directly after this action was applied. 1165 virtual void undo() = 0; 1166 1167 /// \brief Advocate every change made by this action. 1168 /// When the results on the IR of the action are to be kept, it is important 1169 /// to call this function, otherwise hidden information may be kept forever. 1170 virtual void commit() { 1171 // Nothing to be done, this action is not doing anything. 1172 } 1173 }; 1174 1175 /// \brief Utility to remember the position of an instruction. 1176 class InsertionHandler { 1177 /// Position of an instruction. 1178 /// Either an instruction: 1179 /// - Is the first in a basic block: BB is used. 1180 /// - Has a previous instructon: PrevInst is used. 1181 union { 1182 Instruction *PrevInst; 1183 BasicBlock *BB; 1184 } Point; 1185 /// Remember whether or not the instruction had a previous instruction. 1186 bool HasPrevInstruction; 1187 1188 public: 1189 /// \brief Record the position of \p Inst. 1190 InsertionHandler(Instruction *Inst) { 1191 BasicBlock::iterator It = Inst; 1192 HasPrevInstruction = (It != (Inst->getParent()->begin())); 1193 if (HasPrevInstruction) 1194 Point.PrevInst = --It; 1195 else 1196 Point.BB = Inst->getParent(); 1197 } 1198 1199 /// \brief Insert \p Inst at the recorded position. 1200 void insert(Instruction *Inst) { 1201 if (HasPrevInstruction) { 1202 if (Inst->getParent()) 1203 Inst->removeFromParent(); 1204 Inst->insertAfter(Point.PrevInst); 1205 } else { 1206 Instruction *Position = Point.BB->getFirstInsertionPt(); 1207 if (Inst->getParent()) 1208 Inst->moveBefore(Position); 1209 else 1210 Inst->insertBefore(Position); 1211 } 1212 } 1213 }; 1214 1215 /// \brief Move an instruction before another. 1216 class InstructionMoveBefore : public TypePromotionAction { 1217 /// Original position of the instruction. 1218 InsertionHandler Position; 1219 1220 public: 1221 /// \brief Move \p Inst before \p Before. 
1222 InstructionMoveBefore(Instruction *Inst, Instruction *Before) 1223 : TypePromotionAction(Inst), Position(Inst) { 1224 DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before << "\n"); 1225 Inst->moveBefore(Before); 1226 } 1227 1228 /// \brief Move the instruction back to its original position. 1229 void undo() override { 1230 DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n"); 1231 Position.insert(Inst); 1232 } 1233 }; 1234 1235 /// \brief Set the operand of an instruction with a new value. 1236 class OperandSetter : public TypePromotionAction { 1237 /// Original operand of the instruction. 1238 Value *Origin; 1239 /// Index of the modified instruction. 1240 unsigned Idx; 1241 1242 public: 1243 /// \brief Set \p Idx operand of \p Inst with \p NewVal. 1244 OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal) 1245 : TypePromotionAction(Inst), Idx(Idx) { 1246 DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n" 1247 << "for:" << *Inst << "\n" 1248 << "with:" << *NewVal << "\n"); 1249 Origin = Inst->getOperand(Idx); 1250 Inst->setOperand(Idx, NewVal); 1251 } 1252 1253 /// \brief Restore the original value of the instruction. 1254 void undo() override { 1255 DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n" 1256 << "for: " << *Inst << "\n" 1257 << "with: " << *Origin << "\n"); 1258 Inst->setOperand(Idx, Origin); 1259 } 1260 }; 1261 1262 /// \brief Hide the operands of an instruction. 1263 /// Do as if this instruction was not using any of its operands. 1264 class OperandsHider : public TypePromotionAction { 1265 /// The list of original operands. 1266 SmallVector<Value *, 4> OriginalValues; 1267 1268 public: 1269 /// \brief Remove \p Inst from the uses of the operands of \p Inst. 1270 OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) { 1271 DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n"); 1272 unsigned NumOpnds = Inst->getNumOperands(); 1273 OriginalValues.reserve(NumOpnds); 1274 for (unsigned It = 0; It < NumOpnds; ++It) { 1275 // Save the current operand. 1276 Value *Val = Inst->getOperand(It); 1277 OriginalValues.push_back(Val); 1278 // Set a dummy one. 1279 // We could use OperandSetter here, but that would implied an overhead 1280 // that we are not willing to pay. 1281 Inst->setOperand(It, UndefValue::get(Val->getType())); 1282 } 1283 } 1284 1285 /// \brief Restore the original list of uses. 1286 void undo() override { 1287 DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n"); 1288 for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It) 1289 Inst->setOperand(It, OriginalValues[It]); 1290 } 1291 }; 1292 1293 /// \brief Build a truncate instruction. 1294 class TruncBuilder : public TypePromotionAction { 1295 Value *Val; 1296 public: 1297 /// \brief Build a truncate instruction of \p Opnd producing a \p Ty 1298 /// result. 1299 /// trunc Opnd to Ty. 1300 TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) { 1301 IRBuilder<> Builder(Opnd); 1302 Val = Builder.CreateTrunc(Opnd, Ty, "promoted"); 1303 DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n"); 1304 } 1305 1306 /// \brief Get the built value. 1307 Value *getBuiltValue() { return Val; } 1308 1309 /// \brief Remove the built instruction. 1310 void undo() override { 1311 DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n"); 1312 if (Instruction *IVal = dyn_cast<Instruction>(Val)) 1313 IVal->eraseFromParent(); 1314 } 1315 }; 1316 1317 /// \brief Build a sign extension instruction. 
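  /// Analogous to TruncBuilder above: the created sext is recorded so that
  /// undo() can simply erase it again.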
1318 class SExtBuilder : public TypePromotionAction { 1319 Value *Val; 1320 public: 1321 /// \brief Build a sign extension instruction of \p Opnd producing a \p Ty 1322 /// result. 1323 /// sext Opnd to Ty. 1324 SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty) 1325 : TypePromotionAction(InsertPt) { 1326 IRBuilder<> Builder(InsertPt); 1327 Val = Builder.CreateSExt(Opnd, Ty, "promoted"); 1328 DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n"); 1329 } 1330 1331 /// \brief Get the built value. 1332 Value *getBuiltValue() { return Val; } 1333 1334 /// \brief Remove the built instruction. 1335 void undo() override { 1336 DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n"); 1337 if (Instruction *IVal = dyn_cast<Instruction>(Val)) 1338 IVal->eraseFromParent(); 1339 } 1340 }; 1341 1342 /// \brief Build a zero extension instruction. 1343 class ZExtBuilder : public TypePromotionAction { 1344 Value *Val; 1345 public: 1346 /// \brief Build a zero extension instruction of \p Opnd producing a \p Ty 1347 /// result. 1348 /// zext Opnd to Ty. 1349 ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty) 1350 : TypePromotionAction(InsertPt) { 1351 IRBuilder<> Builder(InsertPt); 1352 Val = Builder.CreateZExt(Opnd, Ty, "promoted"); 1353 DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n"); 1354 } 1355 1356 /// \brief Get the built value. 1357 Value *getBuiltValue() { return Val; } 1358 1359 /// \brief Remove the built instruction. 1360 void undo() override { 1361 DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n"); 1362 if (Instruction *IVal = dyn_cast<Instruction>(Val)) 1363 IVal->eraseFromParent(); 1364 } 1365 }; 1366 1367 /// \brief Mutate an instruction to another type. 1368 class TypeMutator : public TypePromotionAction { 1369 /// Record the original type. 1370 Type *OrigTy; 1371 1372 public: 1373 /// \brief Mutate the type of \p Inst into \p NewTy. 1374 TypeMutator(Instruction *Inst, Type *NewTy) 1375 : TypePromotionAction(Inst), OrigTy(Inst->getType()) { 1376 DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy 1377 << "\n"); 1378 Inst->mutateType(NewTy); 1379 } 1380 1381 /// \brief Mutate the instruction back to its original type. 1382 void undo() override { 1383 DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy 1384 << "\n"); 1385 Inst->mutateType(OrigTy); 1386 } 1387 }; 1388 1389 /// \brief Replace the uses of an instruction by another instruction. 1390 class UsesReplacer : public TypePromotionAction { 1391 /// Helper structure to keep track of the replaced uses. 1392 struct InstructionAndIdx { 1393 /// The instruction using the instruction. 1394 Instruction *Inst; 1395 /// The index where this instruction is used for Inst. 1396 unsigned Idx; 1397 InstructionAndIdx(Instruction *Inst, unsigned Idx) 1398 : Inst(Inst), Idx(Idx) {} 1399 }; 1400 1401 /// Keep track of the original uses (pair Instruction, Index). 1402 SmallVector<InstructionAndIdx, 4> OriginalUses; 1403 typedef SmallVectorImpl<InstructionAndIdx>::iterator use_iterator; 1404 1405 public: 1406 /// \brief Replace all the use of \p Inst by \p New. 1407 UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) { 1408 DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New 1409 << "\n"); 1410 // Record the original uses. 1411 for (Use &U : Inst->uses()) { 1412 Instruction *UserI = cast<Instruction>(U.getUser()); 1413 OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo())); 1414 } 1415 // Now, we can replace the uses. 
    Inst->replaceAllUsesWith(New);
  }

  /// \brief Reassign the original uses of Inst to Inst.
  void undo() override {
    DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
    for (use_iterator UseIt = OriginalUses.begin(),
                      EndIt = OriginalUses.end();
         UseIt != EndIt; ++UseIt) {
      UseIt->Inst->setOperand(UseIt->Idx, Inst);
    }
  }
};

/// \brief Remove an instruction from the IR.
class InstructionRemover : public TypePromotionAction {
  /// Original position of the instruction.
  InsertionHandler Inserter;
  /// Helper structure to hide all the links to the instruction's operands.
  /// In other words, this makes it appear as if the instruction was removed.
  OperandsHider Hider;
  /// Keep track of the uses replaced, if any.
  UsesReplacer *Replacer;

public:
  /// \brief Remove all references to \p Inst and optionally replace all its
  /// uses with New.
  /// \pre If !Inst->use_empty(), then New != nullptr
  InstructionRemover(Instruction *Inst, Value *New = nullptr)
      : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
        Replacer(nullptr) {
    if (New)
      Replacer = new UsesReplacer(Inst, New);
    DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
    Inst->removeFromParent();
  }

  ~InstructionRemover() { delete Replacer; }

  /// \brief Really remove the instruction.
  void commit() override { delete Inst; }

  /// \brief Resurrect the instruction and reassign it to its original uses if
  /// a new value was provided when building this action.
  void undo() override {
    DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
    Inserter.insert(Inst);
    if (Replacer)
      Replacer->undo();
    Hider.undo();
  }
};

public:
  /// Restoration point.
  /// The restoration point is a pointer to an action instead of an iterator
  /// because the iterator may be invalidated but not the pointer.
  typedef const TypePromotionAction *ConstRestorationPt;
  /// Advocate every change made in this transaction.
  void commit();
  /// Undo all the changes made after the given point.
  void rollback(ConstRestorationPt Point);
  /// Get the current restoration point.
  ConstRestorationPt getRestorationPoint() const;

  /// \name API for IR modification with state keeping to support rollback.
  /// @{
  /// Same as Instruction::setOperand.
  void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
  /// Same as Instruction::eraseFromParent.
  void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
  /// Same as Value::replaceAllUsesWith.
  void replaceAllUsesWith(Instruction *Inst, Value *New);
  /// Same as Value::mutateType.
  void mutateType(Instruction *Inst, Type *NewTy);
  /// Same as IRBuilder::createTrunc.
  Value *createTrunc(Instruction *Opnd, Type *Ty);
  /// Same as IRBuilder::createSExt.
  Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
  /// Same as IRBuilder::createZExt.
  Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
  /// Same as Instruction::moveBefore.
  void moveBefore(Instruction *Inst, Instruction *Before);
  /// @}

private:
  /// The ordered list of actions made so far.
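  /// (rollback() pops and undoes actions from the back of this vector until
  /// the requested restoration point is reached; commit() walks it from front
  /// to back and then clears it.)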
1503 SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions; 1504 typedef SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator CommitPt; 1505 }; 1506 1507 void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx, 1508 Value *NewVal) { 1509 Actions.push_back( 1510 make_unique<TypePromotionTransaction::OperandSetter>(Inst, Idx, NewVal)); 1511 } 1512 1513 void TypePromotionTransaction::eraseInstruction(Instruction *Inst, 1514 Value *NewVal) { 1515 Actions.push_back( 1516 make_unique<TypePromotionTransaction::InstructionRemover>(Inst, NewVal)); 1517 } 1518 1519 void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst, 1520 Value *New) { 1521 Actions.push_back(make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New)); 1522 } 1523 1524 void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) { 1525 Actions.push_back(make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy)); 1526 } 1527 1528 Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, 1529 Type *Ty) { 1530 std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty)); 1531 Value *Val = Ptr->getBuiltValue(); 1532 Actions.push_back(std::move(Ptr)); 1533 return Val; 1534 } 1535 1536 Value *TypePromotionTransaction::createSExt(Instruction *Inst, 1537 Value *Opnd, Type *Ty) { 1538 std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty)); 1539 Value *Val = Ptr->getBuiltValue(); 1540 Actions.push_back(std::move(Ptr)); 1541 return Val; 1542 } 1543 1544 Value *TypePromotionTransaction::createZExt(Instruction *Inst, 1545 Value *Opnd, Type *Ty) { 1546 std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty)); 1547 Value *Val = Ptr->getBuiltValue(); 1548 Actions.push_back(std::move(Ptr)); 1549 return Val; 1550 } 1551 1552 void TypePromotionTransaction::moveBefore(Instruction *Inst, 1553 Instruction *Before) { 1554 Actions.push_back( 1555 make_unique<TypePromotionTransaction::InstructionMoveBefore>(Inst, Before)); 1556 } 1557 1558 TypePromotionTransaction::ConstRestorationPt 1559 TypePromotionTransaction::getRestorationPoint() const { 1560 return !Actions.empty() ? Actions.back().get() : nullptr; 1561 } 1562 1563 void TypePromotionTransaction::commit() { 1564 for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt; 1565 ++It) 1566 (*It)->commit(); 1567 Actions.clear(); 1568 } 1569 1570 void TypePromotionTransaction::rollback( 1571 TypePromotionTransaction::ConstRestorationPt Point) { 1572 while (!Actions.empty() && Point != Actions.back().get()) { 1573 std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val(); 1574 Curr->undo(); 1575 } 1576 } 1577 1578 /// \brief A helper class for matching addressing modes. 1579 /// 1580 /// This encapsulates the logic for matching the target-legal addressing modes. 1581 class AddressingModeMatcher { 1582 SmallVectorImpl<Instruction*> &AddrModeInsts; 1583 const TargetLowering &TLI; 1584 1585 /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and 1586 /// the memory instruction that we're computing this address for. 1587 Type *AccessTy; 1588 Instruction *MemoryInst; 1589 1590 /// AddrMode - This is the addressing mode that we're building up. This is 1591 /// part of the return value of this addressing mode matching stuff. 1592 ExtAddrMode &AddrMode; 1593 1594 /// The truncate instruction inserted by other CodeGenPrepare optimizations. 1595 const SetOfInstrs &InsertedTruncs; 1596 /// A map from the instructions to their type before promotion. 
1597 InstrToOrigTy &PromotedInsts; 1598 /// The ongoing transaction where every action should be registered. 1599 TypePromotionTransaction &TPT; 1600 1601 /// IgnoreProfitability - This is set to true when we should not do 1602 /// profitability checks. When true, IsProfitableToFoldIntoAddressingMode 1603 /// always returns true. 1604 bool IgnoreProfitability; 1605 1606 AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI, 1607 const TargetLowering &T, Type *AT, 1608 Instruction *MI, ExtAddrMode &AM, 1609 const SetOfInstrs &InsertedTruncs, 1610 InstrToOrigTy &PromotedInsts, 1611 TypePromotionTransaction &TPT) 1612 : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM), 1613 InsertedTruncs(InsertedTruncs), PromotedInsts(PromotedInsts), TPT(TPT) { 1614 IgnoreProfitability = false; 1615 } 1616 public: 1617 1618 /// Match - Find the maximal addressing mode that a load/store of V can fold, 1619 /// give an access type of AccessTy. This returns a list of involved 1620 /// instructions in AddrModeInsts. 1621 /// \p InsertedTruncs The truncate instruction inserted by other 1622 /// CodeGenPrepare 1623 /// optimizations. 1624 /// \p PromotedInsts maps the instructions to their type before promotion. 1625 /// \p The ongoing transaction where every action should be registered. 1626 static ExtAddrMode Match(Value *V, Type *AccessTy, 1627 Instruction *MemoryInst, 1628 SmallVectorImpl<Instruction*> &AddrModeInsts, 1629 const TargetLowering &TLI, 1630 const SetOfInstrs &InsertedTruncs, 1631 InstrToOrigTy &PromotedInsts, 1632 TypePromotionTransaction &TPT) { 1633 ExtAddrMode Result; 1634 1635 bool Success = AddressingModeMatcher(AddrModeInsts, TLI, AccessTy, 1636 MemoryInst, Result, InsertedTruncs, 1637 PromotedInsts, TPT).MatchAddr(V, 0); 1638 (void)Success; assert(Success && "Couldn't select *anything*?"); 1639 return Result; 1640 } 1641 private: 1642 bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth); 1643 bool MatchAddr(Value *V, unsigned Depth); 1644 bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth, 1645 bool *MovedAway = nullptr); 1646 bool IsProfitableToFoldIntoAddressingMode(Instruction *I, 1647 ExtAddrMode &AMBefore, 1648 ExtAddrMode &AMAfter); 1649 bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2); 1650 bool IsPromotionProfitable(unsigned MatchedSize, unsigned SizeWithPromotion, 1651 Value *PromotedOperand) const; 1652 }; 1653 1654 /// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode. 1655 /// Return true and update AddrMode if this addr mode is legal for the target, 1656 /// false if not. 1657 bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale, 1658 unsigned Depth) { 1659 // If Scale is 1, then this is the same as adding ScaleReg to the addressing 1660 // mode. Just process that directly. 1661 if (Scale == 1) 1662 return MatchAddr(ScaleReg, Depth); 1663 1664 // If the scale is 0, it takes nothing to add this. 1665 if (Scale == 0) 1666 return true; 1667 1668 // If we already have a scale of this value, we can add to it, otherwise, we 1669 // need an available scale field. 1670 if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg) 1671 return false; 1672 1673 ExtAddrMode TestAddrMode = AddrMode; 1674 1675 // Add scale to turn X*4+X*3 -> X*7. This could also do things like 1676 // [A+B + A*7] -> [B+A*8]. 1677 TestAddrMode.Scale += Scale; 1678 TestAddrMode.ScaledReg = ScaleReg; 1679 1680 // If the new address isn't legal, bail out. 
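  // (For example, the combined scale of 7 from X*4+X*3 above is not encodable
  // on targets such as X86, which only support scales of 1, 2, 4, or 8.)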
1681 if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) 1682 return false; 1683 1684 // It was legal, so commit it. 1685 AddrMode = TestAddrMode; 1686 1687 // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now 1688 // to see if ScaleReg is actually X+C. If so, we can turn this into adding 1689 // X*Scale + C*Scale to addr mode. 1690 ConstantInt *CI = nullptr; Value *AddLHS = nullptr; 1691 if (isa<Instruction>(ScaleReg) && // not a constant expr. 1692 match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) { 1693 TestAddrMode.ScaledReg = AddLHS; 1694 TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale; 1695 1696 // If this addressing mode is legal, commit it and remember that we folded 1697 // this instruction. 1698 if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) { 1699 AddrModeInsts.push_back(cast<Instruction>(ScaleReg)); 1700 AddrMode = TestAddrMode; 1701 return true; 1702 } 1703 } 1704 1705 // Otherwise, not (x+c)*scale, just return what we have. 1706 return true; 1707 } 1708 1709 /// MightBeFoldableInst - This is a little filter, which returns true if an 1710 /// addressing computation involving I might be folded into a load/store 1711 /// accessing it. This doesn't need to be perfect, but needs to accept at least 1712 /// the set of instructions that MatchOperationAddr can. 1713 static bool MightBeFoldableInst(Instruction *I) { 1714 switch (I->getOpcode()) { 1715 case Instruction::BitCast: 1716 case Instruction::AddrSpaceCast: 1717 // Don't touch identity bitcasts. 1718 if (I->getType() == I->getOperand(0)->getType()) 1719 return false; 1720 return I->getType()->isPointerTy() || I->getType()->isIntegerTy(); 1721 case Instruction::PtrToInt: 1722 // PtrToInt is always a noop, as we know that the int type is pointer sized. 1723 return true; 1724 case Instruction::IntToPtr: 1725 // We know the input is intptr_t, so this is foldable. 1726 return true; 1727 case Instruction::Add: 1728 return true; 1729 case Instruction::Mul: 1730 case Instruction::Shl: 1731 // Can only handle X*C and X << C. 1732 return isa<ConstantInt>(I->getOperand(1)); 1733 case Instruction::GetElementPtr: 1734 return true; 1735 default: 1736 return false; 1737 } 1738 } 1739 1740 /// \brief Check whether or not \p Val is a legal instruction for \p TLI. 1741 /// \note \p Val is assumed to be the product of some type promotion. 1742 /// Therefore if \p Val has an undefined state in \p TLI, this is assumed 1743 /// to be legal, as the non-promoted value would have had the same state. 1744 static bool isPromotedInstructionLegal(const TargetLowering &TLI, Value *Val) { 1745 Instruction *PromotedInst = dyn_cast<Instruction>(Val); 1746 if (!PromotedInst) 1747 return false; 1748 int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode()); 1749 // If the ISDOpcode is undefined, it was undefined before the promotion. 1750 if (!ISDOpcode) 1751 return true; 1752 // Otherwise, check if the promoted instruction is legal or not. 1753 return TLI.isOperationLegalOrCustom( 1754 ISDOpcode, TLI.getValueType(PromotedInst->getType())); 1755 } 1756 1757 /// \brief Hepler class to perform type promotion. 1758 class TypePromotionHelper { 1759 /// \brief Utility function to check whether or not a sign or zero extension 1760 /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by 1761 /// either using the operands of \p Inst or promoting \p Inst. 1762 /// The type of the extension is defined by \p IsSExt. 
1763 /// In other words, check if: 1764 /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType. 1765 /// #1 Promotion applies: 1766 /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...). 1767 /// #2 Operand reuses: 1768 /// ext opnd1 to ConsideredExtType. 1769 /// \p PromotedInsts maps the instructions to their type before promotion. 1770 static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType, 1771 const InstrToOrigTy &PromotedInsts, bool IsSExt); 1772 1773 /// \brief Utility function to determine if \p OpIdx should be promoted when 1774 /// promoting \p Inst. 1775 static bool shouldExtOperand(const Instruction *Inst, int OpIdx) { 1776 if (isa<SelectInst>(Inst) && OpIdx == 0) 1777 return false; 1778 return true; 1779 } 1780 1781 /// \brief Utility function to promote the operand of \p Ext when this 1782 /// operand is a promotable trunc or sext or zext. 1783 /// \p PromotedInsts maps the instructions to their type before promotion. 1784 /// \p CreatedInsts[out] contains how many non-free instructions have been 1785 /// created to promote the operand of Ext. 1786 /// Newly added extensions are inserted in \p Exts. 1787 /// Newly added truncates are inserted in \p Truncs. 1788 /// Should never be called directly. 1789 /// \return The promoted value which is used instead of Ext. 1790 static Value *promoteOperandForTruncAndAnyExt( 1791 Instruction *Ext, TypePromotionTransaction &TPT, 1792 InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, 1793 SmallVectorImpl<Instruction *> *Exts, 1794 SmallVectorImpl<Instruction *> *Truncs); 1795 1796 /// \brief Utility function to promote the operand of \p Ext when this 1797 /// operand is promotable and is not a supported trunc or sext. 1798 /// \p PromotedInsts maps the instructions to their type before promotion. 1799 /// \p CreatedInsts[out] contains how many non-free instructions have been 1800 /// created to promote the operand of Ext. 1801 /// Newly added extensions are inserted in \p Exts. 1802 /// Newly added truncates are inserted in \p Truncs. 1803 /// Should never be called directly. 1804 /// \return The promoted value which is used instead of Ext. 1805 static Value * 1806 promoteOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT, 1807 InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, 1808 SmallVectorImpl<Instruction *> *Exts, 1809 SmallVectorImpl<Instruction *> *Truncs, bool IsSExt); 1810 1811 /// \see promoteOperandForOther. 1812 static Value * 1813 signExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT, 1814 InstrToOrigTy &PromotedInsts, 1815 unsigned &CreatedInsts, 1816 SmallVectorImpl<Instruction *> *Exts, 1817 SmallVectorImpl<Instruction *> *Truncs) { 1818 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts, 1819 Truncs, true); 1820 } 1821 1822 /// \see promoteOperandForOther. 1823 static Value * 1824 zeroExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT, 1825 InstrToOrigTy &PromotedInsts, 1826 unsigned &CreatedInsts, 1827 SmallVectorImpl<Instruction *> *Exts, 1828 SmallVectorImpl<Instruction *> *Truncs) { 1829 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts, 1830 Truncs, false); 1831 } 1832 1833 public: 1834 /// Type for the utility function that promotes the operand of Ext. 
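  /// A rough sketch of how such an action is used (mirroring the pattern in
  /// the address-matching code below):
  /// \code
  ///   TypePromotionHelper::Action TPH =
  ///       TypePromotionHelper::getAction(Ext, InsertedTruncs, TLI,
  ///                                      PromotedInsts);
  ///   if (TPH)
  ///     Value *Promoted = TPH(Ext, TPT, PromotedInsts, CreatedInsts,
  ///                           /*Exts=*/nullptr, /*Truncs=*/nullptr);
  /// \endcode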
1835   typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT,
1836                            InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
1837                            SmallVectorImpl<Instruction *> *Exts,
1838                            SmallVectorImpl<Instruction *> *Truncs);
1839   /// \brief Given a sign/zero extend instruction \p Ext, return the appropriate
1840   /// action to promote the operand of \p Ext instead of using Ext.
1841   /// \return NULL if no promotable action is possible with the current
1842   /// extension.
1843   /// \p InsertedTruncs keeps track of all the truncate instructions inserted by
1844   /// the other CodeGenPrepare optimizations. This information is important
1845   /// because we do not want to promote these instructions as CodeGenPrepare
1846   /// will reinsert them later, creating an infinite create/remove loop.
1847   /// \p PromotedInsts maps the instructions to their type before promotion.
1848   static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedTruncs,
1849                           const TargetLowering &TLI,
1850                           const InstrToOrigTy &PromotedInsts);
1851 };
1852 
1853 bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
1854                                         Type *ConsideredExtType,
1855                                         const InstrToOrigTy &PromotedInsts,
1856                                         bool IsSExt) {
1857   // The promotion helper does not know how to deal with vector types yet.
1858   // To be able to fix that, we would need to fix the places where we
1859   // statically extend, e.g., constants and such.
1860   if (Inst->getType()->isVectorTy())
1861     return false;
1862 
1863   // We can always get through zext.
1864   if (isa<ZExtInst>(Inst))
1865     return true;
1866 
1867   // sext(sext) is ok too.
1868   if (IsSExt && isa<SExtInst>(Inst))
1869     return true;
1870 
1871   // We can get through a binary operator if it is legal. In other words, the
1872   // binary operator must have a nuw or nsw flag.
1873   const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
1874   if (BinOp && isa<OverflowingBinaryOperator>(BinOp) &&
1875       ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
1876        (IsSExt && BinOp->hasNoSignedWrap())))
1877     return true;
1878 
1879   // Check if we can do the following simplification.
1880   // ext(trunc(opnd)) --> ext(opnd)
1881   if (!isa<TruncInst>(Inst))
1882     return false;
1883 
1884   Value *OpndVal = Inst->getOperand(0);
1885   // Check if we can use this operand in the extension.
1886   // If the type is larger than the result type of the extension,
1887   // we cannot.
1888   if (!OpndVal->getType()->isIntegerTy() ||
1889       OpndVal->getType()->getIntegerBitWidth() >
1890           ConsideredExtType->getIntegerBitWidth())
1891     return false;
1892 
1893   // If the operand of the truncate is not an instruction, we will not have
1894   // any information on the dropped bits.
1895   // (Actually we could for constants but it is not worth the extra logic).
1896   Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
1897   if (!Opnd)
1898     return false;
1899 
1900   // Check if the source of the truncate is narrow enough.
1901   // I.e., check that the trunc just drops extended bits of the same kind as
1902   // the extension.
1903   // #1 get the type of the operand and check the kind of the extended bits.
1904   const Type *OpndType;
1905   InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
1906   if (It != PromotedInsts.end() && It->second.IsSExt == IsSExt)
1907     OpndType = It->second.Ty;
1908   else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
1909     OpndType = Opnd->getOperand(0)->getType();
1910   else
1911     return false;
1912 
1913   // #2 check that the truncate just drops extended bits.
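  // For instance (a hypothetical example, not taken from a test case):
  //   %s = sext i16 %a to i64
  //   %t = trunc i64 %s to i32
  //   ext i32 %t to i64
  // Here the trunc only removes bits that the original sext created, so the
  // outer ext still reproduces the value of %a extended to i64.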
1914 if (Inst->getType()->getIntegerBitWidth() >= OpndType->getIntegerBitWidth()) 1915 return true; 1916 1917 return false; 1918 } 1919 1920 TypePromotionHelper::Action TypePromotionHelper::getAction( 1921 Instruction *Ext, const SetOfInstrs &InsertedTruncs, 1922 const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) { 1923 assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) && 1924 "Unexpected instruction type"); 1925 Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0)); 1926 Type *ExtTy = Ext->getType(); 1927 bool IsSExt = isa<SExtInst>(Ext); 1928 // If the operand of the extension is not an instruction, we cannot 1929 // get through. 1930 // If it, check we can get through. 1931 if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt)) 1932 return nullptr; 1933 1934 // Do not promote if the operand has been added by codegenprepare. 1935 // Otherwise, it means we are undoing an optimization that is likely to be 1936 // redone, thus causing potential infinite loop. 1937 if (isa<TruncInst>(ExtOpnd) && InsertedTruncs.count(ExtOpnd)) 1938 return nullptr; 1939 1940 // SExt or Trunc instructions. 1941 // Return the related handler. 1942 if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) || 1943 isa<ZExtInst>(ExtOpnd)) 1944 return promoteOperandForTruncAndAnyExt; 1945 1946 // Regular instruction. 1947 // Abort early if we will have to insert non-free instructions. 1948 if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType())) 1949 return nullptr; 1950 return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther; 1951 } 1952 1953 Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt( 1954 llvm::Instruction *SExt, TypePromotionTransaction &TPT, 1955 InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, 1956 SmallVectorImpl<Instruction *> *Exts, 1957 SmallVectorImpl<Instruction *> *Truncs) { 1958 // By construction, the operand of SExt is an instruction. Otherwise we cannot 1959 // get through it and this method should not be called. 1960 Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0)); 1961 Value *ExtVal = SExt; 1962 if (isa<ZExtInst>(SExtOpnd)) { 1963 // Replace s|zext(zext(opnd)) 1964 // => zext(opnd). 1965 Value *ZExt = 1966 TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType()); 1967 TPT.replaceAllUsesWith(SExt, ZExt); 1968 TPT.eraseInstruction(SExt); 1969 ExtVal = ZExt; 1970 } else { 1971 // Replace z|sext(trunc(opnd)) or sext(sext(opnd)) 1972 // => z|sext(opnd). 1973 TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0)); 1974 } 1975 CreatedInsts = 0; 1976 1977 // Remove dead code. 1978 if (SExtOpnd->use_empty()) 1979 TPT.eraseInstruction(SExtOpnd); 1980 1981 // Check if the extension is still needed. 1982 Instruction *ExtInst = dyn_cast<Instruction>(ExtVal); 1983 if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) { 1984 if (ExtInst && Exts) 1985 Exts->push_back(ExtInst); 1986 return ExtVal; 1987 } 1988 1989 // At this point we have: ext ty opnd to ty. 1990 // Reassign the uses of ExtInst to the opnd and remove ExtInst. 1991 Value *NextVal = ExtInst->getOperand(0); 1992 TPT.eraseInstruction(ExtInst, NextVal); 1993 return NextVal; 1994 } 1995 1996 Value *TypePromotionHelper::promoteOperandForOther( 1997 Instruction *Ext, TypePromotionTransaction &TPT, 1998 InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, 1999 SmallVectorImpl<Instruction *> *Exts, 2000 SmallVectorImpl<Instruction *> *Truncs, bool IsSExt) { 2001 // By construction, the operand of Ext is an instruction. 
Otherwise we cannot 2002 // get through it and this method should not be called. 2003 Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0)); 2004 CreatedInsts = 0; 2005 if (!ExtOpnd->hasOneUse()) { 2006 // ExtOpnd will be promoted. 2007 // All its uses, but Ext, will need to use a truncated value of the 2008 // promoted version. 2009 // Create the truncate now. 2010 Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType()); 2011 if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) { 2012 ITrunc->removeFromParent(); 2013 // Insert it just after the definition. 2014 ITrunc->insertAfter(ExtOpnd); 2015 if (Truncs) 2016 Truncs->push_back(ITrunc); 2017 } 2018 2019 TPT.replaceAllUsesWith(ExtOpnd, Trunc); 2020 // Restore the operand of Ext (which has been replace by the previous call 2021 // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext. 2022 TPT.setOperand(Ext, 0, ExtOpnd); 2023 } 2024 2025 // Get through the Instruction: 2026 // 1. Update its type. 2027 // 2. Replace the uses of Ext by Inst. 2028 // 3. Extend each operand that needs to be extended. 2029 2030 // Remember the original type of the instruction before promotion. 2031 // This is useful to know that the high bits are sign extended bits. 2032 PromotedInsts.insert(std::pair<Instruction *, TypeIsSExt>( 2033 ExtOpnd, TypeIsSExt(ExtOpnd->getType(), IsSExt))); 2034 // Step #1. 2035 TPT.mutateType(ExtOpnd, Ext->getType()); 2036 // Step #2. 2037 TPT.replaceAllUsesWith(Ext, ExtOpnd); 2038 // Step #3. 2039 Instruction *ExtForOpnd = Ext; 2040 2041 DEBUG(dbgs() << "Propagate Ext to operands\n"); 2042 for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx; 2043 ++OpIdx) { 2044 DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n'); 2045 if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() || 2046 !shouldExtOperand(ExtOpnd, OpIdx)) { 2047 DEBUG(dbgs() << "No need to propagate\n"); 2048 continue; 2049 } 2050 // Check if we can statically extend the operand. 2051 Value *Opnd = ExtOpnd->getOperand(OpIdx); 2052 if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) { 2053 DEBUG(dbgs() << "Statically extend\n"); 2054 unsigned BitWidth = Ext->getType()->getIntegerBitWidth(); 2055 APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth) 2056 : Cst->getValue().zext(BitWidth); 2057 TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal)); 2058 continue; 2059 } 2060 // UndefValue are typed, so we have to statically sign extend them. 2061 if (isa<UndefValue>(Opnd)) { 2062 DEBUG(dbgs() << "Statically extend\n"); 2063 TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType())); 2064 continue; 2065 } 2066 2067 // Otherwise we have to explicity sign extend the operand. 2068 // Check if Ext was reused to extend an operand. 2069 if (!ExtForOpnd) { 2070 // If yes, create a new one. 2071 DEBUG(dbgs() << "More operands to ext\n"); 2072 Value *ValForExtOpnd = IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType()) 2073 : TPT.createZExt(Ext, Opnd, Ext->getType()); 2074 if (!isa<Instruction>(ValForExtOpnd)) { 2075 TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd); 2076 continue; 2077 } 2078 ExtForOpnd = cast<Instruction>(ValForExtOpnd); 2079 ++CreatedInsts; 2080 } 2081 if (Exts) 2082 Exts->push_back(ExtForOpnd); 2083 TPT.setOperand(ExtForOpnd, 0, Opnd); 2084 2085 // Move the sign extension before the insertion point. 2086 TPT.moveBefore(ExtForOpnd, ExtOpnd); 2087 TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd); 2088 // If more sext are required, new instructions will have to be created. 
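    // Clearing ExtForOpnd here makes the next iteration build a fresh
    // extension instead of trying to reuse this one.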
2089 ExtForOpnd = nullptr; 2090 } 2091 if (ExtForOpnd == Ext) { 2092 DEBUG(dbgs() << "Extension is useless now\n"); 2093 TPT.eraseInstruction(Ext); 2094 } 2095 return ExtOpnd; 2096 } 2097 2098 /// IsPromotionProfitable - Check whether or not promoting an instruction 2099 /// to a wider type was profitable. 2100 /// \p MatchedSize gives the number of instructions that have been matched 2101 /// in the addressing mode after the promotion was applied. 2102 /// \p SizeWithPromotion gives the number of created instructions for 2103 /// the promotion plus the number of instructions that have been 2104 /// matched in the addressing mode before the promotion. 2105 /// \p PromotedOperand is the value that has been promoted. 2106 /// \return True if the promotion is profitable, false otherwise. 2107 bool 2108 AddressingModeMatcher::IsPromotionProfitable(unsigned MatchedSize, 2109 unsigned SizeWithPromotion, 2110 Value *PromotedOperand) const { 2111 // We folded less instructions than what we created to promote the operand. 2112 // This is not profitable. 2113 if (MatchedSize < SizeWithPromotion) 2114 return false; 2115 if (MatchedSize > SizeWithPromotion) 2116 return true; 2117 // The promotion is neutral but it may help folding the sign extension in 2118 // loads for instance. 2119 // Check that we did not create an illegal instruction. 2120 return isPromotedInstructionLegal(TLI, PromotedOperand); 2121 } 2122 2123 /// MatchOperationAddr - Given an instruction or constant expr, see if we can 2124 /// fold the operation into the addressing mode. If so, update the addressing 2125 /// mode and return true, otherwise return false without modifying AddrMode. 2126 /// If \p MovedAway is not NULL, it contains the information of whether or 2127 /// not AddrInst has to be folded into the addressing mode on success. 2128 /// If \p MovedAway == true, \p AddrInst will not be part of the addressing 2129 /// because it has been moved away. 2130 /// Thus AddrInst must not be added in the matched instructions. 2131 /// This state can happen when AddrInst is a sext, since it may be moved away. 2132 /// Therefore, AddrInst may not be valid when MovedAway is true and it must 2133 /// not be referenced anymore. 2134 bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, 2135 unsigned Depth, 2136 bool *MovedAway) { 2137 // Avoid exponential behavior on extremely deep expression trees. 2138 if (Depth >= 5) return false; 2139 2140 // By default, all matched instructions stay in place. 2141 if (MovedAway) 2142 *MovedAway = false; 2143 2144 switch (Opcode) { 2145 case Instruction::PtrToInt: 2146 // PtrToInt is always a noop, as we know that the int type is pointer sized. 2147 return MatchAddr(AddrInst->getOperand(0), Depth); 2148 case Instruction::IntToPtr: 2149 // This inttoptr is a no-op if the integer type is pointer sized. 2150 if (TLI.getValueType(AddrInst->getOperand(0)->getType()) == 2151 TLI.getPointerTy(AddrInst->getType()->getPointerAddressSpace())) 2152 return MatchAddr(AddrInst->getOperand(0), Depth); 2153 return false; 2154 case Instruction::BitCast: 2155 case Instruction::AddrSpaceCast: 2156 // BitCast is always a noop, and we can handle it as long as it is 2157 // int->int or pointer->pointer (we don't want int<->fp or something). 2158 if ((AddrInst->getOperand(0)->getType()->isPointerTy() || 2159 AddrInst->getOperand(0)->getType()->isIntegerTy()) && 2160 // Don't touch identity bitcasts. These were probably put here by LSR, 2161 // and we don't want to mess around with them. 
Assume it knows what it 2162 // is doing. 2163 AddrInst->getOperand(0)->getType() != AddrInst->getType()) 2164 return MatchAddr(AddrInst->getOperand(0), Depth); 2165 return false; 2166 case Instruction::Add: { 2167 // Check to see if we can merge in the RHS then the LHS. If so, we win. 2168 ExtAddrMode BackupAddrMode = AddrMode; 2169 unsigned OldSize = AddrModeInsts.size(); 2170 // Start a transaction at this point. 2171 // The LHS may match but not the RHS. 2172 // Therefore, we need a higher level restoration point to undo partially 2173 // matched operation. 2174 TypePromotionTransaction::ConstRestorationPt LastKnownGood = 2175 TPT.getRestorationPoint(); 2176 2177 if (MatchAddr(AddrInst->getOperand(1), Depth+1) && 2178 MatchAddr(AddrInst->getOperand(0), Depth+1)) 2179 return true; 2180 2181 // Restore the old addr mode info. 2182 AddrMode = BackupAddrMode; 2183 AddrModeInsts.resize(OldSize); 2184 TPT.rollback(LastKnownGood); 2185 2186 // Otherwise this was over-aggressive. Try merging in the LHS then the RHS. 2187 if (MatchAddr(AddrInst->getOperand(0), Depth+1) && 2188 MatchAddr(AddrInst->getOperand(1), Depth+1)) 2189 return true; 2190 2191 // Otherwise we definitely can't merge the ADD in. 2192 AddrMode = BackupAddrMode; 2193 AddrModeInsts.resize(OldSize); 2194 TPT.rollback(LastKnownGood); 2195 break; 2196 } 2197 //case Instruction::Or: 2198 // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD. 2199 //break; 2200 case Instruction::Mul: 2201 case Instruction::Shl: { 2202 // Can only handle X*C and X << C. 2203 ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1)); 2204 if (!RHS) 2205 return false; 2206 int64_t Scale = RHS->getSExtValue(); 2207 if (Opcode == Instruction::Shl) 2208 Scale = 1LL << Scale; 2209 2210 return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth); 2211 } 2212 case Instruction::GetElementPtr: { 2213 // Scan the GEP. We check it if it contains constant offsets and at most 2214 // one variable offset. 2215 int VariableOperand = -1; 2216 unsigned VariableScale = 0; 2217 2218 int64_t ConstantOffset = 0; 2219 const DataLayout *TD = TLI.getDataLayout(); 2220 gep_type_iterator GTI = gep_type_begin(AddrInst); 2221 for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) { 2222 if (StructType *STy = dyn_cast<StructType>(*GTI)) { 2223 const StructLayout *SL = TD->getStructLayout(STy); 2224 unsigned Idx = 2225 cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue(); 2226 ConstantOffset += SL->getElementOffset(Idx); 2227 } else { 2228 uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType()); 2229 if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) { 2230 ConstantOffset += CI->getSExtValue()*TypeSize; 2231 } else if (TypeSize) { // Scales of zero don't do anything. 2232 // We only allow one variable index at the moment. 2233 if (VariableOperand != -1) 2234 return false; 2235 2236 // Remember the variable index. 2237 VariableOperand = i; 2238 VariableScale = TypeSize; 2239 } 2240 } 2241 } 2242 2243 // A common case is for the GEP to only do a constant offset. In this case, 2244 // just add it to the disp field and check validity. 2245 if (VariableOperand == -1) { 2246 AddrMode.BaseOffs += ConstantOffset; 2247 if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){ 2248 // Check to see if we can fold the base pointer in too. 
2249 if (MatchAddr(AddrInst->getOperand(0), Depth+1)) 2250 return true; 2251 } 2252 AddrMode.BaseOffs -= ConstantOffset; 2253 return false; 2254 } 2255 2256 // Save the valid addressing mode in case we can't match. 2257 ExtAddrMode BackupAddrMode = AddrMode; 2258 unsigned OldSize = AddrModeInsts.size(); 2259 2260 // See if the scale and offset amount is valid for this target. 2261 AddrMode.BaseOffs += ConstantOffset; 2262 2263 // Match the base operand of the GEP. 2264 if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) { 2265 // If it couldn't be matched, just stuff the value in a register. 2266 if (AddrMode.HasBaseReg) { 2267 AddrMode = BackupAddrMode; 2268 AddrModeInsts.resize(OldSize); 2269 return false; 2270 } 2271 AddrMode.HasBaseReg = true; 2272 AddrMode.BaseReg = AddrInst->getOperand(0); 2273 } 2274 2275 // Match the remaining variable portion of the GEP. 2276 if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale, 2277 Depth)) { 2278 // If it couldn't be matched, try stuffing the base into a register 2279 // instead of matching it, and retrying the match of the scale. 2280 AddrMode = BackupAddrMode; 2281 AddrModeInsts.resize(OldSize); 2282 if (AddrMode.HasBaseReg) 2283 return false; 2284 AddrMode.HasBaseReg = true; 2285 AddrMode.BaseReg = AddrInst->getOperand(0); 2286 AddrMode.BaseOffs += ConstantOffset; 2287 if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), 2288 VariableScale, Depth)) { 2289 // If even that didn't work, bail. 2290 AddrMode = BackupAddrMode; 2291 AddrModeInsts.resize(OldSize); 2292 return false; 2293 } 2294 } 2295 2296 return true; 2297 } 2298 case Instruction::SExt: 2299 case Instruction::ZExt: { 2300 Instruction *Ext = dyn_cast<Instruction>(AddrInst); 2301 if (!Ext) 2302 return false; 2303 2304 // Try to move this ext out of the way of the addressing mode. 2305 // Ask for a method for doing so. 2306 TypePromotionHelper::Action TPH = 2307 TypePromotionHelper::getAction(Ext, InsertedTruncs, TLI, PromotedInsts); 2308 if (!TPH) 2309 return false; 2310 2311 TypePromotionTransaction::ConstRestorationPt LastKnownGood = 2312 TPT.getRestorationPoint(); 2313 unsigned CreatedInsts = 0; 2314 Value *PromotedOperand = 2315 TPH(Ext, TPT, PromotedInsts, CreatedInsts, nullptr, nullptr); 2316 // SExt has been moved away. 2317 // Thus either it will be rematched later in the recursive calls or it is 2318 // gone. Anyway, we must not fold it into the addressing mode at this point. 2319 // E.g., 2320 // op = add opnd, 1 2321 // idx = ext op 2322 // addr = gep base, idx 2323 // is now: 2324 // promotedOpnd = ext opnd <- no match here 2325 // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls) 2326 // addr = gep base, op <- match 2327 if (MovedAway) 2328 *MovedAway = true; 2329 2330 assert(PromotedOperand && 2331 "TypePromotionHelper should have filtered out those cases"); 2332 2333 ExtAddrMode BackupAddrMode = AddrMode; 2334 unsigned OldSize = AddrModeInsts.size(); 2335 2336 if (!MatchAddr(PromotedOperand, Depth) || 2337 !IsPromotionProfitable(AddrModeInsts.size(), OldSize + CreatedInsts, 2338 PromotedOperand)) { 2339 AddrMode = BackupAddrMode; 2340 AddrModeInsts.resize(OldSize); 2341 DEBUG(dbgs() << "Sign extension does not pay off: rollback\n"); 2342 TPT.rollback(LastKnownGood); 2343 return false; 2344 } 2345 return true; 2346 } 2347 } 2348 return false; 2349 } 2350 2351 /// MatchAddr - If we can, try to add the value of 'Addr' into the current 2352 /// addressing mode. 
If Addr can't be added to AddrMode this returns false and 2353 /// leaves AddrMode unmodified. This assumes that Addr is either a pointer type 2354 /// or intptr_t for the target. 2355 /// 2356 bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { 2357 // Start a transaction at this point that we will rollback if the matching 2358 // fails. 2359 TypePromotionTransaction::ConstRestorationPt LastKnownGood = 2360 TPT.getRestorationPoint(); 2361 if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) { 2362 // Fold in immediates if legal for the target. 2363 AddrMode.BaseOffs += CI->getSExtValue(); 2364 if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) 2365 return true; 2366 AddrMode.BaseOffs -= CI->getSExtValue(); 2367 } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) { 2368 // If this is a global variable, try to fold it into the addressing mode. 2369 if (!AddrMode.BaseGV) { 2370 AddrMode.BaseGV = GV; 2371 if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) 2372 return true; 2373 AddrMode.BaseGV = nullptr; 2374 } 2375 } else if (Instruction *I = dyn_cast<Instruction>(Addr)) { 2376 ExtAddrMode BackupAddrMode = AddrMode; 2377 unsigned OldSize = AddrModeInsts.size(); 2378 2379 // Check to see if it is possible to fold this operation. 2380 bool MovedAway = false; 2381 if (MatchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) { 2382 // This instruction may have been move away. If so, there is nothing 2383 // to check here. 2384 if (MovedAway) 2385 return true; 2386 // Okay, it's possible to fold this. Check to see if it is actually 2387 // *profitable* to do so. We use a simple cost model to avoid increasing 2388 // register pressure too much. 2389 if (I->hasOneUse() || 2390 IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) { 2391 AddrModeInsts.push_back(I); 2392 return true; 2393 } 2394 2395 // It isn't profitable to do this, roll back. 2396 //cerr << "NOT FOLDING: " << *I; 2397 AddrMode = BackupAddrMode; 2398 AddrModeInsts.resize(OldSize); 2399 TPT.rollback(LastKnownGood); 2400 } 2401 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) { 2402 if (MatchOperationAddr(CE, CE->getOpcode(), Depth)) 2403 return true; 2404 TPT.rollback(LastKnownGood); 2405 } else if (isa<ConstantPointerNull>(Addr)) { 2406 // Null pointer gets folded without affecting the addressing mode. 2407 return true; 2408 } 2409 2410 // Worse case, the target should support [reg] addressing modes. :) 2411 if (!AddrMode.HasBaseReg) { 2412 AddrMode.HasBaseReg = true; 2413 AddrMode.BaseReg = Addr; 2414 // Still check for legality in case the target supports [imm] but not [i+r]. 2415 if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) 2416 return true; 2417 AddrMode.HasBaseReg = false; 2418 AddrMode.BaseReg = nullptr; 2419 } 2420 2421 // If the base register is already taken, see if we can do [r+r]. 2422 if (AddrMode.Scale == 0) { 2423 AddrMode.Scale = 1; 2424 AddrMode.ScaledReg = Addr; 2425 if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) 2426 return true; 2427 AddrMode.Scale = 0; 2428 AddrMode.ScaledReg = nullptr; 2429 } 2430 // Couldn't match. 2431 TPT.rollback(LastKnownGood); 2432 return false; 2433 } 2434 2435 /// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified 2436 /// inline asm call are due to memory operands. If so, return true, otherwise 2437 /// return false. 
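/// For example, an operand used only through an indirect memory constraint
/// such as "m" can be folded, whereas an operand also bound to a register
/// constraint such as "r" cannot (an illustrative case, not an exhaustive
/// list of constraint kinds).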
2438 static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, 2439 const TargetLowering &TLI) { 2440 TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI)); 2441 for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { 2442 TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; 2443 2444 // Compute the constraint code and ConstraintType to use. 2445 TLI.ComputeConstraintToUse(OpInfo, SDValue()); 2446 2447 // If this asm operand is our Value*, and if it isn't an indirect memory 2448 // operand, we can't fold it! 2449 if (OpInfo.CallOperandVal == OpVal && 2450 (OpInfo.ConstraintType != TargetLowering::C_Memory || 2451 !OpInfo.isIndirect)) 2452 return false; 2453 } 2454 2455 return true; 2456 } 2457 2458 /// FindAllMemoryUses - Recursively walk all the uses of I until we find a 2459 /// memory use. If we find an obviously non-foldable instruction, return true. 2460 /// Add the ultimately found memory instructions to MemoryUses. 2461 static bool FindAllMemoryUses(Instruction *I, 2462 SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses, 2463 SmallPtrSetImpl<Instruction*> &ConsideredInsts, 2464 const TargetLowering &TLI) { 2465 // If we already considered this instruction, we're done. 2466 if (!ConsideredInsts.insert(I).second) 2467 return false; 2468 2469 // If this is an obviously unfoldable instruction, bail out. 2470 if (!MightBeFoldableInst(I)) 2471 return true; 2472 2473 // Loop over all the uses, recursively processing them. 2474 for (Use &U : I->uses()) { 2475 Instruction *UserI = cast<Instruction>(U.getUser()); 2476 2477 if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) { 2478 MemoryUses.push_back(std::make_pair(LI, U.getOperandNo())); 2479 continue; 2480 } 2481 2482 if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) { 2483 unsigned opNo = U.getOperandNo(); 2484 if (opNo == 0) return true; // Storing addr, not into addr. 2485 MemoryUses.push_back(std::make_pair(SI, opNo)); 2486 continue; 2487 } 2488 2489 if (CallInst *CI = dyn_cast<CallInst>(UserI)) { 2490 InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue()); 2491 if (!IA) return true; 2492 2493 // If this is a memory operand, we're cool, otherwise bail out. 2494 if (!IsOperandAMemoryOperand(CI, IA, I, TLI)) 2495 return true; 2496 continue; 2497 } 2498 2499 if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI)) 2500 return true; 2501 } 2502 2503 return false; 2504 } 2505 2506 /// ValueAlreadyLiveAtInst - Retrn true if Val is already known to be live at 2507 /// the use site that we're folding it into. If so, there is no cost to 2508 /// include it in the addressing mode. KnownLive1 and KnownLive2 are two values 2509 /// that we know are live at the instruction already. 2510 bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1, 2511 Value *KnownLive2) { 2512 // If Val is either of the known-live values, we know it is live! 2513 if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2) 2514 return true; 2515 2516 // All values other than instructions and arguments (e.g. constants) are live. 2517 if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true; 2518 2519 // If Val is a constant sized alloca in the entry block, it is live, this is 2520 // true because it is just a reference to the stack/frame pointer, which is 2521 // live for the whole function. 
2522 if (AllocaInst *AI = dyn_cast<AllocaInst>(Val)) 2523 if (AI->isStaticAlloca()) 2524 return true; 2525 2526 // Check to see if this value is already used in the memory instruction's 2527 // block. If so, it's already live into the block at the very least, so we 2528 // can reasonably fold it. 2529 return Val->isUsedInBasicBlock(MemoryInst->getParent()); 2530 } 2531 2532 /// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing 2533 /// mode of the machine to fold the specified instruction into a load or store 2534 /// that ultimately uses it. However, the specified instruction has multiple 2535 /// uses. Given this, it may actually increase register pressure to fold it 2536 /// into the load. For example, consider this code: 2537 /// 2538 /// X = ... 2539 /// Y = X+1 2540 /// use(Y) -> nonload/store 2541 /// Z = Y+1 2542 /// load Z 2543 /// 2544 /// In this case, Y has multiple uses, and can be folded into the load of Z 2545 /// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to 2546 /// be live at the use(Y) line. If we don't fold Y into load Z, we use one 2547 /// fewer register. Since Y can't be folded into "use(Y)" we don't increase the 2548 /// number of computations either. 2549 /// 2550 /// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If 2551 /// X was live across 'load Z' for other reasons, we actually *would* want to 2552 /// fold the addressing mode in the Z case. This would make Y die earlier. 2553 bool AddressingModeMatcher:: 2554 IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, 2555 ExtAddrMode &AMAfter) { 2556 if (IgnoreProfitability) return true; 2557 2558 // AMBefore is the addressing mode before this instruction was folded into it, 2559 // and AMAfter is the addressing mode after the instruction was folded. Get 2560 // the set of registers referenced by AMAfter and subtract out those 2561 // referenced by AMBefore: this is the set of values which folding in this 2562 // address extends the lifetime of. 2563 // 2564 // Note that there are only two potential values being referenced here, 2565 // BaseReg and ScaleReg (global addresses are always available, as are any 2566 // folded immediates). 2567 Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg; 2568 2569 // If the BaseReg or ScaledReg was referenced by the previous addrmode, their 2570 // lifetime wasn't extended by adding this instruction. 2571 if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg)) 2572 BaseReg = nullptr; 2573 if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg)) 2574 ScaledReg = nullptr; 2575 2576 // If folding this instruction (and it's subexprs) didn't extend any live 2577 // ranges, we're ok with it. 2578 if (!BaseReg && !ScaledReg) 2579 return true; 2580 2581 // If all uses of this instruction are ultimately load/store/inlineasm's, 2582 // check to see if their addressing modes will include this instruction. If 2583 // so, we can fold it into all uses, so it doesn't matter if it has multiple 2584 // uses. 2585 SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses; 2586 SmallPtrSet<Instruction*, 16> ConsideredInsts; 2587 if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI)) 2588 return false; // Has a non-memory, non-foldable use! 
2589 2590 // Now that we know that all uses of this instruction are part of a chain of 2591 // computation involving only operations that could theoretically be folded 2592 // into a memory use, loop over each of these uses and see if they could 2593 // *actually* fold the instruction. 2594 SmallVector<Instruction*, 32> MatchedAddrModeInsts; 2595 for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) { 2596 Instruction *User = MemoryUses[i].first; 2597 unsigned OpNo = MemoryUses[i].second; 2598 2599 // Get the access type of this use. If the use isn't a pointer, we don't 2600 // know what it accesses. 2601 Value *Address = User->getOperand(OpNo); 2602 if (!Address->getType()->isPointerTy()) 2603 return false; 2604 Type *AddressAccessTy = Address->getType()->getPointerElementType(); 2605 2606 // Do a match against the root of this address, ignoring profitability. This 2607 // will tell us if the addressing mode for the memory operation will 2608 // *actually* cover the shared instruction. 2609 ExtAddrMode Result; 2610 TypePromotionTransaction::ConstRestorationPt LastKnownGood = 2611 TPT.getRestorationPoint(); 2612 AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy, 2613 MemoryInst, Result, InsertedTruncs, 2614 PromotedInsts, TPT); 2615 Matcher.IgnoreProfitability = true; 2616 bool Success = Matcher.MatchAddr(Address, 0); 2617 (void)Success; assert(Success && "Couldn't select *anything*?"); 2618 2619 // The match was to check the profitability, the changes made are not 2620 // part of the original matcher. Therefore, they should be dropped 2621 // otherwise the original matcher will not present the right state. 2622 TPT.rollback(LastKnownGood); 2623 2624 // If the match didn't cover I, then it won't be shared by it. 2625 if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(), 2626 I) == MatchedAddrModeInsts.end()) 2627 return false; 2628 2629 MatchedAddrModeInsts.clear(); 2630 } 2631 2632 return true; 2633 } 2634 2635 } // end anonymous namespace 2636 2637 /// IsNonLocalValue - Return true if the specified values are defined in a 2638 /// different basic block than BB. 2639 static bool IsNonLocalValue(Value *V, BasicBlock *BB) { 2640 if (Instruction *I = dyn_cast<Instruction>(V)) 2641 return I->getParent() != BB; 2642 return false; 2643 } 2644 2645 /// OptimizeMemoryInst - Load and Store Instructions often have 2646 /// addressing modes that can do significant amounts of computation. As such, 2647 /// instruction selection will try to get the load or store to do as much 2648 /// computation as possible for the program. The problem is that isel can only 2649 /// see within a single block. As such, we sink as much legal addressing mode 2650 /// stuff into the block as possible. 2651 /// 2652 /// This method is used to optimize both load/store and inline asms with memory 2653 /// operands. 2654 bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, 2655 Type *AccessTy) { 2656 Value *Repl = Addr; 2657 2658 // Try to collapse single-value PHI nodes. This is necessary to undo 2659 // unprofitable PRE transformations. 2660 SmallVector<Value*, 8> worklist; 2661 SmallPtrSet<Value*, 16> Visited; 2662 worklist.push_back(Addr); 2663 2664 // Use a worklist to iteratively look through PHI nodes, and ensure that 2665 // the addressing mode obtained from the non-PHI roots of the graph 2666 // are equivalent. 
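  // For example (hypothetical IR), if the address is
  //   %addr = phi i32* [ %gep1, %bb1 ], [ %gep2, %bb2 ]
  // and both %gep1 and %gep2 compute [%base + 4], the consensus addressing
  // mode is [%base + 4]; if the two roots disagree, we give up below.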
2667   Value *Consensus = nullptr;
2668   unsigned NumUsesConsensus = 0;
2669   bool IsNumUsesConsensusValid = false;
2670   SmallVector<Instruction*, 16> AddrModeInsts;
2671   ExtAddrMode AddrMode;
2672   TypePromotionTransaction TPT;
2673   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
2674       TPT.getRestorationPoint();
2675   while (!worklist.empty()) {
2676     Value *V = worklist.back();
2677     worklist.pop_back();
2678 
2679     // Break use-def graph loops.
2680     if (!Visited.insert(V).second) {
2681       Consensus = nullptr;
2682       break;
2683     }
2684 
2685     // For a PHI node, push all of its incoming values.
2686     if (PHINode *P = dyn_cast<PHINode>(V)) {
2687       for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
2688         worklist.push_back(P->getIncomingValue(i));
2689       continue;
2690     }
2691 
2692     // For non-PHIs, determine the addressing mode being computed.
2693     SmallVector<Instruction*, 16> NewAddrModeInsts;
2694     ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
2695         V, AccessTy, MemoryInst, NewAddrModeInsts, *TLI, InsertedTruncsSet,
2696         PromotedInsts, TPT);
2697 
2698     // This check is broken into two cases with very similar code to avoid using
2699     // getNumUses() as much as possible. Some values have a lot of uses, so
2700     // calling getNumUses() unconditionally caused a significant compile-time
2701     // regression.
2702     if (!Consensus) {
2703       Consensus = V;
2704       AddrMode = NewAddrMode;
2705       AddrModeInsts = NewAddrModeInsts;
2706       continue;
2707     } else if (NewAddrMode == AddrMode) {
2708       if (!IsNumUsesConsensusValid) {
2709         NumUsesConsensus = Consensus->getNumUses();
2710         IsNumUsesConsensusValid = true;
2711       }
2712 
2713       // Ensure that the obtained addressing mode is equivalent to that obtained
2714       // for all other roots of the PHI traversal. Also, when choosing one
2715       // such root as representative, select the one with the most uses in order
2716       // to keep the cost modeling heuristics in AddressingModeMatcher
2717       // applicable.
2718       unsigned NumUses = V->getNumUses();
2719       if (NumUses > NumUsesConsensus) {
2720         Consensus = V;
2721         NumUsesConsensus = NumUses;
2722         AddrModeInsts = NewAddrModeInsts;
2723       }
2724       continue;
2725     }
2726 
2727     Consensus = nullptr;
2728     break;
2729   }
2730 
2731   // If the addressing mode couldn't be determined, or if multiple different
2732   // ones were determined, bail out now.
2733   if (!Consensus) {
2734     TPT.rollback(LastKnownGood);
2735     return false;
2736   }
2737   TPT.commit();
2738 
2739   // Check to see if any of the instructions subsumed by this addr mode are
2740   // non-local to I's BB.
2741   bool AnyNonLocal = false;
2742   for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
2743     if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
2744       AnyNonLocal = true;
2745       break;
2746     }
2747   }
2748 
2749   // If all the instructions matched are already in this BB, don't do anything.
2750   if (!AnyNonLocal) {
2751     DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n");
2752     return false;
2753   }
2754 
2755   // Insert this computation right after this user. Since our caller is
2756   // scanning from the top of the BB to the bottom, reuses of the expression
2757   // are guaranteed to happen later.
2758   IRBuilder<> Builder(MemoryInst);
2759 
2760   // Now that we have determined the addressing expression we want to use and
2761   // know that we have to sink it into this block, check to see if we have
2762   // already done this for some other load/store instr in this block. If so,
2763   // reuse the computation.
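  // The sunk address typically ends up as either a GEP off an i8* base, e.g.
  //   %sunkaddr = getelementptr i8* %base, i64 %offset
  // or an integer reconstruction, e.g.
  //   %sunkaddr = inttoptr i64 %sum to i32*
  // depending on which path below is taken (illustrative IR, not a guarantee).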
2764 Value *&SunkAddr = SunkAddrs[Addr]; 2765 if (SunkAddr) { 2766 DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " 2767 << *MemoryInst << "\n"); 2768 if (SunkAddr->getType() != Addr->getType()) 2769 SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType()); 2770 } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() && 2771 TM && TM->getSubtarget<TargetSubtargetInfo>().useAA())) { 2772 // By default, we use the GEP-based method when AA is used later. This 2773 // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. 2774 DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " 2775 << *MemoryInst << "\n"); 2776 Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType()); 2777 Value *ResultPtr = nullptr, *ResultIndex = nullptr; 2778 2779 // First, find the pointer. 2780 if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) { 2781 ResultPtr = AddrMode.BaseReg; 2782 AddrMode.BaseReg = nullptr; 2783 } 2784 2785 if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) { 2786 // We can't add more than one pointer together, nor can we scale a 2787 // pointer (both of which seem meaningless). 2788 if (ResultPtr || AddrMode.Scale != 1) 2789 return false; 2790 2791 ResultPtr = AddrMode.ScaledReg; 2792 AddrMode.Scale = 0; 2793 } 2794 2795 if (AddrMode.BaseGV) { 2796 if (ResultPtr) 2797 return false; 2798 2799 ResultPtr = AddrMode.BaseGV; 2800 } 2801 2802 // If the real base value actually came from an inttoptr, then the matcher 2803 // will look through it and provide only the integer value. In that case, 2804 // use it here. 2805 if (!ResultPtr && AddrMode.BaseReg) { 2806 ResultPtr = 2807 Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), "sunkaddr"); 2808 AddrMode.BaseReg = nullptr; 2809 } else if (!ResultPtr && AddrMode.Scale == 1) { 2810 ResultPtr = 2811 Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), "sunkaddr"); 2812 AddrMode.Scale = 0; 2813 } 2814 2815 if (!ResultPtr && 2816 !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) { 2817 SunkAddr = Constant::getNullValue(Addr->getType()); 2818 } else if (!ResultPtr) { 2819 return false; 2820 } else { 2821 Type *I8PtrTy = 2822 Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace()); 2823 2824 // Start with the base register. Do this first so that subsequent address 2825 // matching finds it last, which will prevent it from trying to match it 2826 // as the scaled value in case it happens to be a mul. That would be 2827 // problematic if we've sunk a different mul for the scale, because then 2828 // we'd end up sinking both muls. 2829 if (AddrMode.BaseReg) { 2830 Value *V = AddrMode.BaseReg; 2831 if (V->getType() != IntPtrTy) 2832 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr"); 2833 2834 ResultIndex = V; 2835 } 2836 2837 // Add the scale value. 2838 if (AddrMode.Scale) { 2839 Value *V = AddrMode.ScaledReg; 2840 if (V->getType() == IntPtrTy) { 2841 // done. 2842 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() < 2843 cast<IntegerType>(V->getType())->getBitWidth()) { 2844 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); 2845 } else { 2846 // It is only safe to sign extend the BaseReg if we know that the math 2847 // required to create it did not overflow before we extend it. Since 2848 // the original IR value was tossed in favor of a constant back when 2849 // the AddrMode was created we need to bail out gracefully if widths 2850 // do not match instead of extending it. 
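        // Before bailing out, clean up any cast instruction we created above
        // for the base register so we don't leave dead IR behind.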
2851 Instruction *I = dyn_cast_or_null<Instruction>(ResultIndex); 2852 if (I && (ResultIndex != AddrMode.BaseReg)) 2853 I->eraseFromParent(); 2854 return false; 2855 } 2856 2857 if (AddrMode.Scale != 1) 2858 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), 2859 "sunkaddr"); 2860 if (ResultIndex) 2861 ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr"); 2862 else 2863 ResultIndex = V; 2864 } 2865 2866 // Add in the Base Offset if present. 2867 if (AddrMode.BaseOffs) { 2868 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); 2869 if (ResultIndex) { 2870 // We need to add this separately from the scale above to help with 2871 // SDAG consecutive load/store merging. 2872 if (ResultPtr->getType() != I8PtrTy) 2873 ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); 2874 ResultPtr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr"); 2875 } 2876 2877 ResultIndex = V; 2878 } 2879 2880 if (!ResultIndex) { 2881 SunkAddr = ResultPtr; 2882 } else { 2883 if (ResultPtr->getType() != I8PtrTy) 2884 ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); 2885 SunkAddr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr"); 2886 } 2887 2888 if (SunkAddr->getType() != Addr->getType()) 2889 SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType()); 2890 } 2891 } else { 2892 DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " 2893 << *MemoryInst << "\n"); 2894 Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType()); 2895 Value *Result = nullptr; 2896 2897 // Start with the base register. Do this first so that subsequent address 2898 // matching finds it last, which will prevent it from trying to match it 2899 // as the scaled value in case it happens to be a mul. That would be 2900 // problematic if we've sunk a different mul for the scale, because then 2901 // we'd end up sinking both muls. 2902 if (AddrMode.BaseReg) { 2903 Value *V = AddrMode.BaseReg; 2904 if (V->getType()->isPointerTy()) 2905 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); 2906 if (V->getType() != IntPtrTy) 2907 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr"); 2908 Result = V; 2909 } 2910 2911 // Add the scale value. 2912 if (AddrMode.Scale) { 2913 Value *V = AddrMode.ScaledReg; 2914 if (V->getType() == IntPtrTy) { 2915 // done. 2916 } else if (V->getType()->isPointerTy()) { 2917 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); 2918 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() < 2919 cast<IntegerType>(V->getType())->getBitWidth()) { 2920 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); 2921 } else { 2922 // It is only safe to sign extend the BaseReg if we know that the math 2923 // required to create it did not overflow before we extend it. Since 2924 // the original IR value was tossed in favor of a constant back when 2925 // the AddrMode was created we need to bail out gracefully if widths 2926 // do not match instead of extending it. 2927 Instruction *I = dyn_cast_or_null<Instruction>(Result); 2928 if (I && (Result != AddrMode.BaseReg)) 2929 I->eraseFromParent(); 2930 return false; 2931 } 2932 if (AddrMode.Scale != 1) 2933 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), 2934 "sunkaddr"); 2935 if (Result) 2936 Result = Builder.CreateAdd(Result, V, "sunkaddr"); 2937 else 2938 Result = V; 2939 } 2940 2941 // Add in the BaseGV if present. 
2942 if (AddrMode.BaseGV) { 2943 Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr"); 2944 if (Result) 2945 Result = Builder.CreateAdd(Result, V, "sunkaddr"); 2946 else 2947 Result = V; 2948 } 2949 2950 // Add in the Base Offset if present. 2951 if (AddrMode.BaseOffs) { 2952 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); 2953 if (Result) 2954 Result = Builder.CreateAdd(Result, V, "sunkaddr"); 2955 else 2956 Result = V; 2957 } 2958 2959 if (!Result) 2960 SunkAddr = Constant::getNullValue(Addr->getType()); 2961 else 2962 SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr"); 2963 } 2964 2965 MemoryInst->replaceUsesOfWith(Repl, SunkAddr); 2966 2967 // If we have no uses, recursively delete the value and all dead instructions 2968 // using it. 2969 if (Repl->use_empty()) { 2970 // This can cause recursive deletion, which can invalidate our iterator. 2971 // Use a WeakVH to hold onto it in case this happens. 2972 WeakVH IterHandle(CurInstIterator); 2973 BasicBlock *BB = CurInstIterator->getParent(); 2974 2975 RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo); 2976 2977 if (IterHandle != CurInstIterator) { 2978 // If the iterator instruction was recursively deleted, start over at the 2979 // start of the block. 2980 CurInstIterator = BB->begin(); 2981 SunkAddrs.clear(); 2982 } 2983 } 2984 ++NumMemoryInsts; 2985 return true; 2986 } 2987 2988 /// OptimizeInlineAsmInst - If there are any memory operands, use 2989 /// OptimizeMemoryInst to sink their address computing into the block when 2990 /// possible / profitable. 2991 bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) { 2992 bool MadeChange = false; 2993 2994 TargetLowering::AsmOperandInfoVector 2995 TargetConstraints = TLI->ParseConstraints(CS); 2996 unsigned ArgNo = 0; 2997 for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { 2998 TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; 2999 3000 // Compute the constraint code and ConstraintType to use. 3001 TLI->ComputeConstraintToUse(OpInfo, SDValue()); 3002 3003 if (OpInfo.ConstraintType == TargetLowering::C_Memory && 3004 OpInfo.isIndirect) { 3005 Value *OpVal = CS->getArgOperand(ArgNo++); 3006 MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType()); 3007 } else if (OpInfo.Type == InlineAsm::isInput) 3008 ArgNo++; 3009 } 3010 3011 return MadeChange; 3012 } 3013 3014 /// \brief Check if all the uses of \p Inst are equivalent (or free) zero or 3015 /// sign extensions. 3016 static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) { 3017 assert(!Inst->use_empty() && "Input must have at least one use"); 3018 const Instruction *FirstUser = cast<Instruction>(*Inst->user_begin()); 3019 bool IsSExt = isa<SExtInst>(FirstUser); 3020 Type *ExtTy = FirstUser->getType(); 3021 for (const User *U : Inst->users()) { 3022 const Instruction *UI = cast<Instruction>(U); 3023 if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI))) 3024 return false; 3025 Type *CurTy = UI->getType(); 3026 // Same input and output types: Same instruction after CSE. 3027 if (CurTy == ExtTy) 3028 continue; 3029 3030 // If IsSExt is true, we are in this situation: 3031 // a = Inst 3032 // b = sext ty1 a to ty2 3033 // c = sext ty1 a to ty3 3034 // Assuming ty2 is shorter than ty3, this could be turned into: 3035 // a = Inst 3036 // b = sext ty1 a to ty2 3037 // c = sext ty2 b to ty3 3038 // However, the last sext is not free. 
3039 if (IsSExt) 3040 return false; 3041 3042 // This is a ZExt, maybe it is free to extend from one type to another. 3043 // In that case, we would not account for a different use. 3044 Type *NarrowTy; 3045 Type *LargeTy; 3046 if (ExtTy->getScalarType()->getIntegerBitWidth() > 3047 CurTy->getScalarType()->getIntegerBitWidth()) { 3048 NarrowTy = CurTy; 3049 LargeTy = ExtTy; 3050 } else { 3051 NarrowTy = ExtTy; 3052 LargeTy = CurTy; 3053 } 3054 3055 if (!TLI.isZExtFree(NarrowTy, LargeTy)) 3056 return false; 3057 } 3058 // All uses are the same or can be derived from one another for free. 3059 return true; 3060 } 3061 3062 /// \brief Try to form ExtLd by promoting \p Exts until they reach a 3063 /// load instruction. 3064 /// If an ext(load) can be formed, it is returned via \p LI for the load 3065 /// and \p Inst for the extension. 3066 /// Otherwise LI == nullptr and Inst == nullptr. 3067 /// When some promotion happened, \p TPT contains the proper state to 3068 /// revert it. 3069 /// 3070 /// \return true when promoting was necessary to expose the ext(load) 3071 /// opportunity, false otherwise. 3072 /// 3073 /// Example: 3074 /// \code 3075 /// %ld = load i32* %addr 3076 /// %add = add nuw i32 %ld, 4 3077 /// %zext = zext i32 %add to i64 3078 /// \endcode 3079 /// => 3080 /// \code 3081 /// %ld = load i32* %addr 3082 /// %zext = zext i32 %ld to i64 3083 /// %add = add nuw i64 %zext, 4 3084 /// \endcode 3085 /// Thanks to the promotion, we can match zext(load i32*) to i64. 3086 bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT, 3087 LoadInst *&LI, Instruction *&Inst, 3088 const SmallVectorImpl<Instruction *> &Exts, 3089 unsigned CreatedInsts = 0) { 3090 // Iterate over all the extensions to see if one of them forms an ext(load). 3091 for (auto I : Exts) { 3092 // Check if we directly have ext(load). 3093 if ((LI = dyn_cast<LoadInst>(I->getOperand(0)))) { 3094 Inst = I; 3095 // No promotion happened here. 3096 return false; 3097 } 3098 // Check whether or not we want to do any promotion. 3099 if (!TLI || !TLI->enableExtLdPromotion() || DisableExtLdPromotion) 3100 continue; 3101 // Get the action to perform the promotion. 3102 TypePromotionHelper::Action TPH = TypePromotionHelper::getAction( 3103 I, InsertedTruncsSet, *TLI, PromotedInsts); 3104 // Check if we can promote. 3105 if (!TPH) 3106 continue; 3107 // Save the current state. 3108 TypePromotionTransaction::ConstRestorationPt LastKnownGood = 3109 TPT.getRestorationPoint(); 3110 SmallVector<Instruction *, 4> NewExts; 3111 unsigned NewCreatedInsts = 0; 3112 // Promote. 3113 Value *PromotedVal = 3114 TPH(I, TPT, PromotedInsts, NewCreatedInsts, &NewExts, nullptr); 3115 assert(PromotedVal && 3116 "TypePromotionHelper should have filtered out those cases"); 3117 3118 // We will be able to merge only one extension into a load. 3119 // Therefore, if we have more than 1 new extension we heuristically 3120 // cut this search path, because it means we degrade the code quality. 3121 // With exactly 2, the transformation is neutral, because we will merge 3122 // one extension but leave one. However, we optimistically keep going, 3123 // because the new extension may be removed too. 3124 unsigned TotalCreatedInsts = CreatedInsts + NewCreatedInsts; 3125 if (!StressExtLdPromotion && 3126 (TotalCreatedInsts > 1 || 3127 !isPromotedInstructionLegal(*TLI, PromotedVal))) { 3128 // The promotion is not profitable; roll back to the previous state. 3129 TPT.rollback(LastKnownGood); 3130 continue; 3131 } 3132 // The promotion is profitable.
3133 // Check if it exposes an ext(load). 3134 (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInsts); 3135 if (LI && (StressExtLdPromotion || NewCreatedInsts == 0 || 3136 // If we have created a new extension, i.e., we now have two 3137 // extensions, we must make sure one of them is merged with 3138 // the load; otherwise we may degrade the code quality. 3139 (LI->hasOneUse() || hasSameExtUse(LI, *TLI)))) 3140 // Promotion happened. 3141 return true; 3142 // If this does not help to expose an ext(load), then roll back. 3143 TPT.rollback(LastKnownGood); 3144 } 3145 // None of the extensions can form an ext(load). 3146 LI = nullptr; 3147 Inst = nullptr; 3148 return false; 3149 } 3150 3151 /// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same 3152 /// basic block as the load, unless conditions are unfavorable. This allows 3153 /// SelectionDAG to fold the extend into the load. 3154 /// \p I [in/out] the extension; it may be modified during the process if some 3155 /// promotions apply. 3156 /// 3157 bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *&I) { 3158 // Try to promote a chain of computation if it allows us to form 3159 // an extended load. 3160 TypePromotionTransaction TPT; 3161 TypePromotionTransaction::ConstRestorationPt LastKnownGood = 3162 TPT.getRestorationPoint(); 3163 SmallVector<Instruction *, 1> Exts; 3164 Exts.push_back(I); 3165 // Look for a load being extended. 3166 LoadInst *LI = nullptr; 3167 Instruction *OldExt = I; 3168 bool HasPromoted = ExtLdPromotion(TPT, LI, I, Exts); 3169 if (!LI || !I) { 3170 assert(!HasPromoted && !LI && "If we did not match any load instruction " 3171 "the code must remain the same"); 3172 I = OldExt; 3173 return false; 3174 } 3175 3176 // If they're already in the same block, there's nothing to do. 3177 // Make the cheap checks first if we did not promote. 3178 // If we promoted, we need to check if it is indeed profitable. 3179 if (!HasPromoted && LI->getParent() == I->getParent()) 3180 return false; 3181 3182 EVT VT = TLI->getValueType(I->getType()); 3183 EVT LoadVT = TLI->getValueType(LI->getType()); 3184 3185 // If the load has other users and the truncate is not free, this probably 3186 // isn't worthwhile. 3187 if (!LI->hasOneUse() && TLI && 3188 (TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) && 3189 !TLI->isTruncateFree(I->getType(), LI->getType())) { 3190 I = OldExt; 3191 TPT.rollback(LastKnownGood); 3192 return false; 3193 } 3194 3195 // Check whether the target supports casts folded into loads. 3196 unsigned LType; 3197 if (isa<ZExtInst>(I)) 3198 LType = ISD::ZEXTLOAD; 3199 else { 3200 assert(isa<SExtInst>(I) && "Unexpected ext type!"); 3201 LType = ISD::SEXTLOAD; 3202 } 3203 if (TLI && !TLI->isLoadExtLegal(LType, LoadVT)) { 3204 I = OldExt; 3205 TPT.rollback(LastKnownGood); 3206 return false; 3207 } 3208 3209 // Move the extend into the same block as the load, so that SelectionDAG 3210 // can fold it. 3211 TPT.commit(); 3212 I->removeFromParent(); 3213 I->insertAfter(LI); 3214 ++NumExtsMoved; 3215 return true; 3216 } 3217 3218 bool CodeGenPrepare::OptimizeExtUses(Instruction *I) { 3219 BasicBlock *DefBB = I->getParent(); 3220 3221 // If the result of a {s|z}ext and its source are both live out, rewrite all 3222 // other uses of the source with the result of the extension. 3223 Value *Src = I->getOperand(0); 3224 if (Src->hasOneUse()) 3225 return false; 3226 3227 // Only do this xform if truncating is free.
3228 if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType())) 3229 return false; 3230 3231 // Only safe to perform the optimization if the source is also defined in 3232 // this block. 3233 if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent()) 3234 return false; 3235 3236 bool DefIsLiveOut = false; 3237 for (User *U : I->users()) { 3238 Instruction *UI = cast<Instruction>(U); 3239 3240 // Figure out which BB this ext is used in. 3241 BasicBlock *UserBB = UI->getParent(); 3242 if (UserBB == DefBB) continue; 3243 DefIsLiveOut = true; 3244 break; 3245 } 3246 if (!DefIsLiveOut) 3247 return false; 3248 3249 // Make sure none of the uses are PHI nodes. 3250 for (User *U : Src->users()) { 3251 Instruction *UI = cast<Instruction>(U); 3252 BasicBlock *UserBB = UI->getParent(); 3253 if (UserBB == DefBB) continue; 3254 // Be conservative. We don't want this xform to end up introducing 3255 // reloads just before load / store instructions. 3256 if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI)) 3257 return false; 3258 } 3259 3260 // InsertedTruncs - Only insert one trunc in each block once. 3261 DenseMap<BasicBlock*, Instruction*> InsertedTruncs; 3262 3263 bool MadeChange = false; 3264 for (Use &U : Src->uses()) { 3265 Instruction *User = cast<Instruction>(U.getUser()); 3266 3267 // Figure out which BB this ext is used in. 3268 BasicBlock *UserBB = User->getParent(); 3269 if (UserBB == DefBB) continue; 3270 3271 // Both src and def are live in this block. Rewrite the use. 3272 Instruction *&InsertedTrunc = InsertedTruncs[UserBB]; 3273 3274 if (!InsertedTrunc) { 3275 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); 3276 InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt); 3277 InsertedTruncsSet.insert(InsertedTrunc); 3278 } 3279 3280 // Replace a use of the {s|z}ext source with a use of the result. 3281 U = InsertedTrunc; 3282 ++NumExtUses; 3283 MadeChange = true; 3284 } 3285 3286 return MadeChange; 3287 } 3288 3289 /// isFormingBranchFromSelectProfitable - Returns true if a SelectInst should be 3290 /// turned into an explicit branch. 3291 static bool isFormingBranchFromSelectProfitable(SelectInst *SI) { 3292 // FIXME: This should use the same heuristics as IfConversion to determine 3293 // whether a select is better represented as a branch. This requires that 3294 // branch probability metadata is preserved for the select, which is not the 3295 // case currently. 3296 3297 CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition()); 3298 3299 // If the branch is predicted right, an out of order CPU can avoid blocking on 3300 // the compare. Emit cmovs on compares with a memory operand as branches to 3301 // avoid stalls on the load from memory. If the compare has more than one use 3302 // there's probably another cmov or setcc around so it's not worth emitting a 3303 // branch. 3304 if (!Cmp) 3305 return false; 3306 3307 Value *CmpOp0 = Cmp->getOperand(0); 3308 Value *CmpOp1 = Cmp->getOperand(1); 3309 3310 // We check that the memory operand has one use to avoid uses of the loaded 3311 // value directly after the compare, making branches unprofitable. 3312 return Cmp->hasOneUse() && 3313 ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) || 3314 (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse())); 3315 } 3316 3317 3318 /// If we have a SelectInst that will likely profit from branch prediction, 3319 /// turn it into a branch. 
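/// As an illustrative sketch (not part of the original documentation; block and value names are invented for exposition), a select such as /// \code /// %cmp = icmp ult i32 %x, %y /// %res = select i1 %cmp, i32 %a, i32 %b /// \endcode /// in a block %entry may be rewritten into an explicit branch plus a PHI node, roughly: /// \code /// entry: /// %cmp = icmp ult i32 %x, %y /// br i1 %cmp, label %select.end, label %select.mid /// select.mid: /// br label %select.end /// select.end: /// %res = phi i32 [ %a, %entry ], [ %b, %select.mid ] /// \endcode /// when the target reports that predictable selects are expensive enough to be worth a branch.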
3320 bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) { 3321 bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); 3322 3323 // Can we convert the 'select' to CF ? 3324 if (DisableSelectToBranch || OptSize || !TLI || VectorCond) 3325 return false; 3326 3327 TargetLowering::SelectSupportKind SelectKind; 3328 if (VectorCond) 3329 SelectKind = TargetLowering::VectorMaskSelect; 3330 else if (SI->getType()->isVectorTy()) 3331 SelectKind = TargetLowering::ScalarCondVectorVal; 3332 else 3333 SelectKind = TargetLowering::ScalarValSelect; 3334 3335 // Do we have efficient codegen support for this kind of 'selects' ? 3336 if (TLI->isSelectSupported(SelectKind)) { 3337 // We have efficient codegen support for the select instruction. 3338 // Check if it is profitable to keep this 'select'. 3339 if (!TLI->isPredictableSelectExpensive() || 3340 !isFormingBranchFromSelectProfitable(SI)) 3341 return false; 3342 } 3343 3344 ModifiedDT = true; 3345 3346 // First, we split the block containing the select into 2 blocks. 3347 BasicBlock *StartBlock = SI->getParent(); 3348 BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI)); 3349 BasicBlock *NextBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); 3350 3351 // Create a new block serving as the landing pad for the branch. 3352 BasicBlock *SmallBlock = BasicBlock::Create(SI->getContext(), "select.mid", 3353 NextBlock->getParent(), NextBlock); 3354 3355 // Move the unconditional branch from the block with the select in it into our 3356 // landing pad block. 3357 StartBlock->getTerminator()->eraseFromParent(); 3358 BranchInst::Create(NextBlock, SmallBlock); 3359 3360 // Insert the real conditional branch based on the original condition. 3361 BranchInst::Create(NextBlock, SmallBlock, SI->getCondition(), SI); 3362 3363 // The select itself is replaced with a PHI Node. 3364 PHINode *PN = PHINode::Create(SI->getType(), 2, "", NextBlock->begin()); 3365 PN->takeName(SI); 3366 PN->addIncoming(SI->getTrueValue(), StartBlock); 3367 PN->addIncoming(SI->getFalseValue(), SmallBlock); 3368 SI->replaceAllUsesWith(PN); 3369 SI->eraseFromParent(); 3370 3371 // Instruct OptimizeBlock to skip to the next block. 3372 CurInstIterator = StartBlock->end(); 3373 ++NumSelectsExpanded; 3374 return true; 3375 } 3376 3377 static bool isBroadcastShuffle(ShuffleVectorInst *SVI) { 3378 SmallVector<int, 16> Mask(SVI->getShuffleMask()); 3379 int SplatElem = -1; 3380 for (unsigned i = 0; i < Mask.size(); ++i) { 3381 if (SplatElem != -1 && Mask[i] != -1 && Mask[i] != SplatElem) 3382 return false; 3383 SplatElem = Mask[i]; 3384 } 3385 3386 return true; 3387 } 3388 3389 /// Some targets have expensive vector shifts if the lanes aren't all the same 3390 /// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases 3391 /// it's often worth sinking a shufflevector splat down to its use so that 3392 /// codegen can spot all lanes are identical. 3393 bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) { 3394 BasicBlock *DefBB = SVI->getParent(); 3395 3396 // Only do this xform if variable vector shifts are particularly expensive. 3397 if (!TLI || !TLI->isVectorShiftByScalarCheap(SVI->getType())) 3398 return false; 3399 3400 // We only expect better codegen by sinking a shuffle if we can recognise a 3401 // constant splat. 3402 if (!isBroadcastShuffle(SVI)) 3403 return false; 3404 3405 // InsertedShuffles - Only insert a shuffle in each block once. 
3406 DenseMap<BasicBlock*, Instruction*> InsertedShuffles; 3407 3408 bool MadeChange = false; 3409 for (User *U : SVI->users()) { 3410 Instruction *UI = cast<Instruction>(U); 3411 3412 // Figure out which BB this shuffle is used in. 3413 BasicBlock *UserBB = UI->getParent(); 3414 if (UserBB == DefBB) continue; 3415 3416 // For now only apply this when the splat is used by a shift instruction. 3417 if (!UI->isShift()) continue; 3418 3419 // Everything checks out, sink the shuffle if the user's block doesn't 3420 // already have a copy. 3421 Instruction *&InsertedShuffle = InsertedShuffles[UserBB]; 3422 3423 if (!InsertedShuffle) { 3424 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); 3425 InsertedShuffle = new ShuffleVectorInst(SVI->getOperand(0), 3426 SVI->getOperand(1), 3427 SVI->getOperand(2), "", InsertPt); 3428 } 3429 3430 UI->replaceUsesOfWith(SVI, InsertedShuffle); 3431 MadeChange = true; 3432 } 3433 3434 // If we removed all uses, nuke the shuffle. 3435 if (SVI->use_empty()) { 3436 SVI->eraseFromParent(); 3437 MadeChange = true; 3438 } 3439 3440 return MadeChange; 3441 } 3442 3443 namespace { 3444 /// \brief Helper class to promote a scalar operation to a vector one. 3445 /// This class is used to move an extractelement transition downward. 3446 /// E.g., 3447 /// a = vector_op <2 x i32> 3448 /// b = extractelement <2 x i32> a, i32 0 3449 /// c = scalar_op b 3450 /// store c 3451 /// 3452 /// => 3453 /// a = vector_op <2 x i32> 3454 /// c = vector_op a (equivalent to scalar_op on the related lane) 3455 /// * d = extractelement <2 x i32> c, i32 0 3456 /// * store d 3457 /// Assuming both extractelement and store can be combined, we get rid of the 3458 /// transition. 3459 class VectorPromoteHelper { 3460 /// Used to perform some checks on the legality of vector operations. 3461 const TargetLowering &TLI; 3462 3463 /// Used to estimate the cost of the promoted chain. 3464 const TargetTransformInfo &TTI; 3465 3466 /// The transition being moved downwards. 3467 Instruction *Transition; 3468 /// The sequence of instructions to be promoted. 3469 SmallVector<Instruction *, 4> InstsToBePromoted; 3470 /// Cost of combining a store and an extract. 3471 unsigned StoreExtractCombineCost; 3472 /// Instruction that will be combined with the transition. 3473 Instruction *CombineInst; 3474 3475 /// \brief The instruction that represents the current end of the transition. 3476 /// Since we are faking the promotion until we reach the end of the chain 3477 /// of computation, we need a way to get the current end of the transition. 3478 Instruction *getEndOfTransition() const { 3479 if (InstsToBePromoted.empty()) 3480 return Transition; 3481 return InstsToBePromoted.back(); 3482 } 3483 3484 /// \brief Return the index of the original value in the transition. 3485 /// E.g., for "extractelement <2 x i32> c, i32 1" the original value, 3486 /// c, is at index 0. 3487 unsigned getTransitionOriginalValueIdx() const { 3488 assert(isa<ExtractElementInst>(Transition) && 3489 "Other kind of transitions are not supported yet"); 3490 return 0; 3491 } 3492 3493 /// \brief Return the index of the index in the transition. 3494 /// E.g., for "extractelement <2 x i32> c, i32 0" the index 3495 /// is at index 1. 3496 unsigned getTransitionIdx() const { 3497 assert(isa<ExtractElementInst>(Transition) && 3498 "Other kind of transitions are not supported yet"); 3499 return 1; 3500 } 3501 3502 /// \brief Get the type of the transition. 3503 /// This is the type of the original value.
3504 /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the 3505 /// transition is <2 x i32>. 3506 Type *getTransitionType() const { 3507 return Transition->getOperand(getTransitionOriginalValueIdx())->getType(); 3508 } 3509 3510 /// \brief Promote \p ToBePromoted by moving \p Def downward through it. 3511 /// I.e., we have the following sequence: 3512 /// Def = Transition <ty1> a to <ty2> 3513 /// b = ToBePromoted <ty2> Def, ... 3514 /// => 3515 /// b = ToBePromoted <ty1> a, ... 3516 /// Def = Transition <ty1> ToBePromoted to <ty2> 3517 void promoteImpl(Instruction *ToBePromoted); 3518 3519 /// \brief Check whether or not it is profitable to promote all the 3520 /// instructions enqueued to be promoted. 3521 bool isProfitableToPromote() { 3522 Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx()); 3523 unsigned Index = isa<ConstantInt>(ValIdx) 3524 ? cast<ConstantInt>(ValIdx)->getZExtValue() 3525 : -1; 3526 Type *PromotedType = getTransitionType(); 3527 3528 StoreInst *ST = cast<StoreInst>(CombineInst); 3529 unsigned AS = ST->getPointerAddressSpace(); 3530 unsigned Align = ST->getAlignment(); 3531 // Check if this store is supported. 3532 if (!TLI.allowsMisalignedMemoryAccesses( 3533 TLI.getValueType(ST->getValueOperand()->getType()), AS, Align)) { 3534 // If this is not supported, there is no way we can combine 3535 // the extract with the store. 3536 return false; 3537 } 3538 3539 // The scalar chain of computation has to pay for the transition 3540 // from scalar to vector. 3541 // The vector chain has to account for the combining cost. 3542 uint64_t ScalarCost = 3543 TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index); 3544 uint64_t VectorCost = StoreExtractCombineCost; 3545 for (const auto &Inst : InstsToBePromoted) { 3546 // Compute the cost. 3547 // By construction, all instructions being promoted are arithmetic ones. 3548 // Moreover, one argument is a constant that can be viewed as a splat 3549 // constant. 3550 Value *Arg0 = Inst->getOperand(0); 3551 bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) || 3552 isa<ConstantFP>(Arg0); 3553 TargetTransformInfo::OperandValueKind Arg0OVK = 3554 IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue 3555 : TargetTransformInfo::OK_AnyValue; 3556 TargetTransformInfo::OperandValueKind Arg1OVK = 3557 !IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue 3558 : TargetTransformInfo::OK_AnyValue; 3559 ScalarCost += TTI.getArithmeticInstrCost( 3560 Inst->getOpcode(), Inst->getType(), Arg0OVK, Arg1OVK); 3561 VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType, 3562 Arg0OVK, Arg1OVK); 3563 } 3564 DEBUG(dbgs() << "Estimated cost of computation to be promoted:\nScalar: " 3565 << ScalarCost << "\nVector: " << VectorCost << '\n'); 3566 return ScalarCost > VectorCost; 3567 } 3568 3569 /// \brief Generate a constant vector with \p Val with the same 3570 /// number of elements as the transition. 3571 /// \p UseSplat defines whether or not \p Val should be replicated 3572 /// across the whole vector. 3573 /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>, 3574 /// otherwise we generate a vector with as many undef elements as possible: 3575 /// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only 3576 /// used at the index of the extract.
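/// For example (an illustrative sketch, not part of the original documentation): with a <4 x i32> transition, Val == i32 7, and an extract index of 2, UseSplat == false would yield <i32 undef, i32 undef, i32 7, i32 undef>, while UseSplat == true would yield <i32 7, i32 7, i32 7, i32 7>.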
3577 Value *getConstantVector(Constant *Val, bool UseSplat) const { 3578 unsigned ExtractIdx = UINT_MAX; 3579 if (!UseSplat) { 3580 // If we cannot determine where the constant must be, we have to 3581 // use a splat constant. 3582 Value *ValExtractIdx = Transition->getOperand(getTransitionIdx()); 3583 if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx)) 3584 ExtractIdx = CstVal->getSExtValue(); 3585 else 3586 UseSplat = true; 3587 } 3588 3589 unsigned End = getTransitionType()->getVectorNumElements(); 3590 if (UseSplat) 3591 return ConstantVector::getSplat(End, Val); 3592 3593 SmallVector<Constant *, 4> ConstVec; 3594 UndefValue *UndefVal = UndefValue::get(Val->getType()); 3595 for (unsigned Idx = 0; Idx != End; ++Idx) { 3596 if (Idx == ExtractIdx) 3597 ConstVec.push_back(Val); 3598 else 3599 ConstVec.push_back(UndefVal); 3600 } 3601 return ConstantVector::get(ConstVec); 3602 } 3603 3604 /// \brief Check if promoting the operand at \p OperandIdx in \p Use 3605 /// to a vector type can trigger undefined behavior. 3606 static bool canCauseUndefinedBehavior(const Instruction *Use, 3607 unsigned OperandIdx) { 3608 // It is not safe to introduce undef when the operand is on 3609 // the right-hand side of a division-like instruction. 3610 if (OperandIdx != 1) 3611 return false; 3612 switch (Use->getOpcode()) { 3613 default: 3614 return false; 3615 case Instruction::SDiv: 3616 case Instruction::UDiv: 3617 case Instruction::SRem: 3618 case Instruction::URem: 3619 return true; 3620 case Instruction::FDiv: 3621 case Instruction::FRem: 3622 return !Use->hasNoNaNs(); 3623 } 3624 llvm_unreachable(nullptr); 3625 } 3626 3627 public: 3628 VectorPromoteHelper(const TargetLowering &TLI, const TargetTransformInfo &TTI, 3629 Instruction *Transition, unsigned CombineCost) 3630 : TLI(TLI), TTI(TTI), Transition(Transition), 3631 StoreExtractCombineCost(CombineCost), CombineInst(nullptr) { 3632 assert(Transition && "Do not know how to promote null"); 3633 } 3634 3635 /// \brief Check if we can promote \p ToBePromoted to a vector type. 3636 bool canPromote(const Instruction *ToBePromoted) const { 3637 // We could support CastInst too. 3638 return isa<BinaryOperator>(ToBePromoted); 3639 } 3640 3641 /// \brief Check if it is profitable to promote \p ToBePromoted 3642 /// by moving the transition downward through it. 3643 bool shouldPromote(const Instruction *ToBePromoted) const { 3644 // Promote only if all the operands can be statically expanded. 3645 // Indeed, we do not want to introduce any new kind of transitions. 3646 for (const Use &U : ToBePromoted->operands()) { 3647 const Value *Val = U.get(); 3648 if (Val == getEndOfTransition()) { 3649 // If the use is a division and the transition is on the rhs, 3650 // we cannot promote the operation; otherwise we may create a 3651 // division by zero. 3652 if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo())) 3653 return false; 3654 continue; 3655 } 3656 if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) && 3657 !isa<ConstantFP>(Val)) 3658 return false; 3659 } 3660 // Check that the resulting operation is legal. 3661 int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode()); 3662 if (!ISDOpcode) 3663 return false; 3664 return StressStoreExtract || 3665 TLI.isOperationLegalOrCustom( 3666 ISDOpcode, TLI.getValueType(getTransitionType(), true)); 3667 } 3668 3669 /// \brief Check whether or not \p Use can be combined 3670 /// with the transition. 3671 /// I.e., is it possible to do Use(Transition) => AnotherUse?
3672 bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); } 3673 3674 /// \brief Record \p ToBePromoted as part of the chain to be promoted. 3675 void enqueueForPromotion(Instruction *ToBePromoted) { 3676 InstsToBePromoted.push_back(ToBePromoted); 3677 } 3678 3679 /// \brief Set the instruction that will be combined with the transition. 3680 void recordCombineInstruction(Instruction *ToBeCombined) { 3681 assert(canCombine(ToBeCombined) && "Unsupported instruction to combine"); 3682 CombineInst = ToBeCombined; 3683 } 3684 3685 /// \brief Promote all the instructions enqueued for promotion if it 3686 /// is profitable. 3687 /// \return True if the promotion happened, false otherwise. 3688 bool promote() { 3689 // Check if there is something to promote. 3690 // Right now, if we do not have anything to combine with, 3691 // we assume the promotion is not profitable. 3692 if (InstsToBePromoted.empty() || !CombineInst) 3693 return false; 3694 3695 // Check cost. 3696 if (!StressStoreExtract && !isProfitableToPromote()) 3697 return false; 3698 3699 // Promote. 3700 for (auto &ToBePromoted : InstsToBePromoted) 3701 promoteImpl(ToBePromoted); 3702 InstsToBePromoted.clear(); 3703 return true; 3704 } 3705 }; 3706 } // End of anonymous namespace. 3707 3708 void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) { 3709 // At this point, we know that all the operands of ToBePromoted but Def 3710 // can be statically promoted. 3711 // For Def, we need to use its parameter in ToBePromoted: 3712 // b = ToBePromoted ty1 a 3713 // Def = Transition ty1 b to ty2 3714 // Move the transition down. 3715 // 1. Replace all uses of the promoted operation by the transition. 3716 // = ... b => = ... Def. 3717 assert(ToBePromoted->getType() == Transition->getType() && 3718 "The type of the result of the transition does not match " 3719 "the final type"); 3720 ToBePromoted->replaceAllUsesWith(Transition); 3721 // 2. Update the type of the uses. 3722 // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def. 3723 Type *TransitionTy = getTransitionType(); 3724 ToBePromoted->mutateType(TransitionTy); 3725 // 3. Update all the operands of the promoted operation with promoted 3726 // operands. 3727 // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a. 3728 for (Use &U : ToBePromoted->operands()) { 3729 Value *Val = U.get(); 3730 Value *NewVal = nullptr; 3731 if (Val == Transition) 3732 NewVal = Transition->getOperand(getTransitionOriginalValueIdx()); 3733 else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) || 3734 isa<ConstantFP>(Val)) { 3735 // Use a splat constant if it is not safe to use undef. 3736 NewVal = getConstantVector( 3737 cast<Constant>(Val), 3738 isa<UndefValue>(Val) || 3739 canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo())); 3740 } else 3741 assert(0 && "Did you modify shouldPromote and forget to update this?"); 3742 ToBePromoted->setOperand(U.getOperandNo(), NewVal); 3743 } 3744 Transition->removeFromParent(); 3745 Transition->insertAfter(ToBePromoted); 3746 Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted); 3747 } 3748 3749 /// Some targets can do store(extractelement) with one instruction. 3750 /// Try to push the extractelement towards the stores when the target 3751 /// has this feature and this is profitable.
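/// As an illustrative sketch (not taken from the original documentation; value and block names are invented for exposition), on such a target a chain like /// \code /// %e = extractelement <2 x i32> %v, i32 1 /// %a = add i32 %e, 5 /// store i32 %a, i32* %p /// \endcode /// may be rewritten so that the extract sits right next to the store and can be folded into it: /// \code /// %a.vec = add <2 x i32> %v, <i32 undef, i32 5> /// %e = extractelement <2 x i32> %a.vec, i32 1 /// store i32 %e, i32* %p /// \endcode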
3752 bool CodeGenPrepare::OptimizeExtractElementInst(Instruction *Inst) { 3753 unsigned CombineCost = UINT_MAX; 3754 if (DisableStoreExtract || !TLI || 3755 (!StressStoreExtract && 3756 !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(), 3757 Inst->getOperand(1), CombineCost))) 3758 return false; 3759 3760 // At this point we know that Inst is a vector to scalar transition. 3761 // Try to move it down the def-use chain, until: 3762 // - We can combine the transition with its single use 3763 // => we get rid of the transition. 3764 // - We escape the current basic block 3765 // => we would need to check that we are moving it to a cheaper place, and 3766 // we do not do that for now. 3767 BasicBlock *Parent = Inst->getParent(); 3768 DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n'); 3769 VectorPromoteHelper VPH(*TLI, *TTI, Inst, CombineCost); 3770 // If the transition has more than one use, assume this is not going to be 3771 // beneficial. 3772 while (Inst->hasOneUse()) { 3773 Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin()); 3774 DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n'); 3775 3776 if (ToBePromoted->getParent() != Parent) { 3777 DEBUG(dbgs() << "Instruction to promote is in a different block (" 3778 << ToBePromoted->getParent()->getName() 3779 << ") than the transition (" << Parent->getName() << ").\n"); 3780 return false; 3781 } 3782 3783 if (VPH.canCombine(ToBePromoted)) { 3784 DEBUG(dbgs() << "Assume " << *Inst << '\n' 3785 << "will be combined with: " << *ToBePromoted << '\n'); 3786 VPH.recordCombineInstruction(ToBePromoted); 3787 bool Changed = VPH.promote(); 3788 NumStoreExtractExposed += Changed; 3789 return Changed; 3790 } 3791 3792 DEBUG(dbgs() << "Try promoting.\n"); 3793 if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted)) 3794 return false; 3795 3796 DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n"); 3797 3798 VPH.enqueueForPromotion(ToBePromoted); 3799 Inst = ToBePromoted; 3800 } 3801 return false; 3802 } 3803 3804 bool CodeGenPrepare::OptimizeInst(Instruction *I) { 3805 if (PHINode *P = dyn_cast<PHINode>(I)) { 3806 // It is possible for very late stage optimizations (such as SimplifyCFG) 3807 // to introduce PHI nodes too late to be cleaned up. If we detect such a 3808 // trivial PHI, go ahead and zap it here. 3809 if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : nullptr, 3810 TLInfo, DT)) { 3811 P->replaceAllUsesWith(V); 3812 P->eraseFromParent(); 3813 ++NumPHIsElim; 3814 return true; 3815 } 3816 return false; 3817 } 3818 3819 if (CastInst *CI = dyn_cast<CastInst>(I)) { 3820 // If the source of the cast is a constant, then this should have 3821 // already been constant folded. The only reason NOT to constant fold 3822 // it is if something (e.g. LSR) was careful to place the constant 3823 // evaluation in a block other than the one that uses it (e.g. to hoist 3824 // the address of globals out of a loop). If this is the case, we don't 3825 // want to forward-subst the cast.
3826 if (isa<Constant>(CI->getOperand(0))) 3827 return false; 3828 3829 if (TLI && OptimizeNoopCopyExpression(CI, *TLI)) 3830 return true; 3831 3832 if (isa<ZExtInst>(I) || isa<SExtInst>(I)) { 3833 // Sink a zext or sext into its user blocks if the target type doesn't 3834 // fit in one register. 3835 if (TLI && TLI->getTypeAction(CI->getContext(), 3836 TLI->getValueType(CI->getType())) == 3837 TargetLowering::TypeExpandInteger) { 3838 return SinkCast(CI); 3839 } else { 3840 bool MadeChange = MoveExtToFormExtLoad(I); 3841 return MadeChange | OptimizeExtUses(I); 3842 } 3843 } 3844 return false; 3845 } 3846 3847 if (CmpInst *CI = dyn_cast<CmpInst>(I)) 3848 if (!TLI || !TLI->hasMultipleConditionRegisters()) 3849 return OptimizeCmpExpression(CI); 3850 3851 if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 3852 if (TLI) 3853 return OptimizeMemoryInst(I, I->getOperand(0), LI->getType()); 3854 return false; 3855 } 3856 3857 if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 3858 if (TLI) 3859 return OptimizeMemoryInst(I, SI->getOperand(1), 3860 SI->getOperand(0)->getType()); 3861 return false; 3862 } 3863 3864 BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I); 3865 3866 if (BinOp && (BinOp->getOpcode() == Instruction::AShr || 3867 BinOp->getOpcode() == Instruction::LShr)) { 3868 ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1)); 3869 if (TLI && CI && TLI->hasExtractBitsInsn()) 3870 return OptimizeExtractBits(BinOp, CI, *TLI); 3871 3872 return false; 3873 } 3874 3875 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) { 3876 if (GEPI->hasAllZeroIndices()) { 3877 // The GEP operand must be a pointer, so must its result -> BitCast. 3878 Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(), 3879 GEPI->getName(), GEPI); 3880 GEPI->replaceAllUsesWith(NC); 3881 GEPI->eraseFromParent(); 3882 ++NumGEPsElim; 3883 OptimizeInst(NC); 3884 return true; 3885 } 3886 return false; 3887 } 3888 3889 if (CallInst *CI = dyn_cast<CallInst>(I)) 3890 return OptimizeCallInst(CI); 3891 3892 if (SelectInst *SI = dyn_cast<SelectInst>(I)) 3893 return OptimizeSelectInst(SI); 3894 3895 if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) 3896 return OptimizeShuffleVectorInst(SVI); 3897 3898 if (isa<ExtractElementInst>(I)) 3899 return OptimizeExtractElementInst(I); 3900 3901 return false; 3902 } 3903 3904 // In this pass we look for GEP and cast instructions that are used 3905 // across basic blocks and rewrite them to improve basic-block-at-a-time 3906 // selection. 3907 bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) { 3908 SunkAddrs.clear(); 3909 bool MadeChange = false; 3910 3911 CurInstIterator = BB.begin(); 3912 while (CurInstIterator != BB.end()) 3913 MadeChange |= OptimizeInst(CurInstIterator++); 3914 3915 MadeChange |= DupRetToEnableTailCallOpts(&BB); 3916 3917 return MadeChange; 3918 } 3919 3920 // If llvm.dbg.value is far away from the value, then iSel may not be able to 3921 // handle it properly. iSel will drop llvm.dbg.value if it cannot 3922 // find a node corresponding to the value. 3923 bool CodeGenPrepare::PlaceDbgValues(Function &F) { 3924 bool MadeChange = false; 3925 for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { 3926 Instruction *PrevNonDbgInst = nullptr; 3927 for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) { 3928 Instruction *Insn = BI; ++BI; 3929 DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn); 3930 // Leave dbg.values that refer to an alloca alone. These
3931 // intrinsics describe the address of a variable (= the alloca) 3932 // being taken. They should not be moved next to the alloca 3933 // (and to the beginning of the scope), but rather stay close to 3934 // where said address is used. 3935 if (!DVI || (DVI->getValue() && isa<AllocaInst>(DVI->getValue()))) { 3936 PrevNonDbgInst = Insn; 3937 continue; 3938 } 3939 3940 Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue()); 3941 if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) { 3942 DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI); 3943 DVI->removeFromParent(); 3944 if (isa<PHINode>(VI)) 3945 DVI->insertBefore(VI->getParent()->getFirstInsertionPt()); 3946 else 3947 DVI->insertAfter(VI); 3948 MadeChange = true; 3949 ++NumDbgValueMoved; 3950 } 3951 } 3952 } 3953 return MadeChange; 3954 } 3955 3956 // If there is a sequence that branches based on comparing a single bit 3957 // against zero that can be combined into a single instruction, and the 3958 // target supports folding these into a single instruction, sink the 3959 // mask and compare into the branch uses. Do this before OptimizeBlock -> 3960 // OptimizeInst -> OptimizeCmpExpression, which perturbs the pattern being 3961 // searched for. 3962 bool CodeGenPrepare::sinkAndCmp(Function &F) { 3963 if (!EnableAndCmpSinking) 3964 return false; 3965 if (!TLI || !TLI->isMaskAndBranchFoldingLegal()) 3966 return false; 3967 bool MadeChange = false; 3968 for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { 3969 BasicBlock *BB = I++; 3970 3971 // Does this BB end with the following? 3972 // %andVal = and %val, #single-bit-set 3973 // %icmpVal = icmp %andVal, 0 3974 // br i1 %icmpVal, label %dest1, label %dest2 3975 BranchInst *Brcc = dyn_cast<BranchInst>(BB->getTerminator()); 3976 if (!Brcc || !Brcc->isConditional()) 3977 continue; 3978 ICmpInst *Cmp = dyn_cast<ICmpInst>(Brcc->getOperand(0)); 3979 if (!Cmp || Cmp->getParent() != BB) 3980 continue; 3981 ConstantInt *Zero = dyn_cast<ConstantInt>(Cmp->getOperand(1)); 3982 if (!Zero || !Zero->isZero()) 3983 continue; 3984 Instruction *And = dyn_cast<Instruction>(Cmp->getOperand(0)); 3985 if (!And || And->getOpcode() != Instruction::And || And->getParent() != BB) 3986 continue; 3987 ConstantInt* Mask = dyn_cast<ConstantInt>(And->getOperand(1)); 3988 if (!Mask || !Mask->getUniqueInteger().isPowerOf2()) 3989 continue; 3990 DEBUG(dbgs() << "found and; icmp ?,0; brcc\n"); DEBUG(BB->dump()); 3991 3992 // Push the "and; icmp" for any users that are conditional branches. 3993 // Since there can only be one branch use per BB, we don't need to keep 3994 // track of which BBs we insert into. 3995 for (Value::use_iterator UI = Cmp->use_begin(), E = Cmp->use_end(); 3996 UI != E; ) { 3997 Use &TheUse = *UI; 3998 // Find brcc use. 3999 BranchInst *BrccUser = dyn_cast<BranchInst>(*UI); 4000 ++UI; 4001 if (!BrccUser || !BrccUser->isConditional()) 4002 continue; 4003 BasicBlock *UserBB = BrccUser->getParent(); 4004 if (UserBB == BB) continue; 4005 DEBUG(dbgs() << "found Brcc use\n"); 4006 4007 // Sink the "and; icmp" to use.
4008 MadeChange = true; 4009 BinaryOperator *NewAnd = 4010 BinaryOperator::CreateAnd(And->getOperand(0), And->getOperand(1), "", 4011 BrccUser); 4012 CmpInst *NewCmp = 4013 CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(), NewAnd, Zero, 4014 "", BrccUser); 4015 TheUse = NewCmp; 4016 ++NumAndCmpsMoved; 4017 DEBUG(BrccUser->getParent()->dump()); 4018 } 4019 } 4020 return MadeChange; 4021 } 4022 4023 /// \brief Retrieve the probabilities of a conditional branch. Returns true on 4024 /// success, or returns false if no or invalid metadata was found. 4025 static bool extractBranchMetadata(BranchInst *BI, 4026 uint64_t &ProbTrue, uint64_t &ProbFalse) { 4027 assert(BI->isConditional() && 4028 "Looking for probabilities on unconditional branch?"); 4029 auto *ProfileData = BI->getMetadata(LLVMContext::MD_prof); 4030 if (!ProfileData || ProfileData->getNumOperands() != 3) 4031 return false; 4032 4033 const auto *CITrue = 4034 mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1)); 4035 const auto *CIFalse = 4036 mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(2)); 4037 if (!CITrue || !CIFalse) 4038 return false; 4039 4040 ProbTrue = CITrue->getValue().getZExtValue(); 4041 ProbFalse = CIFalse->getValue().getZExtValue(); 4042 4043 return true; 4044 } 4045 4046 /// \brief Scale down both weights to fit into uint32_t. 4047 static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) { 4048 uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse; 4049 uint32_t Scale = (NewMax / UINT32_MAX) + 1; 4050 NewTrue = NewTrue / Scale; 4051 NewFalse = NewFalse / Scale; 4052 } 4053 4054 /// \brief Some targets prefer to split a conditional branch like: 4055 /// \code 4056 /// %0 = icmp ne i32 %a, 0 4057 /// %1 = icmp ne i32 %b, 0 4058 /// %or.cond = or i1 %0, %1 4059 /// br i1 %or.cond, label %TrueBB, label %FalseBB 4060 /// \endcode 4061 /// into multiple branch instructions like: 4062 /// \code 4063 /// bb1: 4064 /// %0 = icmp ne i32 %a, 0 4065 /// br i1 %0, label %TrueBB, label %bb2 4066 /// bb2: 4067 /// %1 = icmp ne i32 %b, 0 4068 /// br i1 %1, label %TrueBB, label %FalseBB 4069 /// \endcode 4070 /// This usually allows instruction selection to do even further optimizations 4071 /// and combine the compare with the branch instruction. Currently this is 4072 /// applied for targets which have "cheap" jump instructions. 4073 /// 4074 /// FIXME: Remove the (equivalent?) implementation in SelectionDAG. 4075 /// 4076 bool CodeGenPrepare::splitBranchCondition(Function &F) { 4077 if (!TM || TM->Options.EnableFastISel != true || 4078 !TLI || TLI->isJumpExpensive()) 4079 return false; 4080 4081 bool MadeChange = false; 4082 for (auto &BB : F) { 4083 // Does this BB end with the following? 4084 // %cond1 = icmp|fcmp|binary instruction ... 4085 // %cond2 = icmp|fcmp|binary instruction ... 
4086 // %cond.or = or|and i1 %cond1, %cond2 4087 // br i1 %cond.or, label %dest1, label %dest2 4088 BinaryOperator *LogicOp; 4089 BasicBlock *TBB, *FBB; 4090 if (!match(BB.getTerminator(), m_Br(m_OneUse(m_BinOp(LogicOp)), TBB, FBB))) 4091 continue; 4092 4093 unsigned Opc; 4094 Value *Cond1, *Cond2; 4095 if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)), 4096 m_OneUse(m_Value(Cond2))))) 4097 Opc = Instruction::And; 4098 else if (match(LogicOp, m_Or(m_OneUse(m_Value(Cond1)), 4099 m_OneUse(m_Value(Cond2))))) 4100 Opc = Instruction::Or; 4101 else 4102 continue; 4103 4104 if (!match(Cond1, m_CombineOr(m_Cmp(), m_BinOp())) || 4105 !match(Cond2, m_CombineOr(m_Cmp(), m_BinOp())) ) 4106 continue; 4107 4108 DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump()); 4109 4110 // Create a new BB. 4111 auto *InsertBefore = std::next(Function::iterator(BB)) 4112 .getNodePtrUnchecked(); 4113 auto TmpBB = BasicBlock::Create(BB.getContext(), 4114 BB.getName() + ".cond.split", 4115 BB.getParent(), InsertBefore); 4116 4117 // Update the original basic block by using the first condition directly in 4118 // the branch instruction and removing the no-longer-needed and/or instruction. 4119 auto *Br1 = cast<BranchInst>(BB.getTerminator()); 4120 Br1->setCondition(Cond1); 4121 LogicOp->eraseFromParent(); 4122 4123 // Depending on the condition we have to replace either the true or the false 4124 // successor of the original branch instruction. 4125 if (Opc == Instruction::And) 4126 Br1->setSuccessor(0, TmpBB); 4127 else 4128 Br1->setSuccessor(1, TmpBB); 4129 4130 // Fill in the new basic block. 4131 auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB); 4132 if (auto *I = dyn_cast<Instruction>(Cond2)) { 4133 I->removeFromParent(); 4134 I->insertBefore(Br2); 4135 } 4136 4137 // Update PHI nodes in both successors. The original BB needs to be 4138 // replaced in one successor's PHI nodes, because the branch now comes from 4139 // the newly generated BB (TmpBB). In the other successor we need to add one 4140 // incoming edge to the PHI nodes, because both branch instructions now 4141 // target the same successor. Depending on the original branch condition 4142 // (and/or) we have to swap the successors (TrueDest, FalseDest), so that 4143 // we perform the correct update for the PHI nodes. 4144 // This doesn't change the successor order of the just created branch 4145 // instruction (or any other instruction). 4146 if (Opc == Instruction::Or) 4147 std::swap(TBB, FBB); 4148 4149 // Replace the old BB with the new BB. 4150 for (auto &I : *TBB) { 4151 PHINode *PN = dyn_cast<PHINode>(&I); 4152 if (!PN) 4153 break; 4154 int i; 4155 while ((i = PN->getBasicBlockIndex(&BB)) >= 0) 4156 PN->setIncomingBlock(i, TmpBB); 4157 } 4158 4159 // Add another incoming edge from the new BB. 4160 for (auto &I : *FBB) { 4161 PHINode *PN = dyn_cast<PHINode>(&I); 4162 if (!PN) 4163 break; 4164 auto *Val = PN->getIncomingValueForBlock(&BB); 4165 PN->addIncoming(Val, TmpBB); 4166 } 4167 4168 // Update the branch weights (from SelectionDAGBuilder:: 4169 // FindMergedConditions). 4170 if (Opc == Instruction::Or) { 4171 // Codegen X | Y as: 4172 // BB1: 4173 // jmp_if_X TBB 4174 // jmp TmpBB 4175 // TmpBB: 4176 // jmp_if_Y TBB 4177 // jmp FBB 4178 // 4179 4180 // We have flexibility in setting Prob for BB1 and Prob for TmpBB. 4181 // The requirement is that 4182 // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) 4183 // = TrueProb for original BB.
4184 // Assuming the original weights are A and B, one choice is to set BB1's 4185 // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice 4186 // assumes that 4187 // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB. 4188 // Another choice is to assume TrueProb for BB1 equals to TrueProb for 4189 // TmpBB, but the math is more complicated. 4190 uint64_t TrueWeight, FalseWeight; 4191 if (extractBranchMetadata(Br1, TrueWeight, FalseWeight)) { 4192 uint64_t NewTrueWeight = TrueWeight; 4193 uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight; 4194 scaleWeights(NewTrueWeight, NewFalseWeight); 4195 Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext()) 4196 .createBranchWeights(NewTrueWeight, NewFalseWeight)); 4197 4198 NewTrueWeight = TrueWeight; 4199 NewFalseWeight = 2 * FalseWeight; 4200 scaleWeights(NewTrueWeight, NewFalseWeight); 4201 Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext()) 4202 .createBranchWeights(NewTrueWeight, NewFalseWeight)); 4203 } 4204 } else { 4205 // Codegen X & Y as: 4206 // BB1: 4207 // jmp_if_X TmpBB 4208 // jmp FBB 4209 // TmpBB: 4210 // jmp_if_Y TBB 4211 // jmp FBB 4212 // 4213 // This requires creation of TmpBB after CurBB. 4214 4215 // We have flexibility in setting Prob for BB1 and Prob for TmpBB. 4216 // The requirement is that 4217 // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) 4218 // = FalseProb for original BB. 4219 // Assuming the original weights are A and B, one choice is to set BB1's 4220 // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice 4221 // assumes that 4222 // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB. 4223 uint64_t TrueWeight, FalseWeight; 4224 if (extractBranchMetadata(Br1, TrueWeight, FalseWeight)) { 4225 uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight; 4226 uint64_t NewFalseWeight = FalseWeight; 4227 scaleWeights(NewTrueWeight, NewFalseWeight); 4228 Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext()) 4229 .createBranchWeights(NewTrueWeight, NewFalseWeight)); 4230 4231 NewTrueWeight = 2 * TrueWeight; 4232 NewFalseWeight = FalseWeight; 4233 scaleWeights(NewTrueWeight, NewFalseWeight); 4234 Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext()) 4235 .createBranchWeights(NewTrueWeight, NewFalseWeight)); 4236 } 4237 } 4238 4239 // Request DOM Tree update. 4240 // Note: No point in getting fancy here, since the DT info is never 4241 // available to CodeGenPrepare and the existing update code is broken 4242 // anyway. 4243 ModifiedDT = true; 4244 4245 MadeChange = true; 4246 4247 DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump(); 4248 TmpBB->dump()); 4249 } 4250 return MadeChange; 4251 } 4252