//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach. It should eventually be removed.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "codegenprepare"

STATISTIC(NumBlocksElim, "Number of blocks eliminated");
STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
                      "sunken Cmps");
STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
                       "of sunken Casts");
STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
                          "computations were sunk");
STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
STATISTIC(NumRetsDup, "Number of return instructions duplicated");
STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
STATISTIC(NumAndCmpsMoved, "Number of and/cmp's pushed into branches");
STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");

static cl::opt<bool> DisableBranchOpts(
    "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
    cl::desc("Disable branch optimizations in CodeGenPrepare"));

static cl::opt<bool> DisableSelectToBranch(
    "disable-cgp-select2branch", cl::Hidden, cl::init(false),
    cl::desc("Disable select to branch conversion."));

static cl::opt<bool> AddrSinkUsingGEPs(
    "addr-sink-using-gep", cl::Hidden, cl::init(false),
    cl::desc("Address sinking in CGP using GEPs."));

static cl::opt<bool> EnableAndCmpSinking(
    "enable-andcmp-sinking", cl::Hidden, cl::init(true),
    cl::desc("Enable sinking and/cmp into branches."));

static cl::opt<bool> DisableStoreExtract(
    "disable-cgp-store-extract", cl::Hidden, cl::init(false),
    cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));

static cl::opt<bool> StressStoreExtract(
    "stress-cgp-store-extract", cl::Hidden, cl::init(false),
    cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));

static cl::opt<bool> DisableExtLdPromotion(
    "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
    cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
             "CodeGenPrepare"));

static cl::opt<bool> StressExtLdPromotion(
    "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
    cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
             "optimization in CodeGenPrepare"));

namespace {
typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
struct TypeIsSExt {
  Type *Ty;
  bool IsSExt;
  TypeIsSExt(Type *Ty, bool IsSExt) : Ty(Ty), IsSExt(IsSExt) {}
};
typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
class TypePromotionTransaction;

class CodeGenPrepare : public FunctionPass {
  /// TLI - Keep a pointer to a TargetLowering to consult for determining
  /// transformation profitability.
  const TargetMachine *TM;
  const TargetLowering *TLI;
  const TargetTransformInfo *TTI;
  const TargetLibraryInfo *TLInfo;
  DominatorTree *DT;

  /// CurInstIterator - As we scan instructions optimizing them, this is the
  /// next instruction to optimize. Xforms that can invalidate this should
  /// update it.
  BasicBlock::iterator CurInstIterator;

  /// Keeps track of non-local addresses that have been sunk into a block.
  /// This allows us to avoid inserting duplicate code for blocks with
  /// multiple load/stores of the same address.
  ValueMap<Value*, Value*> SunkAddrs;

  /// Keeps track of all truncates inserted for the current function.
  SetOfInstrs InsertedTruncsSet;
  /// Keeps track of the types of the related instructions before their
  /// promotion for the current function.
  InstrToOrigTy PromotedInsts;

  /// ModifiedDT - If the CFG is modified in any way, the dominator tree may
  /// need to be updated.
  bool ModifiedDT;

  /// OptSize - True if optimizing for size.
144 bool OptSize; 145 146 public: 147 static char ID; // Pass identification, replacement for typeid 148 explicit CodeGenPrepare(const TargetMachine *TM = nullptr) 149 : FunctionPass(ID), TM(TM), TLI(nullptr), TTI(nullptr) { 150 initializeCodeGenPreparePass(*PassRegistry::getPassRegistry()); 151 } 152 bool runOnFunction(Function &F) override; 153 154 const char *getPassName() const override { return "CodeGen Prepare"; } 155 156 void getAnalysisUsage(AnalysisUsage &AU) const override { 157 AU.addPreserved<DominatorTreeWrapperPass>(); 158 AU.addRequired<TargetLibraryInfo>(); 159 AU.addRequired<TargetTransformInfo>(); 160 } 161 162 private: 163 bool EliminateFallThrough(Function &F); 164 bool EliminateMostlyEmptyBlocks(Function &F); 165 bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; 166 void EliminateMostlyEmptyBlock(BasicBlock *BB); 167 bool OptimizeBlock(BasicBlock &BB); 168 bool OptimizeInst(Instruction *I); 169 bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy); 170 bool OptimizeInlineAsmInst(CallInst *CS); 171 bool OptimizeCallInst(CallInst *CI); 172 bool MoveExtToFormExtLoad(Instruction *&I); 173 bool OptimizeExtUses(Instruction *I); 174 bool OptimizeSelectInst(SelectInst *SI); 175 bool OptimizeShuffleVectorInst(ShuffleVectorInst *SI); 176 bool OptimizeExtractElementInst(Instruction *Inst); 177 bool DupRetToEnableTailCallOpts(BasicBlock *BB); 178 bool PlaceDbgValues(Function &F); 179 bool sinkAndCmp(Function &F); 180 bool ExtLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI, 181 Instruction *&Inst, 182 const SmallVectorImpl<Instruction *> &Exts, 183 unsigned CreatedInst); 184 bool splitBranchCondition(Function &F); 185 }; 186 } 187 188 char CodeGenPrepare::ID = 0; 189 INITIALIZE_TM_PASS(CodeGenPrepare, "codegenprepare", 190 "Optimize for code generation", false, false) 191 192 FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) { 193 return new CodeGenPrepare(TM); 194 } 195 196 bool CodeGenPrepare::runOnFunction(Function &F) { 197 if (skipOptnoneFunction(F)) 198 return false; 199 200 bool EverMadeChange = false; 201 // Clear per function information. 202 InsertedTruncsSet.clear(); 203 PromotedInsts.clear(); 204 205 ModifiedDT = false; 206 if (TM) 207 TLI = TM->getSubtargetImpl()->getTargetLowering(); 208 TLInfo = &getAnalysis<TargetLibraryInfo>(); 209 TTI = &getAnalysis<TargetTransformInfo>(); 210 DominatorTreeWrapperPass *DTWP = 211 getAnalysisIfAvailable<DominatorTreeWrapperPass>(); 212 DT = DTWP ? &DTWP->getDomTree() : nullptr; 213 OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, 214 Attribute::OptimizeForSize); 215 216 /// This optimization identifies DIV instructions that can be 217 /// profitably bypassed and carried out with a shorter, faster divide. 218 if (!OptSize && TLI && TLI->isSlowDivBypassed()) { 219 const DenseMap<unsigned int, unsigned int> &BypassWidths = 220 TLI->getBypassSlowDivWidths(); 221 for (Function::iterator I = F.begin(); I != F.end(); I++) 222 EverMadeChange |= bypassSlowDivision(F, I, BypassWidths); 223 } 224 225 // Eliminate blocks that contain only PHI nodes and an 226 // unconditional branch. 227 EverMadeChange |= EliminateMostlyEmptyBlocks(F); 228 229 // llvm.dbg.value is far away from the value then iSel may not be able 230 // handle it properly. iSel will drop llvm.dbg.value if it can not 231 // find a node corresponding to the value. 
232 EverMadeChange |= PlaceDbgValues(F); 233 234 // If there is a mask, compare against zero, and branch that can be combined 235 // into a single target instruction, push the mask and compare into branch 236 // users. Do this before OptimizeBlock -> OptimizeInst -> 237 // OptimizeCmpExpression, which perturbs the pattern being searched for. 238 if (!DisableBranchOpts) { 239 EverMadeChange |= sinkAndCmp(F); 240 EverMadeChange |= splitBranchCondition(F); 241 } 242 243 bool MadeChange = true; 244 while (MadeChange) { 245 MadeChange = false; 246 for (Function::iterator I = F.begin(); I != F.end(); ) { 247 BasicBlock *BB = I++; 248 MadeChange |= OptimizeBlock(*BB); 249 } 250 EverMadeChange |= MadeChange; 251 } 252 253 SunkAddrs.clear(); 254 255 if (!DisableBranchOpts) { 256 MadeChange = false; 257 SmallPtrSet<BasicBlock*, 8> WorkList; 258 for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { 259 SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB)); 260 MadeChange |= ConstantFoldTerminator(BB, true); 261 if (!MadeChange) continue; 262 263 for (SmallVectorImpl<BasicBlock*>::iterator 264 II = Successors.begin(), IE = Successors.end(); II != IE; ++II) 265 if (pred_begin(*II) == pred_end(*II)) 266 WorkList.insert(*II); 267 } 268 269 // Delete the dead blocks and any of their dead successors. 270 MadeChange |= !WorkList.empty(); 271 while (!WorkList.empty()) { 272 BasicBlock *BB = *WorkList.begin(); 273 WorkList.erase(BB); 274 SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB)); 275 276 DeleteDeadBlock(BB); 277 278 for (SmallVectorImpl<BasicBlock*>::iterator 279 II = Successors.begin(), IE = Successors.end(); II != IE; ++II) 280 if (pred_begin(*II) == pred_end(*II)) 281 WorkList.insert(*II); 282 } 283 284 // Merge pairs of basic blocks with unconditional branches, connected by 285 // a single edge. 286 if (EverMadeChange || MadeChange) 287 MadeChange |= EliminateFallThrough(F); 288 289 if (MadeChange) 290 ModifiedDT = true; 291 EverMadeChange |= MadeChange; 292 } 293 294 if (ModifiedDT && DT) 295 DT->recalculate(F); 296 297 return EverMadeChange; 298 } 299 300 /// EliminateFallThrough - Merge basic blocks which are connected 301 /// by a single edge, where one of the basic blocks has a single successor 302 /// pointing to the other basic block, which has a single predecessor. 303 bool CodeGenPrepare::EliminateFallThrough(Function &F) { 304 bool Changed = false; 305 // Scan all of the blocks in the function, except for the entry block. 306 for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { 307 BasicBlock *BB = I++; 308 // If the destination block has a single pred, then this is a trivial 309 // edge, just collapse it. 310 BasicBlock *SinglePred = BB->getSinglePredecessor(); 311 312 // Don't merge if BB's address is taken. 313 if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue; 314 315 BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator()); 316 if (Term && !Term->isConditional()) { 317 Changed = true; 318 DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n"); 319 // Remember if SinglePred was the entry block of the function. 320 // If so, we will need to move BB back to the entry position. 321 bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); 322 MergeBasicBlockIntoOnlyPred(BB, this); 323 324 if (isEntry && BB != &BB->getParent()->getEntryBlock()) 325 BB->moveBefore(&BB->getParent()->getEntryBlock()); 326 327 // We have erased a block. Update the iterator. 
328 I = BB; 329 } 330 } 331 return Changed; 332 } 333 334 /// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes, 335 /// debug info directives, and an unconditional branch. Passes before isel 336 /// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for 337 /// isel. Start by eliminating these blocks so we can split them the way we 338 /// want them. 339 bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) { 340 bool MadeChange = false; 341 // Note that this intentionally skips the entry block. 342 for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { 343 BasicBlock *BB = I++; 344 345 // If this block doesn't end with an uncond branch, ignore it. 346 BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); 347 if (!BI || !BI->isUnconditional()) 348 continue; 349 350 // If the instruction before the branch (skipping debug info) isn't a phi 351 // node, then other stuff is happening here. 352 BasicBlock::iterator BBI = BI; 353 if (BBI != BB->begin()) { 354 --BBI; 355 while (isa<DbgInfoIntrinsic>(BBI)) { 356 if (BBI == BB->begin()) 357 break; 358 --BBI; 359 } 360 if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI)) 361 continue; 362 } 363 364 // Do not break infinite loops. 365 BasicBlock *DestBB = BI->getSuccessor(0); 366 if (DestBB == BB) 367 continue; 368 369 if (!CanMergeBlocks(BB, DestBB)) 370 continue; 371 372 EliminateMostlyEmptyBlock(BB); 373 MadeChange = true; 374 } 375 return MadeChange; 376 } 377 378 /// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a 379 /// single uncond branch between them, and BB contains no other non-phi 380 /// instructions. 381 bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB, 382 const BasicBlock *DestBB) const { 383 // We only want to eliminate blocks whose phi nodes are used by phi nodes in 384 // the successor. If there are more complex condition (e.g. preheaders), 385 // don't mess around with them. 386 BasicBlock::const_iterator BBI = BB->begin(); 387 while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) { 388 for (const User *U : PN->users()) { 389 const Instruction *UI = cast<Instruction>(U); 390 if (UI->getParent() != DestBB || !isa<PHINode>(UI)) 391 return false; 392 // If User is inside DestBB block and it is a PHINode then check 393 // incoming value. If incoming value is not from BB then this is 394 // a complex condition (e.g. preheaders) we want to avoid here. 395 if (UI->getParent() == DestBB) { 396 if (const PHINode *UPN = dyn_cast<PHINode>(UI)) 397 for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) { 398 Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I)); 399 if (Insn && Insn->getParent() == BB && 400 Insn->getParent() != UPN->getIncomingBlock(I)) 401 return false; 402 } 403 } 404 } 405 } 406 407 // If BB and DestBB contain any common predecessors, then the phi nodes in BB 408 // and DestBB may have conflicting incoming values for the block. If so, we 409 // can't merge the block. 410 const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin()); 411 if (!DestBBPN) return true; // no conflict. 412 413 // Collect the preds of BB. 414 SmallPtrSet<const BasicBlock*, 16> BBPreds; 415 if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) { 416 // It is faster to get preds from a PHI than with pred_iterator. 
417 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i) 418 BBPreds.insert(BBPN->getIncomingBlock(i)); 419 } else { 420 BBPreds.insert(pred_begin(BB), pred_end(BB)); 421 } 422 423 // Walk the preds of DestBB. 424 for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) { 425 BasicBlock *Pred = DestBBPN->getIncomingBlock(i); 426 if (BBPreds.count(Pred)) { // Common predecessor? 427 BBI = DestBB->begin(); 428 while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) { 429 const Value *V1 = PN->getIncomingValueForBlock(Pred); 430 const Value *V2 = PN->getIncomingValueForBlock(BB); 431 432 // If V2 is a phi node in BB, look up what the mapped value will be. 433 if (const PHINode *V2PN = dyn_cast<PHINode>(V2)) 434 if (V2PN->getParent() == BB) 435 V2 = V2PN->getIncomingValueForBlock(Pred); 436 437 // If there is a conflict, bail out. 438 if (V1 != V2) return false; 439 } 440 } 441 } 442 443 return true; 444 } 445 446 447 /// EliminateMostlyEmptyBlock - Eliminate a basic block that have only phi's and 448 /// an unconditional branch in it. 449 void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { 450 BranchInst *BI = cast<BranchInst>(BB->getTerminator()); 451 BasicBlock *DestBB = BI->getSuccessor(0); 452 453 DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB); 454 455 // If the destination block has a single pred, then this is a trivial edge, 456 // just collapse it. 457 if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) { 458 if (SinglePred != DestBB) { 459 // Remember if SinglePred was the entry block of the function. If so, we 460 // will need to move BB back to the entry position. 461 bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); 462 MergeBasicBlockIntoOnlyPred(DestBB, this); 463 464 if (isEntry && BB != &BB->getParent()->getEntryBlock()) 465 BB->moveBefore(&BB->getParent()->getEntryBlock()); 466 467 DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); 468 return; 469 } 470 } 471 472 // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB 473 // to handle the new incoming edges it is about to have. 474 PHINode *PN; 475 for (BasicBlock::iterator BBI = DestBB->begin(); 476 (PN = dyn_cast<PHINode>(BBI)); ++BBI) { 477 // Remove the incoming value for BB, and remember it. 478 Value *InVal = PN->removeIncomingValue(BB, false); 479 480 // Two options: either the InVal is a phi node defined in BB or it is some 481 // value that dominates BB. 482 PHINode *InValPhi = dyn_cast<PHINode>(InVal); 483 if (InValPhi && InValPhi->getParent() == BB) { 484 // Add all of the input values of the input PHI as inputs of this phi. 485 for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i) 486 PN->addIncoming(InValPhi->getIncomingValue(i), 487 InValPhi->getIncomingBlock(i)); 488 } else { 489 // Otherwise, add one instance of the dominating value for each edge that 490 // we will be adding. 491 if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) { 492 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i) 493 PN->addIncoming(InVal, BBPN->getIncomingBlock(i)); 494 } else { 495 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) 496 PN->addIncoming(InVal, *PI); 497 } 498 } 499 } 500 501 // The PHIs are now updated, change everything that refers to BB to use 502 // DestBB and remove BB. 
503 BB->replaceAllUsesWith(DestBB); 504 if (DT && !ModifiedDT) { 505 BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock(); 506 BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock(); 507 BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom); 508 DT->changeImmediateDominator(DestBB, NewIDom); 509 DT->eraseNode(BB); 510 } 511 BB->eraseFromParent(); 512 ++NumBlocksElim; 513 514 DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); 515 } 516 517 /// SinkCast - Sink the specified cast instruction into its user blocks 518 static bool SinkCast(CastInst *CI) { 519 BasicBlock *DefBB = CI->getParent(); 520 521 /// InsertedCasts - Only insert a cast in each block once. 522 DenseMap<BasicBlock*, CastInst*> InsertedCasts; 523 524 bool MadeChange = false; 525 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end(); 526 UI != E; ) { 527 Use &TheUse = UI.getUse(); 528 Instruction *User = cast<Instruction>(*UI); 529 530 // Figure out which BB this cast is used in. For PHI's this is the 531 // appropriate predecessor block. 532 BasicBlock *UserBB = User->getParent(); 533 if (PHINode *PN = dyn_cast<PHINode>(User)) { 534 UserBB = PN->getIncomingBlock(TheUse); 535 } 536 537 // Preincrement use iterator so we don't invalidate it. 538 ++UI; 539 540 // If this user is in the same block as the cast, don't change the cast. 541 if (UserBB == DefBB) continue; 542 543 // If we have already inserted a cast into this block, use it. 544 CastInst *&InsertedCast = InsertedCasts[UserBB]; 545 546 if (!InsertedCast) { 547 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); 548 InsertedCast = 549 CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "", 550 InsertPt); 551 MadeChange = true; 552 } 553 554 // Replace a use of the cast with a use of the new cast. 555 TheUse = InsertedCast; 556 ++NumCastUses; 557 } 558 559 // If we removed all uses, nuke the cast. 560 if (CI->use_empty()) { 561 CI->eraseFromParent(); 562 MadeChange = true; 563 } 564 565 return MadeChange; 566 } 567 568 /// OptimizeNoopCopyExpression - If the specified cast instruction is a noop 569 /// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC), 570 /// sink it into user blocks to reduce the number of virtual 571 /// registers that must be created and coalesced. 572 /// 573 /// Return true if any changes are made. 574 /// 575 static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){ 576 // If this is a noop copy, 577 EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType()); 578 EVT DstVT = TLI.getValueType(CI->getType()); 579 580 // This is an fp<->int conversion? 581 if (SrcVT.isInteger() != DstVT.isInteger()) 582 return false; 583 584 // If this is an extension, it will be a zero or sign extension, which 585 // isn't a noop. 586 if (SrcVT.bitsLT(DstVT)) return false; 587 588 // If these values will be promoted, find out what they will be promoted 589 // to. This helps us consider truncates on PPC as noop copies when they 590 // are. 591 if (TLI.getTypeAction(CI->getContext(), SrcVT) == 592 TargetLowering::TypePromoteInteger) 593 SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT); 594 if (TLI.getTypeAction(CI->getContext(), DstVT) == 595 TargetLowering::TypePromoteInteger) 596 DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT); 597 598 // If, after promotion, these are the same types, this is a noop copy. 
599 if (SrcVT != DstVT) 600 return false; 601 602 return SinkCast(CI); 603 } 604 605 /// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce 606 /// the number of virtual registers that must be created and coalesced. This is 607 /// a clear win except on targets with multiple condition code registers 608 /// (PowerPC), where it might lose; some adjustment may be wanted there. 609 /// 610 /// Return true if any changes are made. 611 static bool OptimizeCmpExpression(CmpInst *CI) { 612 BasicBlock *DefBB = CI->getParent(); 613 614 /// InsertedCmp - Only insert a cmp in each block once. 615 DenseMap<BasicBlock*, CmpInst*> InsertedCmps; 616 617 bool MadeChange = false; 618 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end(); 619 UI != E; ) { 620 Use &TheUse = UI.getUse(); 621 Instruction *User = cast<Instruction>(*UI); 622 623 // Preincrement use iterator so we don't invalidate it. 624 ++UI; 625 626 // Don't bother for PHI nodes. 627 if (isa<PHINode>(User)) 628 continue; 629 630 // Figure out which BB this cmp is used in. 631 BasicBlock *UserBB = User->getParent(); 632 633 // If this user is in the same block as the cmp, don't change the cmp. 634 if (UserBB == DefBB) continue; 635 636 // If we have already inserted a cmp into this block, use it. 637 CmpInst *&InsertedCmp = InsertedCmps[UserBB]; 638 639 if (!InsertedCmp) { 640 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); 641 InsertedCmp = 642 CmpInst::Create(CI->getOpcode(), 643 CI->getPredicate(), CI->getOperand(0), 644 CI->getOperand(1), "", InsertPt); 645 MadeChange = true; 646 } 647 648 // Replace a use of the cmp with a use of the new cmp. 649 TheUse = InsertedCmp; 650 ++NumCmpUses; 651 } 652 653 // If we removed all uses, nuke the cmp. 654 if (CI->use_empty()) 655 CI->eraseFromParent(); 656 657 return MadeChange; 658 } 659 660 /// isExtractBitsCandidateUse - Check if the candidates could 661 /// be combined with shift instruction, which includes: 662 /// 1. Truncate instruction 663 /// 2. And instruction and the imm is a mask of the low bits: 664 /// imm & (imm+1) == 0 665 static bool isExtractBitsCandidateUse(Instruction *User) { 666 if (!isa<TruncInst>(User)) { 667 if (User->getOpcode() != Instruction::And || 668 !isa<ConstantInt>(User->getOperand(1))) 669 return false; 670 671 const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue(); 672 673 if ((Cimm & (Cimm + 1)).getBoolValue()) 674 return false; 675 } 676 return true; 677 } 678 679 /// SinkShiftAndTruncate - sink both shift and truncate instruction 680 /// to the use of truncate's BB. 681 static bool 682 SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, 683 DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts, 684 const TargetLowering &TLI) { 685 BasicBlock *UserBB = User->getParent(); 686 DenseMap<BasicBlock *, CastInst *> InsertedTruncs; 687 TruncInst *TruncI = dyn_cast<TruncInst>(User); 688 bool MadeChange = false; 689 690 for (Value::user_iterator TruncUI = TruncI->user_begin(), 691 TruncE = TruncI->user_end(); 692 TruncUI != TruncE;) { 693 694 Use &TruncTheUse = TruncUI.getUse(); 695 Instruction *TruncUser = cast<Instruction>(*TruncUI); 696 // Preincrement use iterator so we don't invalidate it. 697 698 ++TruncUI; 699 700 int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode()); 701 if (!ISDOpcode) 702 continue; 703 704 // If the use is actually a legal node, there will not be an 705 // implicit truncate. 
    // FIXME: always querying the result type is just an
    // approximation; some nodes' legality is determined by the
    // operand or other means. There's no good way to find out though.
    if (TLI.isOperationLegalOrCustom(
            ISDOpcode, TLI.getValueType(TruncUser->getType(), true)))
      continue;

    // Don't bother for PHI nodes.
    if (isa<PHINode>(TruncUser))
      continue;

    BasicBlock *TruncUserBB = TruncUser->getParent();

    if (UserBB == TruncUserBB)
      continue;

    BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
    CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];

    if (!InsertedShift && !InsertedTrunc) {
      BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
      // Sink the shift
      if (ShiftI->getOpcode() == Instruction::AShr)
        InsertedShift =
            BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt);
      else
        InsertedShift =
            BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt);

      // Sink the trunc
      BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
      TruncInsertPt++;

      InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
                                       TruncI->getType(), "", TruncInsertPt);

      MadeChange = true;

      TruncTheUse = InsertedTrunc;
    }
  }
  return MadeChange;
}

/// OptimizeExtractBits - sink the shift *right* instruction into user blocks if
/// the uses could potentially be combined with this shift instruction and
/// generate BitExtract instruction. It will only be applied if the architecture
/// supports BitExtract instruction. Here is an example:
/// BB1:
///   %x.extract.shift = lshr i64 %arg1, 32
/// BB2:
///   %x.extract.trunc = trunc i64 %x.extract.shift to i16
/// ==>
///
/// BB2:
///   %x.extract.shift.1 = lshr i64 %arg1, 32
///   %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
///
/// CodeGen will recognize the pattern in BB2 and generate a BitExtract
/// instruction.
/// Return true if any changes are made.
static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
                                const TargetLowering &TLI) {
  BasicBlock *DefBB = ShiftI->getParent();

  /// Only insert instructions in each block once.
  DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;

  bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(ShiftI->getType()));

  bool MadeChange = false;
  for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
       UI != E;) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);
    // Preincrement use iterator so we don't invalidate it.
    ++UI;

    // Don't bother for PHI nodes.
    if (isa<PHINode>(User))
      continue;

    if (!isExtractBitsCandidateUse(User))
      continue;

    BasicBlock *UserBB = User->getParent();

    if (UserBB == DefBB) {
      // If the shift and truncate instructions are in the same BB, the use of
      // the truncate (TruncUse) may still introduce another truncate if it is
      // not legal. In this case, we would like to sink both the shift and the
      // truncate into the BB of TruncUse.
      // For example:
      // BB1:
      // i64 shift.result = lshr i64 opnd, imm
      // trunc.result = trunc shift.result to i16
      //
      // BB2:
      //   ----> We will have an implicit truncate here if the architecture does
      //   not have i16 compare.
      // cmp i16 trunc.result, opnd2
      //
      if (isa<TruncInst>(User) && shiftIsLegal
          // If the type of the truncate is legal, no truncate will be
          // introduced in other basic blocks.
          && (!TLI.isTypeLegal(TLI.getValueType(User->getType()))))
        MadeChange =
            SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI);

      continue;
    }
    // If we have already inserted a shift into this block, use it.
    BinaryOperator *&InsertedShift = InsertedShifts[UserBB];

    if (!InsertedShift) {
      BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();

      if (ShiftI->getOpcode() == Instruction::AShr)
        InsertedShift =
            BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt);
      else
        InsertedShift =
            BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt);

      MadeChange = true;
    }

    // Replace a use of the shift with a use of the new shift.
    TheUse = InsertedShift;
  }

  // If we removed all uses, nuke the shift.
  if (ShiftI->use_empty())
    ShiftI->eraseFromParent();

  return MadeChange;
}

namespace {
class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls {
protected:
  void replaceCall(Value *With) override {
    CI->replaceAllUsesWith(With);
    CI->eraseFromParent();
  }
  bool isFoldable(unsigned SizeCIOp, unsigned, bool) const override {
    if (ConstantInt *SizeCI =
            dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp)))
      return SizeCI->isAllOnesValue();
    return false;
  }
};
} // end anonymous namespace

bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
  BasicBlock *BB = CI->getParent();

  // Lower inline assembly if we can.
  // If we found an inline asm expression, and if the target knows how to
  // lower it to normal LLVM code, do so now.
  if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
    if (TLI->ExpandInlineAsm(CI)) {
      // Avoid invalidating the iterator.
      CurInstIterator = BB->begin();
      // Avoid processing instructions out of order, which could cause
      // reuse before a value is defined.
      SunkAddrs.clear();
      return true;
    }
    // Sink address computing for memory operands into the block.
    if (OptimizeInlineAsmInst(CI))
      return true;
  }

  // Lower all uses of llvm.objectsize.*
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
  if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
    bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
    Type *ReturnTy = CI->getType();
    Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);

    // Substituting this can cause recursive simplifications, which can
    // invalidate our iterator. Use a WeakVH to hold onto it in case this
    // happens.
    WeakVH IterHandle(CurInstIterator);

    replaceAndRecursivelySimplify(CI, RetVal,
                                  TLI ? TLI->getDataLayout() : nullptr,
                                  TLInfo, ModifiedDT ? nullptr : DT);

    // If the iterator instruction was recursively deleted, start over at the
    // start of the block.
    if (IterHandle != CurInstIterator) {
      CurInstIterator = BB->begin();
      SunkAddrs.clear();
    }
    return true;
  }

  if (II && TLI) {
    SmallVector<Value*, 2> PtrOps;
    Type *AccessTy;
    if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy))
      while (!PtrOps.empty())
        if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy))
          return true;
  }

  // From here on out we're working with named functions.
915 if (!CI->getCalledFunction()) return false; 916 917 // We'll need DataLayout from here on out. 918 const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr; 919 if (!TD) return false; 920 921 // Lower all default uses of _chk calls. This is very similar 922 // to what InstCombineCalls does, but here we are only lowering calls 923 // that have the default "don't know" as the objectsize. Anything else 924 // should be left alone. 925 CodeGenPrepareFortifiedLibCalls Simplifier; 926 return Simplifier.fold(CI, TD, TLInfo); 927 } 928 929 /// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return 930 /// instructions to the predecessor to enable tail call optimizations. The 931 /// case it is currently looking for is: 932 /// @code 933 /// bb0: 934 /// %tmp0 = tail call i32 @f0() 935 /// br label %return 936 /// bb1: 937 /// %tmp1 = tail call i32 @f1() 938 /// br label %return 939 /// bb2: 940 /// %tmp2 = tail call i32 @f2() 941 /// br label %return 942 /// return: 943 /// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ] 944 /// ret i32 %retval 945 /// @endcode 946 /// 947 /// => 948 /// 949 /// @code 950 /// bb0: 951 /// %tmp0 = tail call i32 @f0() 952 /// ret i32 %tmp0 953 /// bb1: 954 /// %tmp1 = tail call i32 @f1() 955 /// ret i32 %tmp1 956 /// bb2: 957 /// %tmp2 = tail call i32 @f2() 958 /// ret i32 %tmp2 959 /// @endcode 960 bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) { 961 if (!TLI) 962 return false; 963 964 ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()); 965 if (!RI) 966 return false; 967 968 PHINode *PN = nullptr; 969 BitCastInst *BCI = nullptr; 970 Value *V = RI->getReturnValue(); 971 if (V) { 972 BCI = dyn_cast<BitCastInst>(V); 973 if (BCI) 974 V = BCI->getOperand(0); 975 976 PN = dyn_cast<PHINode>(V); 977 if (!PN) 978 return false; 979 } 980 981 if (PN && PN->getParent() != BB) 982 return false; 983 984 // It's not safe to eliminate the sign / zero extension of the return value. 985 // See llvm::isInTailCallPosition(). 986 const Function *F = BB->getParent(); 987 AttributeSet CallerAttrs = F->getAttributes(); 988 if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) || 989 CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) 990 return false; 991 992 // Make sure there are no instructions between the PHI and return, or that the 993 // return is the first instruction in the block. 994 if (PN) { 995 BasicBlock::iterator BI = BB->begin(); 996 do { ++BI; } while (isa<DbgInfoIntrinsic>(BI)); 997 if (&*BI == BCI) 998 // Also skip over the bitcast. 999 ++BI; 1000 if (&*BI != RI) 1001 return false; 1002 } else { 1003 BasicBlock::iterator BI = BB->begin(); 1004 while (isa<DbgInfoIntrinsic>(BI)) ++BI; 1005 if (&*BI != RI) 1006 return false; 1007 } 1008 1009 /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail 1010 /// call. 1011 SmallVector<CallInst*, 4> TailCalls; 1012 if (PN) { 1013 for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { 1014 CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I)); 1015 // Make sure the phi value is indeed produced by the tail call. 
1016 if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) && 1017 TLI->mayBeEmittedAsTailCall(CI)) 1018 TailCalls.push_back(CI); 1019 } 1020 } else { 1021 SmallPtrSet<BasicBlock*, 4> VisitedBBs; 1022 for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { 1023 if (!VisitedBBs.insert(*PI).second) 1024 continue; 1025 1026 BasicBlock::InstListType &InstList = (*PI)->getInstList(); 1027 BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin(); 1028 BasicBlock::InstListType::reverse_iterator RE = InstList.rend(); 1029 do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI)); 1030 if (RI == RE) 1031 continue; 1032 1033 CallInst *CI = dyn_cast<CallInst>(&*RI); 1034 if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI)) 1035 TailCalls.push_back(CI); 1036 } 1037 } 1038 1039 bool Changed = false; 1040 for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) { 1041 CallInst *CI = TailCalls[i]; 1042 CallSite CS(CI); 1043 1044 // Conservatively require the attributes of the call to match those of the 1045 // return. Ignore noalias because it doesn't affect the call sequence. 1046 AttributeSet CalleeAttrs = CS.getAttributes(); 1047 if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex). 1048 removeAttribute(Attribute::NoAlias) != 1049 AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex). 1050 removeAttribute(Attribute::NoAlias)) 1051 continue; 1052 1053 // Make sure the call instruction is followed by an unconditional branch to 1054 // the return block. 1055 BasicBlock *CallBB = CI->getParent(); 1056 BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator()); 1057 if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB) 1058 continue; 1059 1060 // Duplicate the return into CallBB. 1061 (void)FoldReturnIntoUncondBranch(RI, BB, CallBB); 1062 ModifiedDT = Changed = true; 1063 ++NumRetsDup; 1064 } 1065 1066 // If we eliminated all predecessors of the block, delete the block now. 1067 if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB)) 1068 BB->eraseFromParent(); 1069 1070 return Changed; 1071 } 1072 1073 //===----------------------------------------------------------------------===// 1074 // Memory Optimization 1075 //===----------------------------------------------------------------------===// 1076 1077 namespace { 1078 1079 /// ExtAddrMode - This is an extended version of TargetLowering::AddrMode 1080 /// which holds actual Value*'s for register values. 1081 struct ExtAddrMode : public TargetLowering::AddrMode { 1082 Value *BaseReg; 1083 Value *ScaledReg; 1084 ExtAddrMode() : BaseReg(nullptr), ScaledReg(nullptr) {} 1085 void print(raw_ostream &OS) const; 1086 void dump() const; 1087 1088 bool operator==(const ExtAddrMode& O) const { 1089 return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) && 1090 (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) && 1091 (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale); 1092 } 1093 }; 1094 1095 #ifndef NDEBUG 1096 static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) { 1097 AM.print(OS); 1098 return OS; 1099 } 1100 #endif 1101 1102 void ExtAddrMode::print(raw_ostream &OS) const { 1103 bool NeedPlus = false; 1104 OS << "["; 1105 if (BaseGV) { 1106 OS << (NeedPlus ? " + " : "") 1107 << "GV:"; 1108 BaseGV->printAsOperand(OS, /*PrintType=*/false); 1109 NeedPlus = true; 1110 } 1111 1112 if (BaseOffs) { 1113 OS << (NeedPlus ? " + " : "") 1114 << BaseOffs; 1115 NeedPlus = true; 1116 } 1117 1118 if (BaseReg) { 1119 OS << (NeedPlus ? 
" + " : "") 1120 << "Base:"; 1121 BaseReg->printAsOperand(OS, /*PrintType=*/false); 1122 NeedPlus = true; 1123 } 1124 if (Scale) { 1125 OS << (NeedPlus ? " + " : "") 1126 << Scale << "*"; 1127 ScaledReg->printAsOperand(OS, /*PrintType=*/false); 1128 } 1129 1130 OS << ']'; 1131 } 1132 1133 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1134 void ExtAddrMode::dump() const { 1135 print(dbgs()); 1136 dbgs() << '\n'; 1137 } 1138 #endif 1139 1140 /// \brief This class provides transaction based operation on the IR. 1141 /// Every change made through this class is recorded in the internal state and 1142 /// can be undone (rollback) until commit is called. 1143 class TypePromotionTransaction { 1144 1145 /// \brief This represents the common interface of the individual transaction. 1146 /// Each class implements the logic for doing one specific modification on 1147 /// the IR via the TypePromotionTransaction. 1148 class TypePromotionAction { 1149 protected: 1150 /// The Instruction modified. 1151 Instruction *Inst; 1152 1153 public: 1154 /// \brief Constructor of the action. 1155 /// The constructor performs the related action on the IR. 1156 TypePromotionAction(Instruction *Inst) : Inst(Inst) {} 1157 1158 virtual ~TypePromotionAction() {} 1159 1160 /// \brief Undo the modification done by this action. 1161 /// When this method is called, the IR must be in the same state as it was 1162 /// before this action was applied. 1163 /// \pre Undoing the action works if and only if the IR is in the exact same 1164 /// state as it was directly after this action was applied. 1165 virtual void undo() = 0; 1166 1167 /// \brief Advocate every change made by this action. 1168 /// When the results on the IR of the action are to be kept, it is important 1169 /// to call this function, otherwise hidden information may be kept forever. 1170 virtual void commit() { 1171 // Nothing to be done, this action is not doing anything. 1172 } 1173 }; 1174 1175 /// \brief Utility to remember the position of an instruction. 1176 class InsertionHandler { 1177 /// Position of an instruction. 1178 /// Either an instruction: 1179 /// - Is the first in a basic block: BB is used. 1180 /// - Has a previous instructon: PrevInst is used. 1181 union { 1182 Instruction *PrevInst; 1183 BasicBlock *BB; 1184 } Point; 1185 /// Remember whether or not the instruction had a previous instruction. 1186 bool HasPrevInstruction; 1187 1188 public: 1189 /// \brief Record the position of \p Inst. 1190 InsertionHandler(Instruction *Inst) { 1191 BasicBlock::iterator It = Inst; 1192 HasPrevInstruction = (It != (Inst->getParent()->begin())); 1193 if (HasPrevInstruction) 1194 Point.PrevInst = --It; 1195 else 1196 Point.BB = Inst->getParent(); 1197 } 1198 1199 /// \brief Insert \p Inst at the recorded position. 1200 void insert(Instruction *Inst) { 1201 if (HasPrevInstruction) { 1202 if (Inst->getParent()) 1203 Inst->removeFromParent(); 1204 Inst->insertAfter(Point.PrevInst); 1205 } else { 1206 Instruction *Position = Point.BB->getFirstInsertionPt(); 1207 if (Inst->getParent()) 1208 Inst->moveBefore(Position); 1209 else 1210 Inst->insertBefore(Position); 1211 } 1212 } 1213 }; 1214 1215 /// \brief Move an instruction before another. 1216 class InstructionMoveBefore : public TypePromotionAction { 1217 /// Original position of the instruction. 1218 InsertionHandler Position; 1219 1220 public: 1221 /// \brief Move \p Inst before \p Before. 
1222 InstructionMoveBefore(Instruction *Inst, Instruction *Before) 1223 : TypePromotionAction(Inst), Position(Inst) { 1224 DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before << "\n"); 1225 Inst->moveBefore(Before); 1226 } 1227 1228 /// \brief Move the instruction back to its original position. 1229 void undo() override { 1230 DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n"); 1231 Position.insert(Inst); 1232 } 1233 }; 1234 1235 /// \brief Set the operand of an instruction with a new value. 1236 class OperandSetter : public TypePromotionAction { 1237 /// Original operand of the instruction. 1238 Value *Origin; 1239 /// Index of the modified instruction. 1240 unsigned Idx; 1241 1242 public: 1243 /// \brief Set \p Idx operand of \p Inst with \p NewVal. 1244 OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal) 1245 : TypePromotionAction(Inst), Idx(Idx) { 1246 DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n" 1247 << "for:" << *Inst << "\n" 1248 << "with:" << *NewVal << "\n"); 1249 Origin = Inst->getOperand(Idx); 1250 Inst->setOperand(Idx, NewVal); 1251 } 1252 1253 /// \brief Restore the original value of the instruction. 1254 void undo() override { 1255 DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n" 1256 << "for: " << *Inst << "\n" 1257 << "with: " << *Origin << "\n"); 1258 Inst->setOperand(Idx, Origin); 1259 } 1260 }; 1261 1262 /// \brief Hide the operands of an instruction. 1263 /// Do as if this instruction was not using any of its operands. 1264 class OperandsHider : public TypePromotionAction { 1265 /// The list of original operands. 1266 SmallVector<Value *, 4> OriginalValues; 1267 1268 public: 1269 /// \brief Remove \p Inst from the uses of the operands of \p Inst. 1270 OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) { 1271 DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n"); 1272 unsigned NumOpnds = Inst->getNumOperands(); 1273 OriginalValues.reserve(NumOpnds); 1274 for (unsigned It = 0; It < NumOpnds; ++It) { 1275 // Save the current operand. 1276 Value *Val = Inst->getOperand(It); 1277 OriginalValues.push_back(Val); 1278 // Set a dummy one. 1279 // We could use OperandSetter here, but that would implied an overhead 1280 // that we are not willing to pay. 1281 Inst->setOperand(It, UndefValue::get(Val->getType())); 1282 } 1283 } 1284 1285 /// \brief Restore the original list of uses. 1286 void undo() override { 1287 DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n"); 1288 for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It) 1289 Inst->setOperand(It, OriginalValues[It]); 1290 } 1291 }; 1292 1293 /// \brief Build a truncate instruction. 1294 class TruncBuilder : public TypePromotionAction { 1295 Value *Val; 1296 public: 1297 /// \brief Build a truncate instruction of \p Opnd producing a \p Ty 1298 /// result. 1299 /// trunc Opnd to Ty. 1300 TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) { 1301 IRBuilder<> Builder(Opnd); 1302 Val = Builder.CreateTrunc(Opnd, Ty, "promoted"); 1303 DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n"); 1304 } 1305 1306 /// \brief Get the built value. 1307 Value *getBuiltValue() { return Val; } 1308 1309 /// \brief Remove the built instruction. 1310 void undo() override { 1311 DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n"); 1312 if (Instruction *IVal = dyn_cast<Instruction>(Val)) 1313 IVal->eraseFromParent(); 1314 } 1315 }; 1316 1317 /// \brief Build a sign extension instruction. 
1318 class SExtBuilder : public TypePromotionAction { 1319 Value *Val; 1320 public: 1321 /// \brief Build a sign extension instruction of \p Opnd producing a \p Ty 1322 /// result. 1323 /// sext Opnd to Ty. 1324 SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty) 1325 : TypePromotionAction(InsertPt) { 1326 IRBuilder<> Builder(InsertPt); 1327 Val = Builder.CreateSExt(Opnd, Ty, "promoted"); 1328 DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n"); 1329 } 1330 1331 /// \brief Get the built value. 1332 Value *getBuiltValue() { return Val; } 1333 1334 /// \brief Remove the built instruction. 1335 void undo() override { 1336 DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n"); 1337 if (Instruction *IVal = dyn_cast<Instruction>(Val)) 1338 IVal->eraseFromParent(); 1339 } 1340 }; 1341 1342 /// \brief Build a zero extension instruction. 1343 class ZExtBuilder : public TypePromotionAction { 1344 Value *Val; 1345 public: 1346 /// \brief Build a zero extension instruction of \p Opnd producing a \p Ty 1347 /// result. 1348 /// zext Opnd to Ty. 1349 ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty) 1350 : TypePromotionAction(InsertPt) { 1351 IRBuilder<> Builder(InsertPt); 1352 Val = Builder.CreateZExt(Opnd, Ty, "promoted"); 1353 DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n"); 1354 } 1355 1356 /// \brief Get the built value. 1357 Value *getBuiltValue() { return Val; } 1358 1359 /// \brief Remove the built instruction. 1360 void undo() override { 1361 DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n"); 1362 if (Instruction *IVal = dyn_cast<Instruction>(Val)) 1363 IVal->eraseFromParent(); 1364 } 1365 }; 1366 1367 /// \brief Mutate an instruction to another type. 1368 class TypeMutator : public TypePromotionAction { 1369 /// Record the original type. 1370 Type *OrigTy; 1371 1372 public: 1373 /// \brief Mutate the type of \p Inst into \p NewTy. 1374 TypeMutator(Instruction *Inst, Type *NewTy) 1375 : TypePromotionAction(Inst), OrigTy(Inst->getType()) { 1376 DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy 1377 << "\n"); 1378 Inst->mutateType(NewTy); 1379 } 1380 1381 /// \brief Mutate the instruction back to its original type. 1382 void undo() override { 1383 DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy 1384 << "\n"); 1385 Inst->mutateType(OrigTy); 1386 } 1387 }; 1388 1389 /// \brief Replace the uses of an instruction by another instruction. 1390 class UsesReplacer : public TypePromotionAction { 1391 /// Helper structure to keep track of the replaced uses. 1392 struct InstructionAndIdx { 1393 /// The instruction using the instruction. 1394 Instruction *Inst; 1395 /// The index where this instruction is used for Inst. 1396 unsigned Idx; 1397 InstructionAndIdx(Instruction *Inst, unsigned Idx) 1398 : Inst(Inst), Idx(Idx) {} 1399 }; 1400 1401 /// Keep track of the original uses (pair Instruction, Index). 1402 SmallVector<InstructionAndIdx, 4> OriginalUses; 1403 typedef SmallVectorImpl<InstructionAndIdx>::iterator use_iterator; 1404 1405 public: 1406 /// \brief Replace all the use of \p Inst by \p New. 1407 UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) { 1408 DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New 1409 << "\n"); 1410 // Record the original uses. 1411 for (Use &U : Inst->uses()) { 1412 Instruction *UserI = cast<Instruction>(U.getUser()); 1413 OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo())); 1414 } 1415 // Now, we can replace the uses. 
      Inst->replaceAllUsesWith(New);
    }

    /// \brief Reassign the original uses of Inst to Inst.
    void undo() override {
      DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
      for (use_iterator UseIt = OriginalUses.begin(),
                        EndIt = OriginalUses.end();
           UseIt != EndIt; ++UseIt) {
        UseIt->Inst->setOperand(UseIt->Idx, Inst);
      }
    }
  };

  /// \brief Remove an instruction from the IR.
  class InstructionRemover : public TypePromotionAction {
    /// Original position of the instruction.
    InsertionHandler Inserter;
    /// Helper structure to hide all the links to the instruction. In other
    /// words, this makes it look as if the instruction had been removed.
    OperandsHider Hider;
    /// Keep track of the uses replaced, if any.
    UsesReplacer *Replacer;

  public:
    /// \brief Remove all references to \p Inst and optionally replace all its
    /// uses with New.
    /// \pre If !Inst->use_empty(), then New != nullptr
    InstructionRemover(Instruction *Inst, Value *New = nullptr)
        : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
          Replacer(nullptr) {
      if (New)
        Replacer = new UsesReplacer(Inst, New);
      DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
      Inst->removeFromParent();
    }

    ~InstructionRemover() { delete Replacer; }

    /// \brief Really remove the instruction.
    void commit() override { delete Inst; }

    /// \brief Resurrect the instruction and reassign it to the proper uses if
    /// a new value was provided when building this action.
    void undo() override {
      DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
      Inserter.insert(Inst);
      if (Replacer)
        Replacer->undo();
      Hider.undo();
    }
  };

public:
  /// Restoration point.
  /// The restoration point is a pointer to an action instead of an iterator
  /// because the iterator may be invalidated but not the pointer.
  typedef const TypePromotionAction *ConstRestorationPt;
  /// Advocate every change made in that transaction.
  void commit();
  /// Undo all the changes made after the given point.
  void rollback(ConstRestorationPt Point);
  /// Get the current restoration point.
  ConstRestorationPt getRestorationPoint() const;

  /// \name API for IR modification with state keeping to support rollback.
  /// @{
  /// Same as Instruction::setOperand.
  void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
  /// Same as Instruction::eraseFromParent.
  void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
  /// Same as Value::replaceAllUsesWith.
  void replaceAllUsesWith(Instruction *Inst, Value *New);
  /// Same as Value::mutateType.
  void mutateType(Instruction *Inst, Type *NewTy);
  /// Same as IRBuilder::createTrunc.
  Value *createTrunc(Instruction *Opnd, Type *Ty);
  /// Same as IRBuilder::createSExt.
  Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
  /// Same as IRBuilder::createZExt.
  Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
  /// Same as Instruction::moveBefore.
  void moveBefore(Instruction *Inst, Instruction *Before);
  /// @}

private:
  /// The ordered list of actions made so far.
1503 SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions; 1504 typedef SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator CommitPt; 1505 }; 1506 1507 void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx, 1508 Value *NewVal) { 1509 Actions.push_back( 1510 make_unique<TypePromotionTransaction::OperandSetter>(Inst, Idx, NewVal)); 1511 } 1512 1513 void TypePromotionTransaction::eraseInstruction(Instruction *Inst, 1514 Value *NewVal) { 1515 Actions.push_back( 1516 make_unique<TypePromotionTransaction::InstructionRemover>(Inst, NewVal)); 1517 } 1518 1519 void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst, 1520 Value *New) { 1521 Actions.push_back(make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New)); 1522 } 1523 1524 void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) { 1525 Actions.push_back(make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy)); 1526 } 1527 1528 Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, 1529 Type *Ty) { 1530 std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty)); 1531 Value *Val = Ptr->getBuiltValue(); 1532 Actions.push_back(std::move(Ptr)); 1533 return Val; 1534 } 1535 1536 Value *TypePromotionTransaction::createSExt(Instruction *Inst, 1537 Value *Opnd, Type *Ty) { 1538 std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty)); 1539 Value *Val = Ptr->getBuiltValue(); 1540 Actions.push_back(std::move(Ptr)); 1541 return Val; 1542 } 1543 1544 Value *TypePromotionTransaction::createZExt(Instruction *Inst, 1545 Value *Opnd, Type *Ty) { 1546 std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty)); 1547 Value *Val = Ptr->getBuiltValue(); 1548 Actions.push_back(std::move(Ptr)); 1549 return Val; 1550 } 1551 1552 void TypePromotionTransaction::moveBefore(Instruction *Inst, 1553 Instruction *Before) { 1554 Actions.push_back( 1555 make_unique<TypePromotionTransaction::InstructionMoveBefore>(Inst, Before)); 1556 } 1557 1558 TypePromotionTransaction::ConstRestorationPt 1559 TypePromotionTransaction::getRestorationPoint() const { 1560 return !Actions.empty() ? Actions.back().get() : nullptr; 1561 } 1562 1563 void TypePromotionTransaction::commit() { 1564 for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt; 1565 ++It) 1566 (*It)->commit(); 1567 Actions.clear(); 1568 } 1569 1570 void TypePromotionTransaction::rollback( 1571 TypePromotionTransaction::ConstRestorationPt Point) { 1572 while (!Actions.empty() && Point != Actions.back().get()) { 1573 std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val(); 1574 Curr->undo(); 1575 } 1576 } 1577 1578 /// \brief A helper class for matching addressing modes. 1579 /// 1580 /// This encapsulates the logic for matching the target-legal addressing modes. 1581 class AddressingModeMatcher { 1582 SmallVectorImpl<Instruction*> &AddrModeInsts; 1583 const TargetLowering &TLI; 1584 1585 /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and 1586 /// the memory instruction that we're computing this address for. 1587 Type *AccessTy; 1588 Instruction *MemoryInst; 1589 1590 /// AddrMode - This is the addressing mode that we're building up. This is 1591 /// part of the return value of this addressing mode matching stuff. 1592 ExtAddrMode &AddrMode; 1593 1594 /// The truncate instruction inserted by other CodeGenPrepare optimizations. 1595 const SetOfInstrs &InsertedTruncs; 1596 /// A map from the instructions to their type before promotion. 
1597 InstrToOrigTy &PromotedInsts; 1598 /// The ongoing transaction where every action should be registered. 1599 TypePromotionTransaction &TPT; 1600 1601 /// IgnoreProfitability - This is set to true when we should not do 1602 /// profitability checks. When true, IsProfitableToFoldIntoAddressingMode 1603 /// always returns true. 1604 bool IgnoreProfitability; 1605 1606 AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI, 1607 const TargetLowering &T, Type *AT, 1608 Instruction *MI, ExtAddrMode &AM, 1609 const SetOfInstrs &InsertedTruncs, 1610 InstrToOrigTy &PromotedInsts, 1611 TypePromotionTransaction &TPT) 1612 : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM), 1613 InsertedTruncs(InsertedTruncs), PromotedInsts(PromotedInsts), TPT(TPT) { 1614 IgnoreProfitability = false; 1615 } 1616 public: 1617 1618 /// Match - Find the maximal addressing mode that a load/store of V can fold, 1619 /// give an access type of AccessTy. This returns a list of involved 1620 /// instructions in AddrModeInsts. 1621 /// \p InsertedTruncs The truncate instruction inserted by other 1622 /// CodeGenPrepare 1623 /// optimizations. 1624 /// \p PromotedInsts maps the instructions to their type before promotion. 1625 /// \p The ongoing transaction where every action should be registered. 1626 static ExtAddrMode Match(Value *V, Type *AccessTy, 1627 Instruction *MemoryInst, 1628 SmallVectorImpl<Instruction*> &AddrModeInsts, 1629 const TargetLowering &TLI, 1630 const SetOfInstrs &InsertedTruncs, 1631 InstrToOrigTy &PromotedInsts, 1632 TypePromotionTransaction &TPT) { 1633 ExtAddrMode Result; 1634 1635 bool Success = AddressingModeMatcher(AddrModeInsts, TLI, AccessTy, 1636 MemoryInst, Result, InsertedTruncs, 1637 PromotedInsts, TPT).MatchAddr(V, 0); 1638 (void)Success; assert(Success && "Couldn't select *anything*?"); 1639 return Result; 1640 } 1641 private: 1642 bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth); 1643 bool MatchAddr(Value *V, unsigned Depth); 1644 bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth, 1645 bool *MovedAway = nullptr); 1646 bool IsProfitableToFoldIntoAddressingMode(Instruction *I, 1647 ExtAddrMode &AMBefore, 1648 ExtAddrMode &AMAfter); 1649 bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2); 1650 bool IsPromotionProfitable(unsigned MatchedSize, unsigned SizeWithPromotion, 1651 Value *PromotedOperand) const; 1652 }; 1653 1654 /// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode. 1655 /// Return true and update AddrMode if this addr mode is legal for the target, 1656 /// false if not. 1657 bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale, 1658 unsigned Depth) { 1659 // If Scale is 1, then this is the same as adding ScaleReg to the addressing 1660 // mode. Just process that directly. 1661 if (Scale == 1) 1662 return MatchAddr(ScaleReg, Depth); 1663 1664 // If the scale is 0, it takes nothing to add this. 1665 if (Scale == 0) 1666 return true; 1667 1668 // If we already have a scale of this value, we can add to it, otherwise, we 1669 // need an available scale field. 1670 if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg) 1671 return false; 1672 1673 ExtAddrMode TestAddrMode = AddrMode; 1674 1675 // Add scale to turn X*4+X*3 -> X*7. This could also do things like 1676 // [A+B + A*7] -> [B+A*8]. 1677 TestAddrMode.Scale += Scale; 1678 TestAddrMode.ScaledReg = ScaleReg; 1679 1680 // If the new address isn't legal, bail out. 
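  // For example, if the mode already holds a hypothetical register %idx at
  // scale 2 and a new use of %idx at scale 4 is matched, the candidate scale
  // becomes 6 on the same register; whether that is encodable is entirely up
  // to the target hook below.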
1681 if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) 1682 return false; 1683 1684 // It was legal, so commit it. 1685 AddrMode = TestAddrMode; 1686 1687 // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now 1688 // to see if ScaleReg is actually X+C. If so, we can turn this into adding 1689 // X*Scale + C*Scale to addr mode. 1690 ConstantInt *CI = nullptr; Value *AddLHS = nullptr; 1691 if (isa<Instruction>(ScaleReg) && // not a constant expr. 1692 match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) { 1693 TestAddrMode.ScaledReg = AddLHS; 1694 TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale; 1695 1696 // If this addressing mode is legal, commit it and remember that we folded 1697 // this instruction. 1698 if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) { 1699 AddrModeInsts.push_back(cast<Instruction>(ScaleReg)); 1700 AddrMode = TestAddrMode; 1701 return true; 1702 } 1703 } 1704 1705 // Otherwise, not (x+c)*scale, just return what we have. 1706 return true; 1707 } 1708 1709 /// MightBeFoldableInst - This is a little filter, which returns true if an 1710 /// addressing computation involving I might be folded into a load/store 1711 /// accessing it. This doesn't need to be perfect, but needs to accept at least 1712 /// the set of instructions that MatchOperationAddr can. 1713 static bool MightBeFoldableInst(Instruction *I) { 1714 switch (I->getOpcode()) { 1715 case Instruction::BitCast: 1716 case Instruction::AddrSpaceCast: 1717 // Don't touch identity bitcasts. 1718 if (I->getType() == I->getOperand(0)->getType()) 1719 return false; 1720 return I->getType()->isPointerTy() || I->getType()->isIntegerTy(); 1721 case Instruction::PtrToInt: 1722 // PtrToInt is always a noop, as we know that the int type is pointer sized. 1723 return true; 1724 case Instruction::IntToPtr: 1725 // We know the input is intptr_t, so this is foldable. 1726 return true; 1727 case Instruction::Add: 1728 return true; 1729 case Instruction::Mul: 1730 case Instruction::Shl: 1731 // Can only handle X*C and X << C. 1732 return isa<ConstantInt>(I->getOperand(1)); 1733 case Instruction::GetElementPtr: 1734 return true; 1735 default: 1736 return false; 1737 } 1738 } 1739 1740 /// \brief Check whether or not \p Val is a legal instruction for \p TLI. 1741 /// \note \p Val is assumed to be the product of some type promotion. 1742 /// Therefore if \p Val has an undefined state in \p TLI, this is assumed 1743 /// to be legal, as the non-promoted value would have had the same state. 1744 static bool isPromotedInstructionLegal(const TargetLowering &TLI, Value *Val) { 1745 Instruction *PromotedInst = dyn_cast<Instruction>(Val); 1746 if (!PromotedInst) 1747 return false; 1748 int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode()); 1749 // If the ISDOpcode is undefined, it was undefined before the promotion. 1750 if (!ISDOpcode) 1751 return true; 1752 // Otherwise, check if the promoted instruction is legal or not. 1753 return TLI.isOperationLegalOrCustom( 1754 ISDOpcode, TLI.getValueType(PromotedInst->getType())); 1755 } 1756 1757 /// \brief Hepler class to perform type promotion. 1758 class TypePromotionHelper { 1759 /// \brief Utility function to check whether or not a sign or zero extension 1760 /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by 1761 /// either using the operands of \p Inst or promoting \p Inst. 1762 /// The type of the extension is defined by \p IsSExt. 
1763 /// In other words, check if: 1764 /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType. 1765 /// #1 Promotion applies: 1766 /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...). 1767 /// #2 Operand reuses: 1768 /// ext opnd1 to ConsideredExtType. 1769 /// \p PromotedInsts maps the instructions to their type before promotion. 1770 static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType, 1771 const InstrToOrigTy &PromotedInsts, bool IsSExt); 1772 1773 /// \brief Utility function to determine if \p OpIdx should be promoted when 1774 /// promoting \p Inst. 1775 static bool shouldExtOperand(const Instruction *Inst, int OpIdx) { 1776 if (isa<SelectInst>(Inst) && OpIdx == 0) 1777 return false; 1778 return true; 1779 } 1780 1781 /// \brief Utility function to promote the operand of \p Ext when this 1782 /// operand is a promotable trunc or sext or zext. 1783 /// \p PromotedInsts maps the instructions to their type before promotion. 1784 /// \p CreatedInsts[out] contains how many non-free instructions have been 1785 /// created to promote the operand of Ext. 1786 /// Newly added extensions are inserted in \p Exts. 1787 /// Newly added truncates are inserted in \p Truncs. 1788 /// Should never be called directly. 1789 /// \return The promoted value which is used instead of Ext. 1790 static Value *promoteOperandForTruncAndAnyExt( 1791 Instruction *Ext, TypePromotionTransaction &TPT, 1792 InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, 1793 SmallVectorImpl<Instruction *> *Exts, 1794 SmallVectorImpl<Instruction *> *Truncs); 1795 1796 /// \brief Utility function to promote the operand of \p Ext when this 1797 /// operand is promotable and is not a supported trunc or sext. 1798 /// \p PromotedInsts maps the instructions to their type before promotion. 1799 /// \p CreatedInsts[out] contains how many non-free instructions have been 1800 /// created to promote the operand of Ext. 1801 /// Newly added extensions are inserted in \p Exts. 1802 /// Newly added truncates are inserted in \p Truncs. 1803 /// Should never be called directly. 1804 /// \return The promoted value which is used instead of Ext. 1805 static Value * 1806 promoteOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT, 1807 InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, 1808 SmallVectorImpl<Instruction *> *Exts, 1809 SmallVectorImpl<Instruction *> *Truncs, bool IsSExt); 1810 1811 /// \see promoteOperandForOther. 1812 static Value * 1813 signExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT, 1814 InstrToOrigTy &PromotedInsts, 1815 unsigned &CreatedInsts, 1816 SmallVectorImpl<Instruction *> *Exts, 1817 SmallVectorImpl<Instruction *> *Truncs) { 1818 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts, 1819 Truncs, true); 1820 } 1821 1822 /// \see promoteOperandForOther. 1823 static Value * 1824 zeroExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT, 1825 InstrToOrigTy &PromotedInsts, 1826 unsigned &CreatedInsts, 1827 SmallVectorImpl<Instruction *> *Exts, 1828 SmallVectorImpl<Instruction *> *Truncs) { 1829 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts, 1830 Truncs, false); 1831 } 1832 1833 public: 1834 /// Type for the utility function that promotes the operand of Ext. 
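  /// The returned function is one of the promoteOperandFor* helpers above; it
  /// performs all of its rewriting through the given TypePromotionTransaction
  /// so that the caller can roll the promotion back if it does not pay off.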
  typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT,
                           InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
                           SmallVectorImpl<Instruction *> *Exts,
                           SmallVectorImpl<Instruction *> *Truncs);

  /// \brief Given a sign/zero extend instruction \p Ext, return the appropriate
  /// action to promote the operand of \p Ext instead of using Ext.
  /// \return NULL if no promotable action is possible with the current
  /// sign extension.
  /// \p InsertedTruncs keeps track of all the truncate instructions inserted by
  /// the other CodeGenPrepare optimizations. This information is important
  /// because we do not want to promote these instructions, as CodeGenPrepare
  /// will reinsert them later, thus creating an infinite create/remove loop.
  /// \p PromotedInsts maps the instructions to their type before promotion.
  static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedTruncs,
                          const TargetLowering &TLI,
                          const InstrToOrigTy &PromotedInsts);
};

bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
                                        Type *ConsideredExtType,
                                        const InstrToOrigTy &PromotedInsts,
                                        bool IsSExt) {
  // The promotion helper does not know how to deal with vector types yet.
  // To be able to fix that, we would need to fix the places where we
  // statically extend, e.g., constants and such.
  if (Inst->getType()->isVectorTy())
    return false;

  // We can always get through zext.
  if (isa<ZExtInst>(Inst))
    return true;

  // sext(sext) is ok too.
  if (IsSExt && isa<SExtInst>(Inst))
    return true;

  // We can get through a binary operator if it is legal. In other words, the
  // binary operator must have a nuw or nsw flag.
  const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
  if (BinOp && isa<OverflowingBinaryOperator>(BinOp) &&
      ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
       (IsSExt && BinOp->hasNoSignedWrap())))
    return true;

  // Check if we can do the following simplification.
  // ext(trunc(opnd)) --> ext(opnd)
  if (!isa<TruncInst>(Inst))
    return false;

  Value *OpndVal = Inst->getOperand(0);
  // Check if we can use this operand in the extension.
  // If the type is larger than the result type of the extension,
  // we cannot.
  if (!OpndVal->getType()->isIntegerTy() ||
      OpndVal->getType()->getIntegerBitWidth() >
          ConsideredExtType->getIntegerBitWidth())
    return false;

  // If the operand of the truncate is not an instruction, we will not have
  // any information on the dropped bits.
  // (Actually we could for constants, but it is not worth the extra logic.)
  Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
  if (!Opnd)
    return false;

  // Check that the source type is narrow enough, i.e., that the truncate just
  // drops extended bits of the same kind as the extension.
  // #1 Get the type of the operand and check the kind of the extended bits.
  const Type *OpndType;
  InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
  if (It != PromotedInsts.end() && It->second.IsSExt == IsSExt)
    OpndType = It->second.Ty;
  else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
    OpndType = Opnd->getOperand(0)->getType();
  else
    return false;

  // #2 Check that the truncate just drops extended bits.
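  // E.g., for a hypothetical chain
  //   %a = sext i8 %x to i32
  //   %t = trunc i32 %a to i16
  // OpndType is i8, so a truncate to i16 (or wider) only removed copies of the
  // sign bit, and the extension can be re-applied on top of the operand.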
1914 if (Inst->getType()->getIntegerBitWidth() >= OpndType->getIntegerBitWidth()) 1915 return true; 1916 1917 return false; 1918 } 1919 1920 TypePromotionHelper::Action TypePromotionHelper::getAction( 1921 Instruction *Ext, const SetOfInstrs &InsertedTruncs, 1922 const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) { 1923 assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) && 1924 "Unexpected instruction type"); 1925 Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0)); 1926 Type *ExtTy = Ext->getType(); 1927 bool IsSExt = isa<SExtInst>(Ext); 1928 // If the operand of the extension is not an instruction, we cannot 1929 // get through. 1930 // If it, check we can get through. 1931 if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt)) 1932 return nullptr; 1933 1934 // Do not promote if the operand has been added by codegenprepare. 1935 // Otherwise, it means we are undoing an optimization that is likely to be 1936 // redone, thus causing potential infinite loop. 1937 if (isa<TruncInst>(ExtOpnd) && InsertedTruncs.count(ExtOpnd)) 1938 return nullptr; 1939 1940 // SExt or Trunc instructions. 1941 // Return the related handler. 1942 if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) || 1943 isa<ZExtInst>(ExtOpnd)) 1944 return promoteOperandForTruncAndAnyExt; 1945 1946 // Regular instruction. 1947 // Abort early if we will have to insert non-free instructions. 1948 if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType())) 1949 return nullptr; 1950 return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther; 1951 } 1952 1953 Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt( 1954 llvm::Instruction *SExt, TypePromotionTransaction &TPT, 1955 InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, 1956 SmallVectorImpl<Instruction *> *Exts, 1957 SmallVectorImpl<Instruction *> *Truncs) { 1958 // By construction, the operand of SExt is an instruction. Otherwise we cannot 1959 // get through it and this method should not be called. 1960 Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0)); 1961 Value *ExtVal = SExt; 1962 if (isa<ZExtInst>(SExtOpnd)) { 1963 // Replace s|zext(zext(opnd)) 1964 // => zext(opnd). 1965 Value *ZExt = 1966 TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType()); 1967 TPT.replaceAllUsesWith(SExt, ZExt); 1968 TPT.eraseInstruction(SExt); 1969 ExtVal = ZExt; 1970 } else { 1971 // Replace z|sext(trunc(opnd)) or sext(sext(opnd)) 1972 // => z|sext(opnd). 1973 TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0)); 1974 } 1975 CreatedInsts = 0; 1976 1977 // Remove dead code. 1978 if (SExtOpnd->use_empty()) 1979 TPT.eraseInstruction(SExtOpnd); 1980 1981 // Check if the extension is still needed. 1982 Instruction *ExtInst = dyn_cast<Instruction>(ExtVal); 1983 if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) { 1984 if (ExtInst && Exts) 1985 Exts->push_back(ExtInst); 1986 return ExtVal; 1987 } 1988 1989 // At this point we have: ext ty opnd to ty. 1990 // Reassign the uses of ExtInst to the opnd and remove ExtInst. 1991 Value *NextVal = ExtInst->getOperand(0); 1992 TPT.eraseInstruction(ExtInst, NextVal); 1993 return NextVal; 1994 } 1995 1996 Value *TypePromotionHelper::promoteOperandForOther( 1997 Instruction *Ext, TypePromotionTransaction &TPT, 1998 InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, 1999 SmallVectorImpl<Instruction *> *Exts, 2000 SmallVectorImpl<Instruction *> *Truncs, bool IsSExt) { 2001 // By construction, the operand of Ext is an instruction. 
Otherwise we cannot 2002 // get through it and this method should not be called. 2003 Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0)); 2004 CreatedInsts = 0; 2005 if (!ExtOpnd->hasOneUse()) { 2006 // ExtOpnd will be promoted. 2007 // All its uses, but Ext, will need to use a truncated value of the 2008 // promoted version. 2009 // Create the truncate now. 2010 Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType()); 2011 if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) { 2012 ITrunc->removeFromParent(); 2013 // Insert it just after the definition. 2014 ITrunc->insertAfter(ExtOpnd); 2015 if (Truncs) 2016 Truncs->push_back(ITrunc); 2017 } 2018 2019 TPT.replaceAllUsesWith(ExtOpnd, Trunc); 2020 // Restore the operand of Ext (which has been replace by the previous call 2021 // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext. 2022 TPT.setOperand(Ext, 0, ExtOpnd); 2023 } 2024 2025 // Get through the Instruction: 2026 // 1. Update its type. 2027 // 2. Replace the uses of Ext by Inst. 2028 // 3. Extend each operand that needs to be extended. 2029 2030 // Remember the original type of the instruction before promotion. 2031 // This is useful to know that the high bits are sign extended bits. 2032 PromotedInsts.insert(std::pair<Instruction *, TypeIsSExt>( 2033 ExtOpnd, TypeIsSExt(ExtOpnd->getType(), IsSExt))); 2034 // Step #1. 2035 TPT.mutateType(ExtOpnd, Ext->getType()); 2036 // Step #2. 2037 TPT.replaceAllUsesWith(Ext, ExtOpnd); 2038 // Step #3. 2039 Instruction *ExtForOpnd = Ext; 2040 2041 DEBUG(dbgs() << "Propagate Ext to operands\n"); 2042 for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx; 2043 ++OpIdx) { 2044 DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n'); 2045 if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() || 2046 !shouldExtOperand(ExtOpnd, OpIdx)) { 2047 DEBUG(dbgs() << "No need to propagate\n"); 2048 continue; 2049 } 2050 // Check if we can statically extend the operand. 2051 Value *Opnd = ExtOpnd->getOperand(OpIdx); 2052 if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) { 2053 DEBUG(dbgs() << "Statically extend\n"); 2054 unsigned BitWidth = Ext->getType()->getIntegerBitWidth(); 2055 APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth) 2056 : Cst->getValue().zext(BitWidth); 2057 TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal)); 2058 continue; 2059 } 2060 // UndefValue are typed, so we have to statically sign extend them. 2061 if (isa<UndefValue>(Opnd)) { 2062 DEBUG(dbgs() << "Statically extend\n"); 2063 TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType())); 2064 continue; 2065 } 2066 2067 // Otherwise we have to explicity sign extend the operand. 2068 // Check if Ext was reused to extend an operand. 2069 if (!ExtForOpnd) { 2070 // If yes, create a new one. 2071 DEBUG(dbgs() << "More operands to ext\n"); 2072 ExtForOpnd = 2073 cast<Instruction>(IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType()) 2074 : TPT.createZExt(Ext, Opnd, Ext->getType())); 2075 ++CreatedInsts; 2076 } 2077 if (Exts) 2078 Exts->push_back(ExtForOpnd); 2079 TPT.setOperand(ExtForOpnd, 0, Opnd); 2080 2081 // Move the sign extension before the insertion point. 2082 TPT.moveBefore(ExtForOpnd, ExtOpnd); 2083 TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd); 2084 // If more sext are required, new instructions will have to be created. 
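    // Clearing ExtForOpnd marks the original extension as consumed for this
    // operand; any further operand that needs widening will get a fresh
    // sext/zext, which is what CreatedInsts counts above.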
2085 ExtForOpnd = nullptr; 2086 } 2087 if (ExtForOpnd == Ext) { 2088 DEBUG(dbgs() << "Extension is useless now\n"); 2089 TPT.eraseInstruction(Ext); 2090 } 2091 return ExtOpnd; 2092 } 2093 2094 /// IsPromotionProfitable - Check whether or not promoting an instruction 2095 /// to a wider type was profitable. 2096 /// \p MatchedSize gives the number of instructions that have been matched 2097 /// in the addressing mode after the promotion was applied. 2098 /// \p SizeWithPromotion gives the number of created instructions for 2099 /// the promotion plus the number of instructions that have been 2100 /// matched in the addressing mode before the promotion. 2101 /// \p PromotedOperand is the value that has been promoted. 2102 /// \return True if the promotion is profitable, false otherwise. 2103 bool 2104 AddressingModeMatcher::IsPromotionProfitable(unsigned MatchedSize, 2105 unsigned SizeWithPromotion, 2106 Value *PromotedOperand) const { 2107 // We folded less instructions than what we created to promote the operand. 2108 // This is not profitable. 2109 if (MatchedSize < SizeWithPromotion) 2110 return false; 2111 if (MatchedSize > SizeWithPromotion) 2112 return true; 2113 // The promotion is neutral but it may help folding the sign extension in 2114 // loads for instance. 2115 // Check that we did not create an illegal instruction. 2116 return isPromotedInstructionLegal(TLI, PromotedOperand); 2117 } 2118 2119 /// MatchOperationAddr - Given an instruction or constant expr, see if we can 2120 /// fold the operation into the addressing mode. If so, update the addressing 2121 /// mode and return true, otherwise return false without modifying AddrMode. 2122 /// If \p MovedAway is not NULL, it contains the information of whether or 2123 /// not AddrInst has to be folded into the addressing mode on success. 2124 /// If \p MovedAway == true, \p AddrInst will not be part of the addressing 2125 /// because it has been moved away. 2126 /// Thus AddrInst must not be added in the matched instructions. 2127 /// This state can happen when AddrInst is a sext, since it may be moved away. 2128 /// Therefore, AddrInst may not be valid when MovedAway is true and it must 2129 /// not be referenced anymore. 2130 bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, 2131 unsigned Depth, 2132 bool *MovedAway) { 2133 // Avoid exponential behavior on extremely deep expression trees. 2134 if (Depth >= 5) return false; 2135 2136 // By default, all matched instructions stay in place. 2137 if (MovedAway) 2138 *MovedAway = false; 2139 2140 switch (Opcode) { 2141 case Instruction::PtrToInt: 2142 // PtrToInt is always a noop, as we know that the int type is pointer sized. 2143 return MatchAddr(AddrInst->getOperand(0), Depth); 2144 case Instruction::IntToPtr: 2145 // This inttoptr is a no-op if the integer type is pointer sized. 2146 if (TLI.getValueType(AddrInst->getOperand(0)->getType()) == 2147 TLI.getPointerTy(AddrInst->getType()->getPointerAddressSpace())) 2148 return MatchAddr(AddrInst->getOperand(0), Depth); 2149 return false; 2150 case Instruction::BitCast: 2151 case Instruction::AddrSpaceCast: 2152 // BitCast is always a noop, and we can handle it as long as it is 2153 // int->int or pointer->pointer (we don't want int<->fp or something). 2154 if ((AddrInst->getOperand(0)->getType()->isPointerTy() || 2155 AddrInst->getOperand(0)->getType()->isIntegerTy()) && 2156 // Don't touch identity bitcasts. These were probably put here by LSR, 2157 // and we don't want to mess around with them. 
Assume it knows what it 2158 // is doing. 2159 AddrInst->getOperand(0)->getType() != AddrInst->getType()) 2160 return MatchAddr(AddrInst->getOperand(0), Depth); 2161 return false; 2162 case Instruction::Add: { 2163 // Check to see if we can merge in the RHS then the LHS. If so, we win. 2164 ExtAddrMode BackupAddrMode = AddrMode; 2165 unsigned OldSize = AddrModeInsts.size(); 2166 // Start a transaction at this point. 2167 // The LHS may match but not the RHS. 2168 // Therefore, we need a higher level restoration point to undo partially 2169 // matched operation. 2170 TypePromotionTransaction::ConstRestorationPt LastKnownGood = 2171 TPT.getRestorationPoint(); 2172 2173 if (MatchAddr(AddrInst->getOperand(1), Depth+1) && 2174 MatchAddr(AddrInst->getOperand(0), Depth+1)) 2175 return true; 2176 2177 // Restore the old addr mode info. 2178 AddrMode = BackupAddrMode; 2179 AddrModeInsts.resize(OldSize); 2180 TPT.rollback(LastKnownGood); 2181 2182 // Otherwise this was over-aggressive. Try merging in the LHS then the RHS. 2183 if (MatchAddr(AddrInst->getOperand(0), Depth+1) && 2184 MatchAddr(AddrInst->getOperand(1), Depth+1)) 2185 return true; 2186 2187 // Otherwise we definitely can't merge the ADD in. 2188 AddrMode = BackupAddrMode; 2189 AddrModeInsts.resize(OldSize); 2190 TPT.rollback(LastKnownGood); 2191 break; 2192 } 2193 //case Instruction::Or: 2194 // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD. 2195 //break; 2196 case Instruction::Mul: 2197 case Instruction::Shl: { 2198 // Can only handle X*C and X << C. 2199 ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1)); 2200 if (!RHS) 2201 return false; 2202 int64_t Scale = RHS->getSExtValue(); 2203 if (Opcode == Instruction::Shl) 2204 Scale = 1LL << Scale; 2205 2206 return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth); 2207 } 2208 case Instruction::GetElementPtr: { 2209 // Scan the GEP. We check it if it contains constant offsets and at most 2210 // one variable offset. 2211 int VariableOperand = -1; 2212 unsigned VariableScale = 0; 2213 2214 int64_t ConstantOffset = 0; 2215 const DataLayout *TD = TLI.getDataLayout(); 2216 gep_type_iterator GTI = gep_type_begin(AddrInst); 2217 for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) { 2218 if (StructType *STy = dyn_cast<StructType>(*GTI)) { 2219 const StructLayout *SL = TD->getStructLayout(STy); 2220 unsigned Idx = 2221 cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue(); 2222 ConstantOffset += SL->getElementOffset(Idx); 2223 } else { 2224 uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType()); 2225 if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) { 2226 ConstantOffset += CI->getSExtValue()*TypeSize; 2227 } else if (TypeSize) { // Scales of zero don't do anything. 2228 // We only allow one variable index at the moment. 2229 if (VariableOperand != -1) 2230 return false; 2231 2232 // Remember the variable index. 2233 VariableOperand = i; 2234 VariableScale = TypeSize; 2235 } 2236 } 2237 } 2238 2239 // A common case is for the GEP to only do a constant offset. In this case, 2240 // just add it to the disp field and check validity. 2241 if (VariableOperand == -1) { 2242 AddrMode.BaseOffs += ConstantOffset; 2243 if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){ 2244 // Check to see if we can fold the base pointer in too. 
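      // E.g., for a hypothetical "getelementptr %struct.S* %p, i64 0, i32 2"
      // every index is constant, so only the base pointer %p is left to be
      // matched into the addressing mode.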
2245 if (MatchAddr(AddrInst->getOperand(0), Depth+1)) 2246 return true; 2247 } 2248 AddrMode.BaseOffs -= ConstantOffset; 2249 return false; 2250 } 2251 2252 // Save the valid addressing mode in case we can't match. 2253 ExtAddrMode BackupAddrMode = AddrMode; 2254 unsigned OldSize = AddrModeInsts.size(); 2255 2256 // See if the scale and offset amount is valid for this target. 2257 AddrMode.BaseOffs += ConstantOffset; 2258 2259 // Match the base operand of the GEP. 2260 if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) { 2261 // If it couldn't be matched, just stuff the value in a register. 2262 if (AddrMode.HasBaseReg) { 2263 AddrMode = BackupAddrMode; 2264 AddrModeInsts.resize(OldSize); 2265 return false; 2266 } 2267 AddrMode.HasBaseReg = true; 2268 AddrMode.BaseReg = AddrInst->getOperand(0); 2269 } 2270 2271 // Match the remaining variable portion of the GEP. 2272 if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale, 2273 Depth)) { 2274 // If it couldn't be matched, try stuffing the base into a register 2275 // instead of matching it, and retrying the match of the scale. 2276 AddrMode = BackupAddrMode; 2277 AddrModeInsts.resize(OldSize); 2278 if (AddrMode.HasBaseReg) 2279 return false; 2280 AddrMode.HasBaseReg = true; 2281 AddrMode.BaseReg = AddrInst->getOperand(0); 2282 AddrMode.BaseOffs += ConstantOffset; 2283 if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), 2284 VariableScale, Depth)) { 2285 // If even that didn't work, bail. 2286 AddrMode = BackupAddrMode; 2287 AddrModeInsts.resize(OldSize); 2288 return false; 2289 } 2290 } 2291 2292 return true; 2293 } 2294 case Instruction::SExt: 2295 case Instruction::ZExt: { 2296 Instruction *Ext = dyn_cast<Instruction>(AddrInst); 2297 if (!Ext) 2298 return false; 2299 2300 // Try to move this ext out of the way of the addressing mode. 2301 // Ask for a method for doing so. 2302 TypePromotionHelper::Action TPH = 2303 TypePromotionHelper::getAction(Ext, InsertedTruncs, TLI, PromotedInsts); 2304 if (!TPH) 2305 return false; 2306 2307 TypePromotionTransaction::ConstRestorationPt LastKnownGood = 2308 TPT.getRestorationPoint(); 2309 unsigned CreatedInsts = 0; 2310 Value *PromotedOperand = 2311 TPH(Ext, TPT, PromotedInsts, CreatedInsts, nullptr, nullptr); 2312 // SExt has been moved away. 2313 // Thus either it will be rematched later in the recursive calls or it is 2314 // gone. Anyway, we must not fold it into the addressing mode at this point. 2315 // E.g., 2316 // op = add opnd, 1 2317 // idx = ext op 2318 // addr = gep base, idx 2319 // is now: 2320 // promotedOpnd = ext opnd <- no match here 2321 // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls) 2322 // addr = gep base, op <- match 2323 if (MovedAway) 2324 *MovedAway = true; 2325 2326 assert(PromotedOperand && 2327 "TypePromotionHelper should have filtered out those cases"); 2328 2329 ExtAddrMode BackupAddrMode = AddrMode; 2330 unsigned OldSize = AddrModeInsts.size(); 2331 2332 if (!MatchAddr(PromotedOperand, Depth) || 2333 !IsPromotionProfitable(AddrModeInsts.size(), OldSize + CreatedInsts, 2334 PromotedOperand)) { 2335 AddrMode = BackupAddrMode; 2336 AddrModeInsts.resize(OldSize); 2337 DEBUG(dbgs() << "Sign extension does not pay off: rollback\n"); 2338 TPT.rollback(LastKnownGood); 2339 return false; 2340 } 2341 return true; 2342 } 2343 } 2344 return false; 2345 } 2346 2347 /// MatchAddr - If we can, try to add the value of 'Addr' into the current 2348 /// addressing mode. 
If Addr can't be added to AddrMode this returns false and 2349 /// leaves AddrMode unmodified. This assumes that Addr is either a pointer type 2350 /// or intptr_t for the target. 2351 /// 2352 bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { 2353 // Start a transaction at this point that we will rollback if the matching 2354 // fails. 2355 TypePromotionTransaction::ConstRestorationPt LastKnownGood = 2356 TPT.getRestorationPoint(); 2357 if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) { 2358 // Fold in immediates if legal for the target. 2359 AddrMode.BaseOffs += CI->getSExtValue(); 2360 if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) 2361 return true; 2362 AddrMode.BaseOffs -= CI->getSExtValue(); 2363 } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) { 2364 // If this is a global variable, try to fold it into the addressing mode. 2365 if (!AddrMode.BaseGV) { 2366 AddrMode.BaseGV = GV; 2367 if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) 2368 return true; 2369 AddrMode.BaseGV = nullptr; 2370 } 2371 } else if (Instruction *I = dyn_cast<Instruction>(Addr)) { 2372 ExtAddrMode BackupAddrMode = AddrMode; 2373 unsigned OldSize = AddrModeInsts.size(); 2374 2375 // Check to see if it is possible to fold this operation. 2376 bool MovedAway = false; 2377 if (MatchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) { 2378 // This instruction may have been move away. If so, there is nothing 2379 // to check here. 2380 if (MovedAway) 2381 return true; 2382 // Okay, it's possible to fold this. Check to see if it is actually 2383 // *profitable* to do so. We use a simple cost model to avoid increasing 2384 // register pressure too much. 2385 if (I->hasOneUse() || 2386 IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) { 2387 AddrModeInsts.push_back(I); 2388 return true; 2389 } 2390 2391 // It isn't profitable to do this, roll back. 2392 //cerr << "NOT FOLDING: " << *I; 2393 AddrMode = BackupAddrMode; 2394 AddrModeInsts.resize(OldSize); 2395 TPT.rollback(LastKnownGood); 2396 } 2397 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) { 2398 if (MatchOperationAddr(CE, CE->getOpcode(), Depth)) 2399 return true; 2400 TPT.rollback(LastKnownGood); 2401 } else if (isa<ConstantPointerNull>(Addr)) { 2402 // Null pointer gets folded without affecting the addressing mode. 2403 return true; 2404 } 2405 2406 // Worse case, the target should support [reg] addressing modes. :) 2407 if (!AddrMode.HasBaseReg) { 2408 AddrMode.HasBaseReg = true; 2409 AddrMode.BaseReg = Addr; 2410 // Still check for legality in case the target supports [imm] but not [i+r]. 2411 if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) 2412 return true; 2413 AddrMode.HasBaseReg = false; 2414 AddrMode.BaseReg = nullptr; 2415 } 2416 2417 // If the base register is already taken, see if we can do [r+r]. 2418 if (AddrMode.Scale == 0) { 2419 AddrMode.Scale = 1; 2420 AddrMode.ScaledReg = Addr; 2421 if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) 2422 return true; 2423 AddrMode.Scale = 0; 2424 AddrMode.ScaledReg = nullptr; 2425 } 2426 // Couldn't match. 2427 TPT.rollback(LastKnownGood); 2428 return false; 2429 } 2430 2431 /// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified 2432 /// inline asm call are due to memory operands. If so, return true, otherwise 2433 /// return false. 
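/// In other words, OpVal may only appear as an indirect memory constraint
/// (an "*m"-style operand); any other use of it by the asm defeats folding
/// its address computation.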
2434 static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, 2435 const TargetLowering &TLI) { 2436 TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI)); 2437 for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { 2438 TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; 2439 2440 // Compute the constraint code and ConstraintType to use. 2441 TLI.ComputeConstraintToUse(OpInfo, SDValue()); 2442 2443 // If this asm operand is our Value*, and if it isn't an indirect memory 2444 // operand, we can't fold it! 2445 if (OpInfo.CallOperandVal == OpVal && 2446 (OpInfo.ConstraintType != TargetLowering::C_Memory || 2447 !OpInfo.isIndirect)) 2448 return false; 2449 } 2450 2451 return true; 2452 } 2453 2454 /// FindAllMemoryUses - Recursively walk all the uses of I until we find a 2455 /// memory use. If we find an obviously non-foldable instruction, return true. 2456 /// Add the ultimately found memory instructions to MemoryUses. 2457 static bool FindAllMemoryUses(Instruction *I, 2458 SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses, 2459 SmallPtrSetImpl<Instruction*> &ConsideredInsts, 2460 const TargetLowering &TLI) { 2461 // If we already considered this instruction, we're done. 2462 if (!ConsideredInsts.insert(I).second) 2463 return false; 2464 2465 // If this is an obviously unfoldable instruction, bail out. 2466 if (!MightBeFoldableInst(I)) 2467 return true; 2468 2469 // Loop over all the uses, recursively processing them. 2470 for (Use &U : I->uses()) { 2471 Instruction *UserI = cast<Instruction>(U.getUser()); 2472 2473 if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) { 2474 MemoryUses.push_back(std::make_pair(LI, U.getOperandNo())); 2475 continue; 2476 } 2477 2478 if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) { 2479 unsigned opNo = U.getOperandNo(); 2480 if (opNo == 0) return true; // Storing addr, not into addr. 2481 MemoryUses.push_back(std::make_pair(SI, opNo)); 2482 continue; 2483 } 2484 2485 if (CallInst *CI = dyn_cast<CallInst>(UserI)) { 2486 InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue()); 2487 if (!IA) return true; 2488 2489 // If this is a memory operand, we're cool, otherwise bail out. 2490 if (!IsOperandAMemoryOperand(CI, IA, I, TLI)) 2491 return true; 2492 continue; 2493 } 2494 2495 if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI)) 2496 return true; 2497 } 2498 2499 return false; 2500 } 2501 2502 /// ValueAlreadyLiveAtInst - Retrn true if Val is already known to be live at 2503 /// the use site that we're folding it into. If so, there is no cost to 2504 /// include it in the addressing mode. KnownLive1 and KnownLive2 are two values 2505 /// that we know are live at the instruction already. 2506 bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1, 2507 Value *KnownLive2) { 2508 // If Val is either of the known-live values, we know it is live! 2509 if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2) 2510 return true; 2511 2512 // All values other than instructions and arguments (e.g. constants) are live. 2513 if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true; 2514 2515 // If Val is a constant sized alloca in the entry block, it is live, this is 2516 // true because it is just a reference to the stack/frame pointer, which is 2517 // live for the whole function. 
2518 if (AllocaInst *AI = dyn_cast<AllocaInst>(Val)) 2519 if (AI->isStaticAlloca()) 2520 return true; 2521 2522 // Check to see if this value is already used in the memory instruction's 2523 // block. If so, it's already live into the block at the very least, so we 2524 // can reasonably fold it. 2525 return Val->isUsedInBasicBlock(MemoryInst->getParent()); 2526 } 2527 2528 /// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing 2529 /// mode of the machine to fold the specified instruction into a load or store 2530 /// that ultimately uses it. However, the specified instruction has multiple 2531 /// uses. Given this, it may actually increase register pressure to fold it 2532 /// into the load. For example, consider this code: 2533 /// 2534 /// X = ... 2535 /// Y = X+1 2536 /// use(Y) -> nonload/store 2537 /// Z = Y+1 2538 /// load Z 2539 /// 2540 /// In this case, Y has multiple uses, and can be folded into the load of Z 2541 /// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to 2542 /// be live at the use(Y) line. If we don't fold Y into load Z, we use one 2543 /// fewer register. Since Y can't be folded into "use(Y)" we don't increase the 2544 /// number of computations either. 2545 /// 2546 /// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If 2547 /// X was live across 'load Z' for other reasons, we actually *would* want to 2548 /// fold the addressing mode in the Z case. This would make Y die earlier. 2549 bool AddressingModeMatcher:: 2550 IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, 2551 ExtAddrMode &AMAfter) { 2552 if (IgnoreProfitability) return true; 2553 2554 // AMBefore is the addressing mode before this instruction was folded into it, 2555 // and AMAfter is the addressing mode after the instruction was folded. Get 2556 // the set of registers referenced by AMAfter and subtract out those 2557 // referenced by AMBefore: this is the set of values which folding in this 2558 // address extends the lifetime of. 2559 // 2560 // Note that there are only two potential values being referenced here, 2561 // BaseReg and ScaleReg (global addresses are always available, as are any 2562 // folded immediates). 2563 Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg; 2564 2565 // If the BaseReg or ScaledReg was referenced by the previous addrmode, their 2566 // lifetime wasn't extended by adding this instruction. 2567 if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg)) 2568 BaseReg = nullptr; 2569 if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg)) 2570 ScaledReg = nullptr; 2571 2572 // If folding this instruction (and it's subexprs) didn't extend any live 2573 // ranges, we're ok with it. 2574 if (!BaseReg && !ScaledReg) 2575 return true; 2576 2577 // If all uses of this instruction are ultimately load/store/inlineasm's, 2578 // check to see if their addressing modes will include this instruction. If 2579 // so, we can fold it into all uses, so it doesn't matter if it has multiple 2580 // uses. 2581 SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses; 2582 SmallPtrSet<Instruction*, 16> ConsideredInsts; 2583 if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI)) 2584 return false; // Has a non-memory, non-foldable use! 
2585 2586 // Now that we know that all uses of this instruction are part of a chain of 2587 // computation involving only operations that could theoretically be folded 2588 // into a memory use, loop over each of these uses and see if they could 2589 // *actually* fold the instruction. 2590 SmallVector<Instruction*, 32> MatchedAddrModeInsts; 2591 for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) { 2592 Instruction *User = MemoryUses[i].first; 2593 unsigned OpNo = MemoryUses[i].second; 2594 2595 // Get the access type of this use. If the use isn't a pointer, we don't 2596 // know what it accesses. 2597 Value *Address = User->getOperand(OpNo); 2598 if (!Address->getType()->isPointerTy()) 2599 return false; 2600 Type *AddressAccessTy = Address->getType()->getPointerElementType(); 2601 2602 // Do a match against the root of this address, ignoring profitability. This 2603 // will tell us if the addressing mode for the memory operation will 2604 // *actually* cover the shared instruction. 2605 ExtAddrMode Result; 2606 TypePromotionTransaction::ConstRestorationPt LastKnownGood = 2607 TPT.getRestorationPoint(); 2608 AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy, 2609 MemoryInst, Result, InsertedTruncs, 2610 PromotedInsts, TPT); 2611 Matcher.IgnoreProfitability = true; 2612 bool Success = Matcher.MatchAddr(Address, 0); 2613 (void)Success; assert(Success && "Couldn't select *anything*?"); 2614 2615 // The match was to check the profitability, the changes made are not 2616 // part of the original matcher. Therefore, they should be dropped 2617 // otherwise the original matcher will not present the right state. 2618 TPT.rollback(LastKnownGood); 2619 2620 // If the match didn't cover I, then it won't be shared by it. 2621 if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(), 2622 I) == MatchedAddrModeInsts.end()) 2623 return false; 2624 2625 MatchedAddrModeInsts.clear(); 2626 } 2627 2628 return true; 2629 } 2630 2631 } // end anonymous namespace 2632 2633 /// IsNonLocalValue - Return true if the specified values are defined in a 2634 /// different basic block than BB. 2635 static bool IsNonLocalValue(Value *V, BasicBlock *BB) { 2636 if (Instruction *I = dyn_cast<Instruction>(V)) 2637 return I->getParent() != BB; 2638 return false; 2639 } 2640 2641 /// OptimizeMemoryInst - Load and Store Instructions often have 2642 /// addressing modes that can do significant amounts of computation. As such, 2643 /// instruction selection will try to get the load or store to do as much 2644 /// computation as possible for the program. The problem is that isel can only 2645 /// see within a single block. As such, we sink as much legal addressing mode 2646 /// stuff into the block as possible. 2647 /// 2648 /// This method is used to optimize both load/store and inline asms with memory 2649 /// operands. 2650 bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, 2651 Type *AccessTy) { 2652 Value *Repl = Addr; 2653 2654 // Try to collapse single-value PHI nodes. This is necessary to undo 2655 // unprofitable PRE transformations. 2656 SmallVector<Value*, 8> worklist; 2657 SmallPtrSet<Value*, 16> Visited; 2658 worklist.push_back(Addr); 2659 2660 // Use a worklist to iteratively look through PHI nodes, and ensure that 2661 // the addressing mode obtained from the non-PHI roots of the graph 2662 // are equivalent. 
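  // E.g., if a hypothetical %p = phi [ %gep1, %bb1 ], [ %gep2, %bb2 ] and both
  // GEPs boil down to the same base-plus-offset form, the PHI is looked
  // through and a single address computation can be sunk next to MemoryInst.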
2663 Value *Consensus = nullptr; 2664 unsigned NumUsesConsensus = 0; 2665 bool IsNumUsesConsensusValid = false; 2666 SmallVector<Instruction*, 16> AddrModeInsts; 2667 ExtAddrMode AddrMode; 2668 TypePromotionTransaction TPT; 2669 TypePromotionTransaction::ConstRestorationPt LastKnownGood = 2670 TPT.getRestorationPoint(); 2671 while (!worklist.empty()) { 2672 Value *V = worklist.back(); 2673 worklist.pop_back(); 2674 2675 // Break use-def graph loops. 2676 if (!Visited.insert(V).second) { 2677 Consensus = nullptr; 2678 break; 2679 } 2680 2681 // For a PHI node, push all of its incoming values. 2682 if (PHINode *P = dyn_cast<PHINode>(V)) { 2683 for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) 2684 worklist.push_back(P->getIncomingValue(i)); 2685 continue; 2686 } 2687 2688 // For non-PHIs, determine the addressing mode being computed. 2689 SmallVector<Instruction*, 16> NewAddrModeInsts; 2690 ExtAddrMode NewAddrMode = AddressingModeMatcher::Match( 2691 V, AccessTy, MemoryInst, NewAddrModeInsts, *TLI, InsertedTruncsSet, 2692 PromotedInsts, TPT); 2693 2694 // This check is broken into two cases with very similar code to avoid using 2695 // getNumUses() as much as possible. Some values have a lot of uses, so 2696 // calling getNumUses() unconditionally caused a significant compile-time 2697 // regression. 2698 if (!Consensus) { 2699 Consensus = V; 2700 AddrMode = NewAddrMode; 2701 AddrModeInsts = NewAddrModeInsts; 2702 continue; 2703 } else if (NewAddrMode == AddrMode) { 2704 if (!IsNumUsesConsensusValid) { 2705 NumUsesConsensus = Consensus->getNumUses(); 2706 IsNumUsesConsensusValid = true; 2707 } 2708 2709 // Ensure that the obtained addressing mode is equivalent to that obtained 2710 // for all other roots of the PHI traversal. Also, when choosing one 2711 // such root as representative, select the one with the most uses in order 2712 // to keep the cost modeling heuristics in AddressingModeMatcher 2713 // applicable. 2714 unsigned NumUses = V->getNumUses(); 2715 if (NumUses > NumUsesConsensus) { 2716 Consensus = V; 2717 NumUsesConsensus = NumUses; 2718 AddrModeInsts = NewAddrModeInsts; 2719 } 2720 continue; 2721 } 2722 2723 Consensus = nullptr; 2724 break; 2725 } 2726 2727 // If the addressing mode couldn't be determined, or if multiple different 2728 // ones were determined, bail out now. 2729 if (!Consensus) { 2730 TPT.rollback(LastKnownGood); 2731 return false; 2732 } 2733 TPT.commit(); 2734 2735 // Check to see if any of the instructions supersumed by this addr mode are 2736 // non-local to I's BB. 2737 bool AnyNonLocal = false; 2738 for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) { 2739 if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) { 2740 AnyNonLocal = true; 2741 break; 2742 } 2743 } 2744 2745 // If all the instructions matched are already in this BB, don't do anything. 2746 if (!AnyNonLocal) { 2747 DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n"); 2748 return false; 2749 } 2750 2751 // Insert this computation right after this user. Since our caller is 2752 // scanning from the top of the BB to the bottom, reuse of the expr are 2753 // guaranteed to happen later. 2754 IRBuilder<> Builder(MemoryInst); 2755 2756 // Now that we determined the addressing expression we want to use and know 2757 // that we have to sink it into this block. Check to see if we have already 2758 // done this for some other load/store instr in this block. If so, reuse the 2759 // computation. 
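  // SunkAddrs is keyed on the original address value, so memory instructions
  // in this block that share the same pointer SSA value also share one sunk
  // computation (plus a bitcast below when their pointer types differ).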
2760 Value *&SunkAddr = SunkAddrs[Addr]; 2761 if (SunkAddr) { 2762 DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " 2763 << *MemoryInst << "\n"); 2764 if (SunkAddr->getType() != Addr->getType()) 2765 SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType()); 2766 } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() && 2767 TM && TM->getSubtarget<TargetSubtargetInfo>().useAA())) { 2768 // By default, we use the GEP-based method when AA is used later. This 2769 // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. 2770 DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " 2771 << *MemoryInst << "\n"); 2772 Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType()); 2773 Value *ResultPtr = nullptr, *ResultIndex = nullptr; 2774 2775 // First, find the pointer. 2776 if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) { 2777 ResultPtr = AddrMode.BaseReg; 2778 AddrMode.BaseReg = nullptr; 2779 } 2780 2781 if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) { 2782 // We can't add more than one pointer together, nor can we scale a 2783 // pointer (both of which seem meaningless). 2784 if (ResultPtr || AddrMode.Scale != 1) 2785 return false; 2786 2787 ResultPtr = AddrMode.ScaledReg; 2788 AddrMode.Scale = 0; 2789 } 2790 2791 if (AddrMode.BaseGV) { 2792 if (ResultPtr) 2793 return false; 2794 2795 ResultPtr = AddrMode.BaseGV; 2796 } 2797 2798 // If the real base value actually came from an inttoptr, then the matcher 2799 // will look through it and provide only the integer value. In that case, 2800 // use it here. 2801 if (!ResultPtr && AddrMode.BaseReg) { 2802 ResultPtr = 2803 Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), "sunkaddr"); 2804 AddrMode.BaseReg = nullptr; 2805 } else if (!ResultPtr && AddrMode.Scale == 1) { 2806 ResultPtr = 2807 Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), "sunkaddr"); 2808 AddrMode.Scale = 0; 2809 } 2810 2811 if (!ResultPtr && 2812 !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) { 2813 SunkAddr = Constant::getNullValue(Addr->getType()); 2814 } else if (!ResultPtr) { 2815 return false; 2816 } else { 2817 Type *I8PtrTy = 2818 Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace()); 2819 2820 // Start with the base register. Do this first so that subsequent address 2821 // matching finds it last, which will prevent it from trying to match it 2822 // as the scaled value in case it happens to be a mul. That would be 2823 // problematic if we've sunk a different mul for the scale, because then 2824 // we'd end up sinking both muls. 2825 if (AddrMode.BaseReg) { 2826 Value *V = AddrMode.BaseReg; 2827 if (V->getType() != IntPtrTy) 2828 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr"); 2829 2830 ResultIndex = V; 2831 } 2832 2833 // Add the scale value. 2834 if (AddrMode.Scale) { 2835 Value *V = AddrMode.ScaledReg; 2836 if (V->getType() == IntPtrTy) { 2837 // done. 2838 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() < 2839 cast<IntegerType>(V->getType())->getBitWidth()) { 2840 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); 2841 } else { 2842 // It is only safe to sign extend the BaseReg if we know that the math 2843 // required to create it did not overflow before we extend it. Since 2844 // the original IR value was tossed in favor of a constant back when 2845 // the AddrMode was created we need to bail out gracefully if widths 2846 // do not match instead of extending it. 
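          // Before bailing out, erase the intermediate cast this attempt may
          // already have created for the base register, so no dead "sunkaddr"
          // instruction is left behind.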
2847 Instruction *I = dyn_cast_or_null<Instruction>(ResultIndex); 2848 if (I && (ResultIndex != AddrMode.BaseReg)) 2849 I->eraseFromParent(); 2850 return false; 2851 } 2852 2853 if (AddrMode.Scale != 1) 2854 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), 2855 "sunkaddr"); 2856 if (ResultIndex) 2857 ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr"); 2858 else 2859 ResultIndex = V; 2860 } 2861 2862 // Add in the Base Offset if present. 2863 if (AddrMode.BaseOffs) { 2864 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); 2865 if (ResultIndex) { 2866 // We need to add this separately from the scale above to help with 2867 // SDAG consecutive load/store merging. 2868 if (ResultPtr->getType() != I8PtrTy) 2869 ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); 2870 ResultPtr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr"); 2871 } 2872 2873 ResultIndex = V; 2874 } 2875 2876 if (!ResultIndex) { 2877 SunkAddr = ResultPtr; 2878 } else { 2879 if (ResultPtr->getType() != I8PtrTy) 2880 ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); 2881 SunkAddr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr"); 2882 } 2883 2884 if (SunkAddr->getType() != Addr->getType()) 2885 SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType()); 2886 } 2887 } else { 2888 DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " 2889 << *MemoryInst << "\n"); 2890 Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType()); 2891 Value *Result = nullptr; 2892 2893 // Start with the base register. Do this first so that subsequent address 2894 // matching finds it last, which will prevent it from trying to match it 2895 // as the scaled value in case it happens to be a mul. That would be 2896 // problematic if we've sunk a different mul for the scale, because then 2897 // we'd end up sinking both muls. 2898 if (AddrMode.BaseReg) { 2899 Value *V = AddrMode.BaseReg; 2900 if (V->getType()->isPointerTy()) 2901 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); 2902 if (V->getType() != IntPtrTy) 2903 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr"); 2904 Result = V; 2905 } 2906 2907 // Add the scale value. 2908 if (AddrMode.Scale) { 2909 Value *V = AddrMode.ScaledReg; 2910 if (V->getType() == IntPtrTy) { 2911 // done. 2912 } else if (V->getType()->isPointerTy()) { 2913 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); 2914 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() < 2915 cast<IntegerType>(V->getType())->getBitWidth()) { 2916 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); 2917 } else { 2918 // It is only safe to sign extend the BaseReg if we know that the math 2919 // required to create it did not overflow before we extend it. Since 2920 // the original IR value was tossed in favor of a constant back when 2921 // the AddrMode was created we need to bail out gracefully if widths 2922 // do not match instead of extending it. 2923 Instruction *I = dyn_cast_or_null<Instruction>(Result); 2924 if (I && (Result != AddrMode.BaseReg)) 2925 I->eraseFromParent(); 2926 return false; 2927 } 2928 if (AddrMode.Scale != 1) 2929 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), 2930 "sunkaddr"); 2931 if (Result) 2932 Result = Builder.CreateAdd(Result, V, "sunkaddr"); 2933 else 2934 Result = V; 2935 } 2936 2937 // Add in the BaseGV if present. 
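    // In this integer fallback the global is turned into an integer with
    // ptrtoint, added into the running sum, and the final sum is converted
    // back into a pointer further down.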
2938 if (AddrMode.BaseGV) { 2939 Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr"); 2940 if (Result) 2941 Result = Builder.CreateAdd(Result, V, "sunkaddr"); 2942 else 2943 Result = V; 2944 } 2945 2946 // Add in the Base Offset if present. 2947 if (AddrMode.BaseOffs) { 2948 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); 2949 if (Result) 2950 Result = Builder.CreateAdd(Result, V, "sunkaddr"); 2951 else 2952 Result = V; 2953 } 2954 2955 if (!Result) 2956 SunkAddr = Constant::getNullValue(Addr->getType()); 2957 else 2958 SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr"); 2959 } 2960 2961 MemoryInst->replaceUsesOfWith(Repl, SunkAddr); 2962 2963 // If we have no uses, recursively delete the value and all dead instructions 2964 // using it. 2965 if (Repl->use_empty()) { 2966 // This can cause recursive deletion, which can invalidate our iterator. 2967 // Use a WeakVH to hold onto it in case this happens. 2968 WeakVH IterHandle(CurInstIterator); 2969 BasicBlock *BB = CurInstIterator->getParent(); 2970 2971 RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo); 2972 2973 if (IterHandle != CurInstIterator) { 2974 // If the iterator instruction was recursively deleted, start over at the 2975 // start of the block. 2976 CurInstIterator = BB->begin(); 2977 SunkAddrs.clear(); 2978 } 2979 } 2980 ++NumMemoryInsts; 2981 return true; 2982 } 2983 2984 /// OptimizeInlineAsmInst - If there are any memory operands, use 2985 /// OptimizeMemoryInst to sink their address computing into the block when 2986 /// possible / profitable. 2987 bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) { 2988 bool MadeChange = false; 2989 2990 TargetLowering::AsmOperandInfoVector 2991 TargetConstraints = TLI->ParseConstraints(CS); 2992 unsigned ArgNo = 0; 2993 for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { 2994 TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; 2995 2996 // Compute the constraint code and ConstraintType to use. 2997 TLI->ComputeConstraintToUse(OpInfo, SDValue()); 2998 2999 if (OpInfo.ConstraintType == TargetLowering::C_Memory && 3000 OpInfo.isIndirect) { 3001 Value *OpVal = CS->getArgOperand(ArgNo++); 3002 MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType()); 3003 } else if (OpInfo.Type == InlineAsm::isInput) 3004 ArgNo++; 3005 } 3006 3007 return MadeChange; 3008 } 3009 3010 /// \brief Check if all the uses of \p Inst are equivalent (or free) zero or 3011 /// sign extensions. 3012 static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) { 3013 assert(!Inst->use_empty() && "Input must have at least one use"); 3014 const Instruction *FirstUser = cast<Instruction>(*Inst->user_begin()); 3015 bool IsSExt = isa<SExtInst>(FirstUser); 3016 Type *ExtTy = FirstUser->getType(); 3017 for (const User *U : Inst->users()) { 3018 const Instruction *UI = cast<Instruction>(U); 3019 if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI))) 3020 return false; 3021 Type *CurTy = UI->getType(); 3022 // Same input and output types: Same instruction after CSE. 3023 if (CurTy == ExtTy) 3024 continue; 3025 3026 // If IsSExt is true, we are in this situation: 3027 // a = Inst 3028 // b = sext ty1 a to ty2 3029 // c = sext ty1 a to ty3 3030 // Assuming ty2 is shorter than ty3, this could be turned into: 3031 // a = Inst 3032 // b = sext ty1 a to ty2 3033 // c = sext ty2 b to ty3 3034 // However, the last sext is not free. 
3035 if (IsSExt) 3036 return false; 3037 3038 // This is a ZExt, maybe this is free to extend from one type to another. 3039 // In that case, we would not account for a different use. 3040 Type *NarrowTy; 3041 Type *LargeTy; 3042 if (ExtTy->getScalarType()->getIntegerBitWidth() > 3043 CurTy->getScalarType()->getIntegerBitWidth()) { 3044 NarrowTy = CurTy; 3045 LargeTy = ExtTy; 3046 } else { 3047 NarrowTy = ExtTy; 3048 LargeTy = CurTy; 3049 } 3050 3051 if (!TLI.isZExtFree(NarrowTy, LargeTy)) 3052 return false; 3053 } 3054 // All uses are the same or can be derived from one another for free. 3055 return true; 3056 } 3057 3058 /// \brief Try to form ExtLd by promoting \p Exts until they reach a 3059 /// load instruction. 3060 /// If an ext(load) can be formed, it is returned via \p LI for the load 3061 /// and \p Inst for the extension. 3062 /// Otherwise LI == nullptr and Inst == nullptr. 3063 /// When some promotion happened, \p TPT contains the proper state to 3064 /// revert them. 3065 /// 3066 /// \return true when promoting was necessary to expose the ext(load) 3067 /// opportunity, false otherwise. 3068 /// 3069 /// Example: 3070 /// \code 3071 /// %ld = load i32* %addr 3072 /// %add = add nuw i32 %ld, 4 3073 /// %zext = zext i32 %add to i64 3074 /// \endcode 3075 /// => 3076 /// \code 3077 /// %ld = load i32* %addr 3078 /// %zext = zext i32 %ld to i64 3079 /// %add = add nuw i64 %zext, 4 3080 /// \encode 3081 /// Thanks to the promotion, we can match zext(load i32*) to i64. 3082 bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT, 3083 LoadInst *&LI, Instruction *&Inst, 3084 const SmallVectorImpl<Instruction *> &Exts, 3085 unsigned CreatedInsts = 0) { 3086 // Iterate over all the extensions to see if one form an ext(load). 3087 for (auto I : Exts) { 3088 // Check if we directly have ext(load). 3089 if ((LI = dyn_cast<LoadInst>(I->getOperand(0)))) { 3090 Inst = I; 3091 // No promotion happened here. 3092 return false; 3093 } 3094 // Check whether or not we want to do any promotion. 3095 if (!TLI || !TLI->enableExtLdPromotion() || DisableExtLdPromotion) 3096 continue; 3097 // Get the action to perform the promotion. 3098 TypePromotionHelper::Action TPH = TypePromotionHelper::getAction( 3099 I, InsertedTruncsSet, *TLI, PromotedInsts); 3100 // Check if we can promote. 3101 if (!TPH) 3102 continue; 3103 // Save the current state. 3104 TypePromotionTransaction::ConstRestorationPt LastKnownGood = 3105 TPT.getRestorationPoint(); 3106 SmallVector<Instruction *, 4> NewExts; 3107 unsigned NewCreatedInsts = 0; 3108 // Promote. 3109 Value *PromotedVal = 3110 TPH(I, TPT, PromotedInsts, NewCreatedInsts, &NewExts, nullptr); 3111 assert(PromotedVal && 3112 "TypePromotionHelper should have filtered out those cases"); 3113 3114 // We would be able to merge only one extension in a load. 3115 // Therefore, if we have more than 1 new extension we heuristically 3116 // cut this search path, because it means we degrade the code quality. 3117 // With exactly 2, the transformation is neutral, because we will merge 3118 // one extension but leave one. However, we optimistically keep going, 3119 // because the new extension may be removed too. 3120 unsigned TotalCreatedInsts = CreatedInsts + NewCreatedInsts; 3121 if (!StressExtLdPromotion && 3122 (TotalCreatedInsts > 1 || 3123 !isPromotedInstructionLegal(*TLI, PromotedVal))) { 3124 // The promotion is not profitable, rollback to the previous state. 3125 TPT.rollback(LastKnownGood); 3126 continue; 3127 } 3128 // The promotion is profitable. 
3129 // Check if it exposes an ext(load). 3130 (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInsts); 3131 if (LI && (StressExtLdPromotion || NewCreatedInsts == 0 || 3132 // If we have created a new extension, i.e., now we have two 3133 // extensions. We must make sure one of them is merged with 3134 // the load, otherwise we may degrade the code quality. 3135 (LI->hasOneUse() || hasSameExtUse(LI, *TLI)))) 3136 // Promotion happened. 3137 return true; 3138 // If this does not help to expose an ext(load) then, rollback. 3139 TPT.rollback(LastKnownGood); 3140 } 3141 // None of the extension can form an ext(load). 3142 LI = nullptr; 3143 Inst = nullptr; 3144 return false; 3145 } 3146 3147 /// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same 3148 /// basic block as the load, unless conditions are unfavorable. This allows 3149 /// SelectionDAG to fold the extend into the load. 3150 /// \p I[in/out] the extension may be modified during the process if some 3151 /// promotions apply. 3152 /// 3153 bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *&I) { 3154 // Try to promote a chain of computation if it allows to form 3155 // an extended load. 3156 TypePromotionTransaction TPT; 3157 TypePromotionTransaction::ConstRestorationPt LastKnownGood = 3158 TPT.getRestorationPoint(); 3159 SmallVector<Instruction *, 1> Exts; 3160 Exts.push_back(I); 3161 // Look for a load being extended. 3162 LoadInst *LI = nullptr; 3163 Instruction *OldExt = I; 3164 bool HasPromoted = ExtLdPromotion(TPT, LI, I, Exts); 3165 if (!LI || !I) { 3166 assert(!HasPromoted && !LI && "If we did not match any load instruction " 3167 "the code must remain the same"); 3168 I = OldExt; 3169 return false; 3170 } 3171 3172 // If they're already in the same block, there's nothing to do. 3173 // Make the cheap checks first if we did not promote. 3174 // If we promoted, we need to check if it is indeed profitable. 3175 if (!HasPromoted && LI->getParent() == I->getParent()) 3176 return false; 3177 3178 EVT VT = TLI->getValueType(I->getType()); 3179 EVT LoadVT = TLI->getValueType(LI->getType()); 3180 3181 // If the load has other users and the truncate is not free, this probably 3182 // isn't worthwhile. 3183 if (!LI->hasOneUse() && TLI && 3184 (TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) && 3185 !TLI->isTruncateFree(I->getType(), LI->getType())) { 3186 I = OldExt; 3187 TPT.rollback(LastKnownGood); 3188 return false; 3189 } 3190 3191 // Check whether the target supports casts folded into loads. 3192 unsigned LType; 3193 if (isa<ZExtInst>(I)) 3194 LType = ISD::ZEXTLOAD; 3195 else { 3196 assert(isa<SExtInst>(I) && "Unexpected ext type!"); 3197 LType = ISD::SEXTLOAD; 3198 } 3199 if (TLI && !TLI->isLoadExtLegal(LType, LoadVT)) { 3200 I = OldExt; 3201 TPT.rollback(LastKnownGood); 3202 return false; 3203 } 3204 3205 // Move the extend into the same block as the load, so that SelectionDAG 3206 // can fold it. 3207 TPT.commit(); 3208 I->removeFromParent(); 3209 I->insertAfter(LI); 3210 ++NumExtsMoved; 3211 return true; 3212 } 3213 3214 bool CodeGenPrepare::OptimizeExtUses(Instruction *I) { 3215 BasicBlock *DefBB = I->getParent(); 3216 3217 // If the result of a {s|z}ext and its source are both live out, rewrite all 3218 // other uses of the source with result of extension. 3219 Value *Src = I->getOperand(0); 3220 if (Src->hasOneUse()) 3221 return false; 3222 3223 // Only do this xform if truncating is free. 
3224 if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType())) 3225 return false; 3226 3227 // Only safe to perform the optimization if the source is also defined in 3228 // this block. 3229 if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent()) 3230 return false; 3231 3232 bool DefIsLiveOut = false; 3233 for (User *U : I->users()) { 3234 Instruction *UI = cast<Instruction>(U); 3235 3236 // Figure out which BB this ext is used in. 3237 BasicBlock *UserBB = UI->getParent(); 3238 if (UserBB == DefBB) continue; 3239 DefIsLiveOut = true; 3240 break; 3241 } 3242 if (!DefIsLiveOut) 3243 return false; 3244 3245 // Make sure none of the uses are PHI nodes. 3246 for (User *U : Src->users()) { 3247 Instruction *UI = cast<Instruction>(U); 3248 BasicBlock *UserBB = UI->getParent(); 3249 if (UserBB == DefBB) continue; 3250 // Be conservative. We don't want this xform to end up introducing 3251 // reloads just before load / store instructions. 3252 if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI)) 3253 return false; 3254 } 3255 3256 // InsertedTruncs - Only insert one trunc in each block once. 3257 DenseMap<BasicBlock*, Instruction*> InsertedTruncs; 3258 3259 bool MadeChange = false; 3260 for (Use &U : Src->uses()) { 3261 Instruction *User = cast<Instruction>(U.getUser()); 3262 3263 // Figure out which BB this ext is used in. 3264 BasicBlock *UserBB = User->getParent(); 3265 if (UserBB == DefBB) continue; 3266 3267 // Both src and def are live in this block. Rewrite the use. 3268 Instruction *&InsertedTrunc = InsertedTruncs[UserBB]; 3269 3270 if (!InsertedTrunc) { 3271 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); 3272 InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt); 3273 InsertedTruncsSet.insert(InsertedTrunc); 3274 } 3275 3276 // Replace a use of the {s|z}ext source with a use of the result. 3277 U = InsertedTrunc; 3278 ++NumExtUses; 3279 MadeChange = true; 3280 } 3281 3282 return MadeChange; 3283 } 3284 3285 /// isFormingBranchFromSelectProfitable - Returns true if a SelectInst should be 3286 /// turned into an explicit branch. 3287 static bool isFormingBranchFromSelectProfitable(SelectInst *SI) { 3288 // FIXME: This should use the same heuristics as IfConversion to determine 3289 // whether a select is better represented as a branch. This requires that 3290 // branch probability metadata is preserved for the select, which is not the 3291 // case currently. 3292 3293 CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition()); 3294 3295 // If the branch is predicted right, an out of order CPU can avoid blocking on 3296 // the compare. Emit cmovs on compares with a memory operand as branches to 3297 // avoid stalls on the load from memory. If the compare has more than one use 3298 // there's probably another cmov or setcc around so it's not worth emitting a 3299 // branch. 3300 if (!Cmp) 3301 return false; 3302 3303 Value *CmpOp0 = Cmp->getOperand(0); 3304 Value *CmpOp1 = Cmp->getOperand(1); 3305 3306 // We check that the memory operand has one use to avoid uses of the loaded 3307 // value directly after the compare, making branches unprofitable. 3308 return Cmp->hasOneUse() && 3309 ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) || 3310 (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse())); 3311 } 3312 3313 3314 /// If we have a SelectInst that will likely profit from branch prediction, 3315 /// turn it into a branch. 
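/// For illustration only (the value names below are hypothetical; the block
/// names match the ones created by this function), the rewrite turns
/// \code
///   entry:
///     %cmp = icmp ult i32 %x, %y
///     %res = select i1 %cmp, i32 %a, i32 %b
/// \endcode
/// into an explicit branch plus a phi:
/// \code
///   entry:
///     %cmp = icmp ult i32 %x, %y
///     br i1 %cmp, label %select.end, label %select.mid
///   select.mid:
///     br label %select.end
///   select.end:
///     %res = phi i32 [ %a, %entry ], [ %b, %select.mid ]
/// \endcode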
3316 bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) { 3317 bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); 3318 3319 // Can we convert the 'select' to CF ? 3320 if (DisableSelectToBranch || OptSize || !TLI || VectorCond) 3321 return false; 3322 3323 TargetLowering::SelectSupportKind SelectKind; 3324 if (VectorCond) 3325 SelectKind = TargetLowering::VectorMaskSelect; 3326 else if (SI->getType()->isVectorTy()) 3327 SelectKind = TargetLowering::ScalarCondVectorVal; 3328 else 3329 SelectKind = TargetLowering::ScalarValSelect; 3330 3331 // Do we have efficient codegen support for this kind of 'selects' ? 3332 if (TLI->isSelectSupported(SelectKind)) { 3333 // We have efficient codegen support for the select instruction. 3334 // Check if it is profitable to keep this 'select'. 3335 if (!TLI->isPredictableSelectExpensive() || 3336 !isFormingBranchFromSelectProfitable(SI)) 3337 return false; 3338 } 3339 3340 ModifiedDT = true; 3341 3342 // First, we split the block containing the select into 2 blocks. 3343 BasicBlock *StartBlock = SI->getParent(); 3344 BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI)); 3345 BasicBlock *NextBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); 3346 3347 // Create a new block serving as the landing pad for the branch. 3348 BasicBlock *SmallBlock = BasicBlock::Create(SI->getContext(), "select.mid", 3349 NextBlock->getParent(), NextBlock); 3350 3351 // Move the unconditional branch from the block with the select in it into our 3352 // landing pad block. 3353 StartBlock->getTerminator()->eraseFromParent(); 3354 BranchInst::Create(NextBlock, SmallBlock); 3355 3356 // Insert the real conditional branch based on the original condition. 3357 BranchInst::Create(NextBlock, SmallBlock, SI->getCondition(), SI); 3358 3359 // The select itself is replaced with a PHI Node. 3360 PHINode *PN = PHINode::Create(SI->getType(), 2, "", NextBlock->begin()); 3361 PN->takeName(SI); 3362 PN->addIncoming(SI->getTrueValue(), StartBlock); 3363 PN->addIncoming(SI->getFalseValue(), SmallBlock); 3364 SI->replaceAllUsesWith(PN); 3365 SI->eraseFromParent(); 3366 3367 // Instruct OptimizeBlock to skip to the next block. 3368 CurInstIterator = StartBlock->end(); 3369 ++NumSelectsExpanded; 3370 return true; 3371 } 3372 3373 static bool isBroadcastShuffle(ShuffleVectorInst *SVI) { 3374 SmallVector<int, 16> Mask(SVI->getShuffleMask()); 3375 int SplatElem = -1; 3376 for (unsigned i = 0; i < Mask.size(); ++i) { 3377 if (SplatElem != -1 && Mask[i] != -1 && Mask[i] != SplatElem) 3378 return false; 3379 SplatElem = Mask[i]; 3380 } 3381 3382 return true; 3383 } 3384 3385 /// Some targets have expensive vector shifts if the lanes aren't all the same 3386 /// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases 3387 /// it's often worth sinking a shufflevector splat down to its use so that 3388 /// codegen can spot all lanes are identical. 3389 bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) { 3390 BasicBlock *DefBB = SVI->getParent(); 3391 3392 // Only do this xform if variable vector shifts are particularly expensive. 3393 if (!TLI || !TLI->isVectorShiftByScalarCheap(SVI->getType())) 3394 return false; 3395 3396 // We only expect better codegen by sinking a shuffle if we can recognise a 3397 // constant splat. 3398 if (!isBroadcastShuffle(SVI)) 3399 return false; 3400 3401 // InsertedShuffles - Only insert a shuffle in each block once. 
3402 DenseMap<BasicBlock*, Instruction*> InsertedShuffles; 3403 3404 bool MadeChange = false; 3405 for (User *U : SVI->users()) { 3406 Instruction *UI = cast<Instruction>(U); 3407 3408 // Figure out which BB this ext is used in. 3409 BasicBlock *UserBB = UI->getParent(); 3410 if (UserBB == DefBB) continue; 3411 3412 // For now only apply this when the splat is used by a shift instruction. 3413 if (!UI->isShift()) continue; 3414 3415 // Everything checks out, sink the shuffle if the user's block doesn't 3416 // already have a copy. 3417 Instruction *&InsertedShuffle = InsertedShuffles[UserBB]; 3418 3419 if (!InsertedShuffle) { 3420 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); 3421 InsertedShuffle = new ShuffleVectorInst(SVI->getOperand(0), 3422 SVI->getOperand(1), 3423 SVI->getOperand(2), "", InsertPt); 3424 } 3425 3426 UI->replaceUsesOfWith(SVI, InsertedShuffle); 3427 MadeChange = true; 3428 } 3429 3430 // If we removed all uses, nuke the shuffle. 3431 if (SVI->use_empty()) { 3432 SVI->eraseFromParent(); 3433 MadeChange = true; 3434 } 3435 3436 return MadeChange; 3437 } 3438 3439 namespace { 3440 /// \brief Helper class to promote a scalar operation to a vector one. 3441 /// This class is used to move downward extractelement transition. 3442 /// E.g., 3443 /// a = vector_op <2 x i32> 3444 /// b = extractelement <2 x i32> a, i32 0 3445 /// c = scalar_op b 3446 /// store c 3447 /// 3448 /// => 3449 /// a = vector_op <2 x i32> 3450 /// c = vector_op a (equivalent to scalar_op on the related lane) 3451 /// * d = extractelement <2 x i32> c, i32 0 3452 /// * store d 3453 /// Assuming both extractelement and store can be combine, we get rid of the 3454 /// transition. 3455 class VectorPromoteHelper { 3456 /// Used to perform some checks on the legality of vector operations. 3457 const TargetLowering &TLI; 3458 3459 /// Used to estimated the cost of the promoted chain. 3460 const TargetTransformInfo &TTI; 3461 3462 /// The transition being moved downwards. 3463 Instruction *Transition; 3464 /// The sequence of instructions to be promoted. 3465 SmallVector<Instruction *, 4> InstsToBePromoted; 3466 /// Cost of combining a store and an extract. 3467 unsigned StoreExtractCombineCost; 3468 /// Instruction that will be combined with the transition. 3469 Instruction *CombineInst; 3470 3471 /// \brief The instruction that represents the current end of the transition. 3472 /// Since we are faking the promotion until we reach the end of the chain 3473 /// of computation, we need a way to get the current end of the transition. 3474 Instruction *getEndOfTransition() const { 3475 if (InstsToBePromoted.empty()) 3476 return Transition; 3477 return InstsToBePromoted.back(); 3478 } 3479 3480 /// \brief Return the index of the original value in the transition. 3481 /// E.g., for "extractelement <2 x i32> c, i32 1" the original value, 3482 /// c, is at index 0. 3483 unsigned getTransitionOriginalValueIdx() const { 3484 assert(isa<ExtractElementInst>(Transition) && 3485 "Other kind of transitions are not supported yet"); 3486 return 0; 3487 } 3488 3489 /// \brief Return the index of the index in the transition. 3490 /// E.g., for "extractelement <2 x i32> c, i32 0" the index 3491 /// is at index 1. 3492 unsigned getTransitionIdx() const { 3493 assert(isa<ExtractElementInst>(Transition) && 3494 "Other kind of transitions are not supported yet"); 3495 return 1; 3496 } 3497 3498 /// \brief Get the type of the transition. 3499 /// This is the type of the original value. 
3500 /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the 3501 /// transition is <2 x i32>. 3502 Type *getTransitionType() const { 3503 return Transition->getOperand(getTransitionOriginalValueIdx())->getType(); 3504 } 3505 3506 /// \brief Promote \p ToBePromoted by moving \p Def downward through. 3507 /// I.e., we have the following sequence: 3508 /// Def = Transition <ty1> a to <ty2> 3509 /// b = ToBePromoted <ty2> Def, ... 3510 /// => 3511 /// b = ToBePromoted <ty1> a, ... 3512 /// Def = Transition <ty1> ToBePromoted to <ty2> 3513 void promoteImpl(Instruction *ToBePromoted); 3514 3515 /// \brief Check whether or not it is profitable to promote all the 3516 /// instructions enqueued to be promoted. 3517 bool isProfitableToPromote() { 3518 Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx()); 3519 unsigned Index = isa<ConstantInt>(ValIdx) 3520 ? cast<ConstantInt>(ValIdx)->getZExtValue() 3521 : -1; 3522 Type *PromotedType = getTransitionType(); 3523 3524 StoreInst *ST = cast<StoreInst>(CombineInst); 3525 unsigned AS = ST->getPointerAddressSpace(); 3526 unsigned Align = ST->getAlignment(); 3527 // Check if this store is supported. 3528 if (!TLI.allowsMisalignedMemoryAccesses( 3529 TLI.getValueType(ST->getValueOperand()->getType()), AS, Align)) { 3530 // If this is not supported, there is no way we can combine 3531 // the extract with the store. 3532 return false; 3533 } 3534 3535 // The scalar chain of computation has to pay for the transition 3536 // scalar to vector. 3537 // The vector chain has to account for the combining cost. 3538 uint64_t ScalarCost = 3539 TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index); 3540 uint64_t VectorCost = StoreExtractCombineCost; 3541 for (const auto &Inst : InstsToBePromoted) { 3542 // Compute the cost. 3543 // By construction, all instructions being promoted are arithmetic ones. 3544 // Moreover, one argument is a constant that can be viewed as a splat 3545 // constant. 3546 Value *Arg0 = Inst->getOperand(0); 3547 bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) || 3548 isa<ConstantFP>(Arg0); 3549 TargetTransformInfo::OperandValueKind Arg0OVK = 3550 IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue 3551 : TargetTransformInfo::OK_AnyValue; 3552 TargetTransformInfo::OperandValueKind Arg1OVK = 3553 !IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue 3554 : TargetTransformInfo::OK_AnyValue; 3555 ScalarCost += TTI.getArithmeticInstrCost( 3556 Inst->getOpcode(), Inst->getType(), Arg0OVK, Arg1OVK); 3557 VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType, 3558 Arg0OVK, Arg1OVK); 3559 } 3560 DEBUG(dbgs() << "Estimated cost of computation to be promoted:\nScalar: " 3561 << ScalarCost << "\nVector: " << VectorCost << '\n'); 3562 return ScalarCost > VectorCost; 3563 } 3564 3565 /// \brief Generate a constant vector with \p Val with the same 3566 /// number of elements as the transition. 3567 /// \p UseSplat defines whether or not \p Val should be replicated 3568 /// accross the whole vector. 3569 /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>, 3570 /// otherwise we generate a vector with as many undef as possible: 3571 /// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only 3572 /// used at the index of the extract. 
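/// As a purely illustrative example, with a <4 x i32> transition type,
/// \p Val equal to i32 7, and an extract index of 2, this returns:
/// \code
///   UseSplat == true  ==> <i32 7, i32 7, i32 7, i32 7>
///   UseSplat == false ==> <i32 undef, i32 undef, i32 7, i32 undef>
/// \endcode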
3573 Value *getConstantVector(Constant *Val, bool UseSplat) const { 3574 unsigned ExtractIdx = UINT_MAX; 3575 if (!UseSplat) { 3576 // If we cannot determine where the constant must be, we have to 3577 // use a splat constant. 3578 Value *ValExtractIdx = Transition->getOperand(getTransitionIdx()); 3579 if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx)) 3580 ExtractIdx = CstVal->getSExtValue(); 3581 else 3582 UseSplat = true; 3583 } 3584 3585 unsigned End = getTransitionType()->getVectorNumElements(); 3586 if (UseSplat) 3587 return ConstantVector::getSplat(End, Val); 3588 3589 SmallVector<Constant *, 4> ConstVec; 3590 UndefValue *UndefVal = UndefValue::get(Val->getType()); 3591 for (unsigned Idx = 0; Idx != End; ++Idx) { 3592 if (Idx == ExtractIdx) 3593 ConstVec.push_back(Val); 3594 else 3595 ConstVec.push_back(UndefVal); 3596 } 3597 return ConstantVector::get(ConstVec); 3598 } 3599 3600 /// \brief Check if promoting to a vector type an operand at \p OperandIdx 3601 /// in \p Use can trigger undefined behavior. 3602 static bool canCauseUndefinedBehavior(const Instruction *Use, 3603 unsigned OperandIdx) { 3604 // This is not safe to introduce undef when the operand is on 3605 // the right hand side of a division-like instruction. 3606 if (OperandIdx != 1) 3607 return false; 3608 switch (Use->getOpcode()) { 3609 default: 3610 return false; 3611 case Instruction::SDiv: 3612 case Instruction::UDiv: 3613 case Instruction::SRem: 3614 case Instruction::URem: 3615 return true; 3616 case Instruction::FDiv: 3617 case Instruction::FRem: 3618 return !Use->hasNoNaNs(); 3619 } 3620 llvm_unreachable(nullptr); 3621 } 3622 3623 public: 3624 VectorPromoteHelper(const TargetLowering &TLI, const TargetTransformInfo &TTI, 3625 Instruction *Transition, unsigned CombineCost) 3626 : TLI(TLI), TTI(TTI), Transition(Transition), 3627 StoreExtractCombineCost(CombineCost), CombineInst(nullptr) { 3628 assert(Transition && "Do not know how to promote null"); 3629 } 3630 3631 /// \brief Check if we can promote \p ToBePromoted to \p Type. 3632 bool canPromote(const Instruction *ToBePromoted) const { 3633 // We could support CastInst too. 3634 return isa<BinaryOperator>(ToBePromoted); 3635 } 3636 3637 /// \brief Check if it is profitable to promote \p ToBePromoted 3638 /// by moving downward the transition through. 3639 bool shouldPromote(const Instruction *ToBePromoted) const { 3640 // Promote only if all the operands can be statically expanded. 3641 // Indeed, we do not want to introduce any new kind of transitions. 3642 for (const Use &U : ToBePromoted->operands()) { 3643 const Value *Val = U.get(); 3644 if (Val == getEndOfTransition()) { 3645 // If the use is a division and the transition is on the rhs, 3646 // we cannot promote the operation, otherwise we may create a 3647 // division by zero. 3648 if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo())) 3649 return false; 3650 continue; 3651 } 3652 if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) && 3653 !isa<ConstantFP>(Val)) 3654 return false; 3655 } 3656 // Check that the resulting operation is legal. 3657 int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode()); 3658 if (!ISDOpcode) 3659 return false; 3660 return StressStoreExtract || 3661 TLI.isOperationLegalOrCustom( 3662 ISDOpcode, TLI.getValueType(getTransitionType(), true)); 3663 } 3664 3665 /// \brief Check whether or not \p Use can be combined 3666 /// with the transition. 3667 /// I.e., is it possible to do Use(Transition) => AnotherUse? 
3668 bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); } 3669 3670 /// \brief Record \p ToBePromoted as part of the chain to be promoted. 3671 void enqueueForPromotion(Instruction *ToBePromoted) { 3672 InstsToBePromoted.push_back(ToBePromoted); 3673 } 3674 3675 /// \brief Set the instruction that will be combined with the transition. 3676 void recordCombineInstruction(Instruction *ToBeCombined) { 3677 assert(canCombine(ToBeCombined) && "Unsupported instruction to combine"); 3678 CombineInst = ToBeCombined; 3679 } 3680 3681 /// \brief Promote all the instructions enqueued for promotion if it is 3682 /// is profitable. 3683 /// \return True if the promotion happened, false otherwise. 3684 bool promote() { 3685 // Check if there is something to promote. 3686 // Right now, if we do not have anything to combine with, 3687 // we assume the promotion is not profitable. 3688 if (InstsToBePromoted.empty() || !CombineInst) 3689 return false; 3690 3691 // Check cost. 3692 if (!StressStoreExtract && !isProfitableToPromote()) 3693 return false; 3694 3695 // Promote. 3696 for (auto &ToBePromoted : InstsToBePromoted) 3697 promoteImpl(ToBePromoted); 3698 InstsToBePromoted.clear(); 3699 return true; 3700 } 3701 }; 3702 } // End of anonymous namespace. 3703 3704 void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) { 3705 // At this point, we know that all the operands of ToBePromoted but Def 3706 // can be statically promoted. 3707 // For Def, we need to use its parameter in ToBePromoted: 3708 // b = ToBePromoted ty1 a 3709 // Def = Transition ty1 b to ty2 3710 // Move the transition down. 3711 // 1. Replace all uses of the promoted operation by the transition. 3712 // = ... b => = ... Def. 3713 assert(ToBePromoted->getType() == Transition->getType() && 3714 "The type of the result of the transition does not match " 3715 "the final type"); 3716 ToBePromoted->replaceAllUsesWith(Transition); 3717 // 2. Update the type of the uses. 3718 // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def. 3719 Type *TransitionTy = getTransitionType(); 3720 ToBePromoted->mutateType(TransitionTy); 3721 // 3. Update all the operands of the promoted operation with promoted 3722 // operands. 3723 // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a. 3724 for (Use &U : ToBePromoted->operands()) { 3725 Value *Val = U.get(); 3726 Value *NewVal = nullptr; 3727 if (Val == Transition) 3728 NewVal = Transition->getOperand(getTransitionOriginalValueIdx()); 3729 else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) || 3730 isa<ConstantFP>(Val)) { 3731 // Use a splat constant if it is not safe to use undef. 3732 NewVal = getConstantVector( 3733 cast<Constant>(Val), 3734 isa<UndefValue>(Val) || 3735 canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo())); 3736 } else 3737 assert(0 && "Did you modified shouldPromote and forgot to update this?"); 3738 ToBePromoted->setOperand(U.getOperandNo(), NewVal); 3739 } 3740 Transition->removeFromParent(); 3741 Transition->insertAfter(ToBePromoted); 3742 Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted); 3743 } 3744 3745 /// Some targets can do store(extractelement) with one instruction. 3746 /// Try to push the extractelement towards the stores when the target 3747 /// has this feature and this is profitable. 
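/// A sketch of the intended rewrite, on hypothetical IR and assuming the
/// target reports that the final store(extractelement) pair is cheap:
/// \code
///   %b = extractelement <2 x i32> %vec, i32 1
///   %c = mul i32 %b, 3
///   store i32 %c, i32* %p
/// \endcode
/// becomes
/// \code
///   %c.vec = mul <2 x i32> %vec, <i32 undef, i32 3>
///   %b = extractelement <2 x i32> %c.vec, i32 1
///   store i32 %b, i32* %p
/// \endcode
/// so that only the store(extractelement) idiom is left for codegen to match.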
3748 bool CodeGenPrepare::OptimizeExtractElementInst(Instruction *Inst) { 3749 unsigned CombineCost = UINT_MAX; 3750 if (DisableStoreExtract || !TLI || 3751 (!StressStoreExtract && 3752 !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(), 3753 Inst->getOperand(1), CombineCost))) 3754 return false; 3755 3756 // At this point we know that Inst is a vector to scalar transition. 3757 // Try to move it down the def-use chain, until: 3758 // - We can combine the transition with its single use 3759 // => we got rid of the transition. 3760 // - We escape the current basic block 3761 // => we would need to check that we are moving it at a cheaper place and 3762 // we do not do that for now. 3763 BasicBlock *Parent = Inst->getParent(); 3764 DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n'); 3765 VectorPromoteHelper VPH(*TLI, *TTI, Inst, CombineCost); 3766 // If the transition has more than one use, assume this is not going to be 3767 // beneficial. 3768 while (Inst->hasOneUse()) { 3769 Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin()); 3770 DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n'); 3771 3772 if (ToBePromoted->getParent() != Parent) { 3773 DEBUG(dbgs() << "Instruction to promote is in a different block (" 3774 << ToBePromoted->getParent()->getName() 3775 << ") than the transition (" << Parent->getName() << ").\n"); 3776 return false; 3777 } 3778 3779 if (VPH.canCombine(ToBePromoted)) { 3780 DEBUG(dbgs() << "Assume " << *Inst << '\n' 3781 << "will be combined with: " << *ToBePromoted << '\n'); 3782 VPH.recordCombineInstruction(ToBePromoted); 3783 bool Changed = VPH.promote(); 3784 NumStoreExtractExposed += Changed; 3785 return Changed; 3786 } 3787 3788 DEBUG(dbgs() << "Try promoting.\n"); 3789 if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted)) 3790 return false; 3791 3792 DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n"); 3793 3794 VPH.enqueueForPromotion(ToBePromoted); 3795 Inst = ToBePromoted; 3796 } 3797 return false; 3798 } 3799 3800 bool CodeGenPrepare::OptimizeInst(Instruction *I) { 3801 if (PHINode *P = dyn_cast<PHINode>(I)) { 3802 // It is possible for very late stage optimizations (such as SimplifyCFG) 3803 // to introduce PHI nodes too late to be cleaned up. If we detect such a 3804 // trivial PHI, go ahead and zap it here. 3805 if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : nullptr, 3806 TLInfo, DT)) { 3807 P->replaceAllUsesWith(V); 3808 P->eraseFromParent(); 3809 ++NumPHIsElim; 3810 return true; 3811 } 3812 return false; 3813 } 3814 3815 if (CastInst *CI = dyn_cast<CastInst>(I)) { 3816 // If the source of the cast is a constant, then this should have 3817 // already been constant folded. The only reason NOT to constant fold 3818 // it is if something (e.g. LSR) was careful to place the constant 3819 // evaluation in a block other than then one that uses it (e.g. to hoist 3820 // the address of globals out of a loop). If this is the case, we don't 3821 // want to forward-subst the cast. 
3822 if (isa<Constant>(CI->getOperand(0))) 3823 return false; 3824 3825 if (TLI && OptimizeNoopCopyExpression(CI, *TLI)) 3826 return true; 3827 3828 if (isa<ZExtInst>(I) || isa<SExtInst>(I)) { 3829 /// Sink a zext or sext into its user blocks if the target type doesn't 3830 /// fit in one register 3831 if (TLI && TLI->getTypeAction(CI->getContext(), 3832 TLI->getValueType(CI->getType())) == 3833 TargetLowering::TypeExpandInteger) { 3834 return SinkCast(CI); 3835 } else { 3836 bool MadeChange = MoveExtToFormExtLoad(I); 3837 return MadeChange | OptimizeExtUses(I); 3838 } 3839 } 3840 return false; 3841 } 3842 3843 if (CmpInst *CI = dyn_cast<CmpInst>(I)) 3844 if (!TLI || !TLI->hasMultipleConditionRegisters()) 3845 return OptimizeCmpExpression(CI); 3846 3847 if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 3848 if (TLI) 3849 return OptimizeMemoryInst(I, I->getOperand(0), LI->getType()); 3850 return false; 3851 } 3852 3853 if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 3854 if (TLI) 3855 return OptimizeMemoryInst(I, SI->getOperand(1), 3856 SI->getOperand(0)->getType()); 3857 return false; 3858 } 3859 3860 BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I); 3861 3862 if (BinOp && (BinOp->getOpcode() == Instruction::AShr || 3863 BinOp->getOpcode() == Instruction::LShr)) { 3864 ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1)); 3865 if (TLI && CI && TLI->hasExtractBitsInsn()) 3866 return OptimizeExtractBits(BinOp, CI, *TLI); 3867 3868 return false; 3869 } 3870 3871 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) { 3872 if (GEPI->hasAllZeroIndices()) { 3873 /// The GEP operand must be a pointer, so must its result -> BitCast 3874 Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(), 3875 GEPI->getName(), GEPI); 3876 GEPI->replaceAllUsesWith(NC); 3877 GEPI->eraseFromParent(); 3878 ++NumGEPsElim; 3879 OptimizeInst(NC); 3880 return true; 3881 } 3882 return false; 3883 } 3884 3885 if (CallInst *CI = dyn_cast<CallInst>(I)) 3886 return OptimizeCallInst(CI); 3887 3888 if (SelectInst *SI = dyn_cast<SelectInst>(I)) 3889 return OptimizeSelectInst(SI); 3890 3891 if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) 3892 return OptimizeShuffleVectorInst(SVI); 3893 3894 if (isa<ExtractElementInst>(I)) 3895 return OptimizeExtractElementInst(I); 3896 3897 return false; 3898 } 3899 3900 // In this pass we look for GEP and cast instructions that are used 3901 // across basic blocks and rewrite them to improve basic-block-at-a-time 3902 // selection. 3903 bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) { 3904 SunkAddrs.clear(); 3905 bool MadeChange = false; 3906 3907 CurInstIterator = BB.begin(); 3908 while (CurInstIterator != BB.end()) 3909 MadeChange |= OptimizeInst(CurInstIterator++); 3910 3911 MadeChange |= DupRetToEnableTailCallOpts(&BB); 3912 3913 return MadeChange; 3914 } 3915 3916 // llvm.dbg.value is far away from the value then iSel may not be able 3917 // handle it properly. iSel will drop llvm.dbg.value if it can not 3918 // find a node corresponding to the value. 3919 bool CodeGenPrepare::PlaceDbgValues(Function &F) { 3920 bool MadeChange = false; 3921 for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { 3922 Instruction *PrevNonDbgInst = nullptr; 3923 for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) { 3924 Instruction *Insn = BI; ++BI; 3925 DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn); 3926 // Leave dbg.values that refer to an alloca alone. 
These
3927 // intrinsics describe the address of a variable (= the alloca)
3928 // being taken. They should not be moved next to the alloca
3929 // (and to the beginning of the scope), but rather stay close to
3930 // where said address is used.
3931 if (!DVI || (DVI->getValue() && isa<AllocaInst>(DVI->getValue()))) {
3932 PrevNonDbgInst = Insn;
3933 continue;
3934 }
3935
3936 Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
3937 if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
3938 DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
3939 DVI->removeFromParent();
3940 if (isa<PHINode>(VI))
3941 DVI->insertBefore(VI->getParent()->getFirstInsertionPt());
3942 else
3943 DVI->insertAfter(VI);
3944 MadeChange = true;
3945 ++NumDbgValueMoved;
3946 }
3947 }
3948 }
3949 return MadeChange;
3950 }
3951
3952 // If there is a sequence that branches based on comparing a single bit
3953 // against zero, and the target supports folding the mask and compare
3954 // into a single instruction, sink the mask and compare into the
3955 // branch uses. Do this before OptimizeBlock ->
3956 // OptimizeInst -> OptimizeCmpExpression, which perturbs the pattern being
3957 // searched for.
3958 bool CodeGenPrepare::sinkAndCmp(Function &F) {
3959 if (!EnableAndCmpSinking)
3960 return false;
3961 if (!TLI || !TLI->isMaskAndBranchFoldingLegal())
3962 return false;
3963 bool MadeChange = false;
3964 for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
3965 BasicBlock *BB = I++;
3966
3967 // Does this BB end with the following?
3968 // %andVal = and %val, #single-bit-set
3969 // %icmpVal = icmp %andVal, 0
3970 // br i1 %icmpVal, label %dest1, label %dest2
3971 BranchInst *Brcc = dyn_cast<BranchInst>(BB->getTerminator());
3972 if (!Brcc || !Brcc->isConditional())
3973 continue;
3974 ICmpInst *Cmp = dyn_cast<ICmpInst>(Brcc->getOperand(0));
3975 if (!Cmp || Cmp->getParent() != BB)
3976 continue;
3977 ConstantInt *Zero = dyn_cast<ConstantInt>(Cmp->getOperand(1));
3978 if (!Zero || !Zero->isZero())
3979 continue;
3980 Instruction *And = dyn_cast<Instruction>(Cmp->getOperand(0));
3981 if (!And || And->getOpcode() != Instruction::And || And->getParent() != BB)
3982 continue;
3983 ConstantInt* Mask = dyn_cast<ConstantInt>(And->getOperand(1));
3984 if (!Mask || !Mask->getUniqueInteger().isPowerOf2())
3985 continue;
3986 DEBUG(dbgs() << "found and; icmp ?,0; brcc\n"); DEBUG(BB->dump());
3987
3988 // Push the "and; icmp" for any users that are conditional branches.
3989 // Since there can only be one branch use per BB, we don't need to keep
3990 // track of which BBs we insert into.
3991 for (Value::use_iterator UI = Cmp->use_begin(), E = Cmp->use_end();
3992 UI != E; ) {
3993 Use &TheUse = *UI;
3994 // Find brcc use.
3995 BranchInst *BrccUser = dyn_cast<BranchInst>(*UI);
3996 ++UI;
3997 if (!BrccUser || !BrccUser->isConditional())
3998 continue;
3999 BasicBlock *UserBB = BrccUser->getParent();
4000 if (UserBB == BB) continue;
4001 DEBUG(dbgs() << "found Brcc use\n");
4002
4003 // Sink the "and; icmp" down to the use.
4004 MadeChange = true; 4005 BinaryOperator *NewAnd = 4006 BinaryOperator::CreateAnd(And->getOperand(0), And->getOperand(1), "", 4007 BrccUser); 4008 CmpInst *NewCmp = 4009 CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(), NewAnd, Zero, 4010 "", BrccUser); 4011 TheUse = NewCmp; 4012 ++NumAndCmpsMoved; 4013 DEBUG(BrccUser->getParent()->dump()); 4014 } 4015 } 4016 return MadeChange; 4017 } 4018 4019 /// \brief Retrieve the probabilities of a conditional branch. Returns true on 4020 /// success, or returns false if no or invalid metadata was found. 4021 static bool extractBranchMetadata(BranchInst *BI, 4022 uint64_t &ProbTrue, uint64_t &ProbFalse) { 4023 assert(BI->isConditional() && 4024 "Looking for probabilities on unconditional branch?"); 4025 auto *ProfileData = BI->getMetadata(LLVMContext::MD_prof); 4026 if (!ProfileData || ProfileData->getNumOperands() != 3) 4027 return false; 4028 4029 const auto *CITrue = 4030 mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1)); 4031 const auto *CIFalse = 4032 mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(2)); 4033 if (!CITrue || !CIFalse) 4034 return false; 4035 4036 ProbTrue = CITrue->getValue().getZExtValue(); 4037 ProbFalse = CIFalse->getValue().getZExtValue(); 4038 4039 return true; 4040 } 4041 4042 /// \brief Scale down both weights to fit into uint32_t. 4043 static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) { 4044 uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse; 4045 uint32_t Scale = (NewMax / UINT32_MAX) + 1; 4046 NewTrue = NewTrue / Scale; 4047 NewFalse = NewFalse / Scale; 4048 } 4049 4050 /// \brief Some targets prefer to split a conditional branch like: 4051 /// \code 4052 /// %0 = icmp ne i32 %a, 0 4053 /// %1 = icmp ne i32 %b, 0 4054 /// %or.cond = or i1 %0, %1 4055 /// br i1 %or.cond, label %TrueBB, label %FalseBB 4056 /// \endcode 4057 /// into multiple branch instructions like: 4058 /// \code 4059 /// bb1: 4060 /// %0 = icmp ne i32 %a, 0 4061 /// br i1 %0, label %TrueBB, label %bb2 4062 /// bb2: 4063 /// %1 = icmp ne i32 %b, 0 4064 /// br i1 %1, label %TrueBB, label %FalseBB 4065 /// \endcode 4066 /// This usually allows instruction selection to do even further optimizations 4067 /// and combine the compare with the branch instruction. Currently this is 4068 /// applied for targets which have "cheap" jump instructions. 4069 /// 4070 /// FIXME: Remove the (equivalent?) implementation in SelectionDAG. 4071 /// 4072 bool CodeGenPrepare::splitBranchCondition(Function &F) { 4073 if (!TM || TM->Options.EnableFastISel != true || 4074 !TLI || TLI->isJumpExpensive()) 4075 return false; 4076 4077 bool MadeChange = false; 4078 for (auto &BB : F) { 4079 // Does this BB end with the following? 4080 // %cond1 = icmp|fcmp|binary instruction ... 4081 // %cond2 = icmp|fcmp|binary instruction ... 
4082 // %cond.or = or|and i1 %cond1, %cond2
4083 // br i1 %cond.or, label %dest1, label %dest2
4084 BinaryOperator *LogicOp;
4085 BasicBlock *TBB, *FBB;
4086 if (!match(BB.getTerminator(), m_Br(m_OneUse(m_BinOp(LogicOp)), TBB, FBB)))
4087 continue;
4088
4089 unsigned Opc;
4090 Value *Cond1, *Cond2;
4091 if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)),
4092 m_OneUse(m_Value(Cond2)))))
4093 Opc = Instruction::And;
4094 else if (match(LogicOp, m_Or(m_OneUse(m_Value(Cond1)),
4095 m_OneUse(m_Value(Cond2)))))
4096 Opc = Instruction::Or;
4097 else
4098 continue;
4099
4100 if (!match(Cond1, m_CombineOr(m_Cmp(), m_BinOp())) ||
4101 !match(Cond2, m_CombineOr(m_Cmp(), m_BinOp())) )
4102 continue;
4103
4104 DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
4105
4106 // Create a new BB.
4107 auto *InsertBefore = std::next(Function::iterator(BB))
4108 .getNodePtrUnchecked();
4109 auto TmpBB = BasicBlock::Create(BB.getContext(),
4110 BB.getName() + ".cond.split",
4111 BB.getParent(), InsertBefore);
4112
4113 // Update the original basic block to use the first condition directly in its
4114 // branch instruction and remove the no longer needed and/or instruction.
4115 auto *Br1 = cast<BranchInst>(BB.getTerminator());
4116 Br1->setCondition(Cond1);
4117 LogicOp->eraseFromParent();
4118
4119 // Depending on the condition we have to either replace the true or the false
4120 // successor of the original branch instruction.
4121 if (Opc == Instruction::And)
4122 Br1->setSuccessor(0, TmpBB);
4123 else
4124 Br1->setSuccessor(1, TmpBB);
4125
4126 // Fill in the new basic block.
4127 auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
4128 if (auto *I = dyn_cast<Instruction>(Cond2)) {
4129 I->removeFromParent();
4130 I->insertBefore(Br2);
4131 }
4132
4133 // Update PHI nodes in both successors. The original BB needs to be
4134 // replaced in one successor's PHI nodes, because the branch now comes from
4135 // the newly generated BB (TmpBB). In the other successor we need to add one
4136 // incoming edge to the PHI nodes, because both branch instructions now
4137 // target the same successor. Depending on the original branch condition
4138 // (and/or) we have to swap the successors (TrueDest, FalseDest), so that
4139 // we perform the correct update for the PHI nodes.
4140 // This doesn't change the successor order of the just created branch
4141 // instruction (or any other instruction).
4142 if (Opc == Instruction::Or)
4143 std::swap(TBB, FBB);
4144
4145 // Replace the old BB with the new BB.
4146 for (auto &I : *TBB) {
4147 PHINode *PN = dyn_cast<PHINode>(&I);
4148 if (!PN)
4149 break;
4150 int i;
4151 while ((i = PN->getBasicBlockIndex(&BB)) >= 0)
4152 PN->setIncomingBlock(i, TmpBB);
4153 }
4154
4155 // Add another incoming edge from the new BB.
4156 for (auto &I : *FBB) {
4157 PHINode *PN = dyn_cast<PHINode>(&I);
4158 if (!PN)
4159 break;
4160 auto *Val = PN->getIncomingValueForBlock(&BB);
4161 PN->addIncoming(Val, TmpBB);
4162 }
4163
4164 // Update the branch weights (from SelectionDAGBuilder::
4165 // FindMergedConditions).
4166 if (Opc == Instruction::Or) {
4167 // Codegen X | Y as:
4168 // BB1:
4169 // jmp_if_X TBB
4170 // jmp TmpBB
4171 // TmpBB:
4172 // jmp_if_Y TBB
4173 // jmp FBB
4174 //
4175
4176 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
4177 // The requirement is that
4178 // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
4179 // = TrueProb for original BB.
4180 // Assuming the original weights are A and B, one choice is to set BB1's
4181 // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
4182 // assumes that
4183 // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
4184 // Another choice is to assume TrueProb for BB1 equals TrueProb for
4185 // TmpBB, but the math is more complicated.
4186 uint64_t TrueWeight, FalseWeight;
4187 if (extractBranchMetadata(Br1, TrueWeight, FalseWeight)) {
4188 uint64_t NewTrueWeight = TrueWeight;
4189 uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
4190 scaleWeights(NewTrueWeight, NewFalseWeight);
4191 Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
4192 .createBranchWeights(NewTrueWeight, NewFalseWeight));
4193
4194 NewTrueWeight = TrueWeight;
4195 NewFalseWeight = 2 * FalseWeight;
4196 scaleWeights(NewTrueWeight, NewFalseWeight);
4197 Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
4198 .createBranchWeights(NewTrueWeight, NewFalseWeight));
4199 }
4200 } else {
4201 // Codegen X & Y as:
4202 // BB1:
4203 // jmp_if_X TmpBB
4204 // jmp FBB
4205 // TmpBB:
4206 // jmp_if_Y TBB
4207 // jmp FBB
4208 //
4209 // This requires creation of TmpBB after CurBB.
4210
4211 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
4212 // The requirement is that
4213 // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
4214 // = FalseProb for original BB.
4215 // Assuming the original weights are A and B, one choice is to set BB1's
4216 // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
4217 // assumes that
4218 // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
4219 uint64_t TrueWeight, FalseWeight;
4220 if (extractBranchMetadata(Br1, TrueWeight, FalseWeight)) {
4221 uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
4222 uint64_t NewFalseWeight = FalseWeight;
4223 scaleWeights(NewTrueWeight, NewFalseWeight);
4224 Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
4225 .createBranchWeights(NewTrueWeight, NewFalseWeight));
4226
4227 NewTrueWeight = 2 * TrueWeight;
4228 NewFalseWeight = FalseWeight;
4229 scaleWeights(NewTrueWeight, NewFalseWeight);
4230 Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
4231 .createBranchWeights(NewTrueWeight, NewFalseWeight));
4232 }
4233 }
4234
4235 // Request DOM Tree update.
4236 // Note: No point in getting fancy here, since the DT info is never
4237 // available to CodeGenPrepare and the existing update code is broken
4238 // anyway.
4239 ModifiedDT = true;
4240
4241 MadeChange = true;
4242
4243 DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
4244 TmpBB->dump());
4245 }
4246 return MadeChange;
4247 }
4248
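// A worked example of the Or-case weight update above, with illustrative
// original weights A = TrueWeight = 1 and B = FalseWeight = 3: Br1 gets
// {A, A + 2B} = {1, 7} and Br2 gets {A, 2B} = {1, 6}. TBB is then reached
// with probability 1/8 directly from BB1 plus 7/8 * 1/7 = 1/8 through TmpBB,
// i.e. 1/4 in total, which matches the original
// TrueWeight / (TrueWeight + FalseWeight) = 1/4.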