1 //===- CodeGenPrepare.cpp - Prepare a function for code generation --------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass munges the code in the input function to better prepare it for 11 // SelectionDAG-based code generation. This works around limitations in it's 12 // basic-block-at-a-time approach. It should eventually be removed. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/CodeGen/Passes.h" 17 #include "llvm/ADT/DenseMap.h" 18 #include "llvm/ADT/SmallSet.h" 19 #include "llvm/ADT/Statistic.h" 20 #include "llvm/Analysis/InstructionSimplify.h" 21 #include "llvm/IR/CallSite.h" 22 #include "llvm/IR/Constants.h" 23 #include "llvm/IR/DataLayout.h" 24 #include "llvm/IR/DerivedTypes.h" 25 #include "llvm/IR/Dominators.h" 26 #include "llvm/IR/Function.h" 27 #include "llvm/IR/GetElementPtrTypeIterator.h" 28 #include "llvm/IR/IRBuilder.h" 29 #include "llvm/IR/InlineAsm.h" 30 #include "llvm/IR/Instructions.h" 31 #include "llvm/IR/IntrinsicInst.h" 32 #include "llvm/IR/PatternMatch.h" 33 #include "llvm/IR/ValueHandle.h" 34 #include "llvm/IR/ValueMap.h" 35 #include "llvm/Pass.h" 36 #include "llvm/Support/CommandLine.h" 37 #include "llvm/Support/Debug.h" 38 #include "llvm/Support/raw_ostream.h" 39 #include "llvm/Target/TargetLibraryInfo.h" 40 #include "llvm/Target/TargetLowering.h" 41 #include "llvm/Target/TargetSubtargetInfo.h" 42 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 43 #include "llvm/Transforms/Utils/BuildLibCalls.h" 44 #include "llvm/Transforms/Utils/BypassSlowDivision.h" 45 #include "llvm/Transforms/Utils/Local.h" 46 using namespace llvm; 47 using namespace llvm::PatternMatch; 48 49 #define DEBUG_TYPE "codegenprepare" 50 51 STATISTIC(NumBlocksElim, "Number of blocks eliminated"); 52 STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated"); 53 STATISTIC(NumGEPsElim, "Number of GEPs converted to casts"); 54 STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of " 55 "sunken Cmps"); 56 STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses " 57 "of sunken Casts"); 58 STATISTIC(NumMemoryInsts, "Number of memory instructions whose address " 59 "computations were sunk"); 60 STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads"); 61 STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized"); 62 STATISTIC(NumRetsDup, "Number of return instructions duplicated"); 63 STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved"); 64 STATISTIC(NumSelectsExpanded, "Number of selects turned into branches"); 65 STATISTIC(NumAndCmpsMoved, "Number of and/cmp's pushed into branches"); 66 67 static cl::opt<bool> DisableBranchOpts( 68 "disable-cgp-branch-opts", cl::Hidden, cl::init(false), 69 cl::desc("Disable branch optimizations in CodeGenPrepare")); 70 71 static cl::opt<bool> DisableSelectToBranch( 72 "disable-cgp-select2branch", cl::Hidden, cl::init(false), 73 cl::desc("Disable select to branch conversion.")); 74 75 static cl::opt<bool> AddrSinkUsingGEPs( 76 "addr-sink-using-gep", cl::Hidden, cl::init(false), 77 cl::desc("Address sinking in CGP using GEPs.")); 78 79 static cl::opt<bool> EnableAndCmpSinking( 80 "enable-andcmp-sinking", cl::Hidden, cl::init(true), 81 cl::desc("Enable sinkinig and/cmp into branches.")); 82 83 namespace { 84 typedef SmallPtrSet<Instruction *, 16> SetOfInstrs; 85 typedef DenseMap<Instruction *, Type *> InstrToOrigTy; 86 87 class CodeGenPrepare : public FunctionPass { 88 /// TLI - Keep a pointer of a TargetLowering to consult for determining 89 /// transformation profitability. 90 const TargetMachine *TM; 91 const TargetLowering *TLI; 92 const TargetLibraryInfo *TLInfo; 93 DominatorTree *DT; 94 95 /// CurInstIterator - As we scan instructions optimizing them, this is the 96 /// next instruction to optimize. Xforms that can invalidate this should 97 /// update it. 98 BasicBlock::iterator CurInstIterator; 99 100 /// Keeps track of non-local addresses that have been sunk into a block. 101 /// This allows us to avoid inserting duplicate code for blocks with 102 /// multiple load/stores of the same address. 103 ValueMap<Value*, Value*> SunkAddrs; 104 105 /// Keeps track of all truncates inserted for the current function. 106 SetOfInstrs InsertedTruncsSet; 107 /// Keeps track of the type of the related instruction before their 108 /// promotion for the current function. 109 InstrToOrigTy PromotedInsts; 110 111 /// ModifiedDT - If CFG is modified in anyway, dominator tree may need to 112 /// be updated. 113 bool ModifiedDT; 114 115 /// OptSize - True if optimizing for size. 116 bool OptSize; 117 118 public: 119 static char ID; // Pass identification, replacement for typeid 120 explicit CodeGenPrepare(const TargetMachine *TM = nullptr) 121 : FunctionPass(ID), TM(TM), TLI(nullptr) { 122 initializeCodeGenPreparePass(*PassRegistry::getPassRegistry()); 123 } 124 bool runOnFunction(Function &F) override; 125 126 const char *getPassName() const override { return "CodeGen Prepare"; } 127 128 void getAnalysisUsage(AnalysisUsage &AU) const override { 129 AU.addPreserved<DominatorTreeWrapperPass>(); 130 AU.addRequired<TargetLibraryInfo>(); 131 } 132 133 private: 134 bool EliminateFallThrough(Function &F); 135 bool EliminateMostlyEmptyBlocks(Function &F); 136 bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; 137 void EliminateMostlyEmptyBlock(BasicBlock *BB); 138 bool OptimizeBlock(BasicBlock &BB); 139 bool OptimizeInst(Instruction *I); 140 bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy); 141 bool OptimizeInlineAsmInst(CallInst *CS); 142 bool OptimizeCallInst(CallInst *CI); 143 bool MoveExtToFormExtLoad(Instruction *I); 144 bool OptimizeExtUses(Instruction *I); 145 bool OptimizeSelectInst(SelectInst *SI); 146 bool OptimizeShuffleVectorInst(ShuffleVectorInst *SI); 147 bool DupRetToEnableTailCallOpts(BasicBlock *BB); 148 bool PlaceDbgValues(Function &F); 149 bool sinkAndCmp(Function &F); 150 }; 151 } 152 153 char CodeGenPrepare::ID = 0; 154 INITIALIZE_TM_PASS(CodeGenPrepare, "codegenprepare", 155 "Optimize for code generation", false, false) 156 157 FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) { 158 return new CodeGenPrepare(TM); 159 } 160 161 bool CodeGenPrepare::runOnFunction(Function &F) { 162 if (skipOptnoneFunction(F)) 163 return false; 164 165 bool EverMadeChange = false; 166 // Clear per function information. 167 InsertedTruncsSet.clear(); 168 PromotedInsts.clear(); 169 170 ModifiedDT = false; 171 if (TM) TLI = TM->getTargetLowering(); 172 TLInfo = &getAnalysis<TargetLibraryInfo>(); 173 DominatorTreeWrapperPass *DTWP = 174 getAnalysisIfAvailable<DominatorTreeWrapperPass>(); 175 DT = DTWP ? &DTWP->getDomTree() : nullptr; 176 OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, 177 Attribute::OptimizeForSize); 178 179 /// This optimization identifies DIV instructions that can be 180 /// profitably bypassed and carried out with a shorter, faster divide. 181 if (!OptSize && TLI && TLI->isSlowDivBypassed()) { 182 const DenseMap<unsigned int, unsigned int> &BypassWidths = 183 TLI->getBypassSlowDivWidths(); 184 for (Function::iterator I = F.begin(); I != F.end(); I++) 185 EverMadeChange |= bypassSlowDivision(F, I, BypassWidths); 186 } 187 188 // Eliminate blocks that contain only PHI nodes and an 189 // unconditional branch. 190 EverMadeChange |= EliminateMostlyEmptyBlocks(F); 191 192 // llvm.dbg.value is far away from the value then iSel may not be able 193 // handle it properly. iSel will drop llvm.dbg.value if it can not 194 // find a node corresponding to the value. 195 EverMadeChange |= PlaceDbgValues(F); 196 197 // If there is a mask, compare against zero, and branch that can be combined 198 // into a single target instruction, push the mask and compare into branch 199 // users. Do this before OptimizeBlock -> OptimizeInst -> 200 // OptimizeCmpExpression, which perturbs the pattern being searched for. 201 if (!DisableBranchOpts) 202 EverMadeChange |= sinkAndCmp(F); 203 204 bool MadeChange = true; 205 while (MadeChange) { 206 MadeChange = false; 207 for (Function::iterator I = F.begin(); I != F.end(); ) { 208 BasicBlock *BB = I++; 209 MadeChange |= OptimizeBlock(*BB); 210 } 211 EverMadeChange |= MadeChange; 212 } 213 214 SunkAddrs.clear(); 215 216 if (!DisableBranchOpts) { 217 MadeChange = false; 218 SmallPtrSet<BasicBlock*, 8> WorkList; 219 for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { 220 SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB)); 221 MadeChange |= ConstantFoldTerminator(BB, true); 222 if (!MadeChange) continue; 223 224 for (SmallVectorImpl<BasicBlock*>::iterator 225 II = Successors.begin(), IE = Successors.end(); II != IE; ++II) 226 if (pred_begin(*II) == pred_end(*II)) 227 WorkList.insert(*II); 228 } 229 230 // Delete the dead blocks and any of their dead successors. 231 MadeChange |= !WorkList.empty(); 232 while (!WorkList.empty()) { 233 BasicBlock *BB = *WorkList.begin(); 234 WorkList.erase(BB); 235 SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB)); 236 237 DeleteDeadBlock(BB); 238 239 for (SmallVectorImpl<BasicBlock*>::iterator 240 II = Successors.begin(), IE = Successors.end(); II != IE; ++II) 241 if (pred_begin(*II) == pred_end(*II)) 242 WorkList.insert(*II); 243 } 244 245 // Merge pairs of basic blocks with unconditional branches, connected by 246 // a single edge. 247 if (EverMadeChange || MadeChange) 248 MadeChange |= EliminateFallThrough(F); 249 250 if (MadeChange) 251 ModifiedDT = true; 252 EverMadeChange |= MadeChange; 253 } 254 255 if (ModifiedDT && DT) 256 DT->recalculate(F); 257 258 return EverMadeChange; 259 } 260 261 /// EliminateFallThrough - Merge basic blocks which are connected 262 /// by a single edge, where one of the basic blocks has a single successor 263 /// pointing to the other basic block, which has a single predecessor. 264 bool CodeGenPrepare::EliminateFallThrough(Function &F) { 265 bool Changed = false; 266 // Scan all of the blocks in the function, except for the entry block. 267 for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { 268 BasicBlock *BB = I++; 269 // If the destination block has a single pred, then this is a trivial 270 // edge, just collapse it. 271 BasicBlock *SinglePred = BB->getSinglePredecessor(); 272 273 // Don't merge if BB's address is taken. 274 if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue; 275 276 BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator()); 277 if (Term && !Term->isConditional()) { 278 Changed = true; 279 DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n"); 280 // Remember if SinglePred was the entry block of the function. 281 // If so, we will need to move BB back to the entry position. 282 bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); 283 MergeBasicBlockIntoOnlyPred(BB, this); 284 285 if (isEntry && BB != &BB->getParent()->getEntryBlock()) 286 BB->moveBefore(&BB->getParent()->getEntryBlock()); 287 288 // We have erased a block. Update the iterator. 289 I = BB; 290 } 291 } 292 return Changed; 293 } 294 295 /// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes, 296 /// debug info directives, and an unconditional branch. Passes before isel 297 /// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for 298 /// isel. Start by eliminating these blocks so we can split them the way we 299 /// want them. 300 bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) { 301 bool MadeChange = false; 302 // Note that this intentionally skips the entry block. 303 for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { 304 BasicBlock *BB = I++; 305 306 // If this block doesn't end with an uncond branch, ignore it. 307 BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); 308 if (!BI || !BI->isUnconditional()) 309 continue; 310 311 // If the instruction before the branch (skipping debug info) isn't a phi 312 // node, then other stuff is happening here. 313 BasicBlock::iterator BBI = BI; 314 if (BBI != BB->begin()) { 315 --BBI; 316 while (isa<DbgInfoIntrinsic>(BBI)) { 317 if (BBI == BB->begin()) 318 break; 319 --BBI; 320 } 321 if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI)) 322 continue; 323 } 324 325 // Do not break infinite loops. 326 BasicBlock *DestBB = BI->getSuccessor(0); 327 if (DestBB == BB) 328 continue; 329 330 if (!CanMergeBlocks(BB, DestBB)) 331 continue; 332 333 EliminateMostlyEmptyBlock(BB); 334 MadeChange = true; 335 } 336 return MadeChange; 337 } 338 339 /// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a 340 /// single uncond branch between them, and BB contains no other non-phi 341 /// instructions. 342 bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB, 343 const BasicBlock *DestBB) const { 344 // We only want to eliminate blocks whose phi nodes are used by phi nodes in 345 // the successor. If there are more complex condition (e.g. preheaders), 346 // don't mess around with them. 347 BasicBlock::const_iterator BBI = BB->begin(); 348 while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) { 349 for (const User *U : PN->users()) { 350 const Instruction *UI = cast<Instruction>(U); 351 if (UI->getParent() != DestBB || !isa<PHINode>(UI)) 352 return false; 353 // If User is inside DestBB block and it is a PHINode then check 354 // incoming value. If incoming value is not from BB then this is 355 // a complex condition (e.g. preheaders) we want to avoid here. 356 if (UI->getParent() == DestBB) { 357 if (const PHINode *UPN = dyn_cast<PHINode>(UI)) 358 for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) { 359 Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I)); 360 if (Insn && Insn->getParent() == BB && 361 Insn->getParent() != UPN->getIncomingBlock(I)) 362 return false; 363 } 364 } 365 } 366 } 367 368 // If BB and DestBB contain any common predecessors, then the phi nodes in BB 369 // and DestBB may have conflicting incoming values for the block. If so, we 370 // can't merge the block. 371 const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin()); 372 if (!DestBBPN) return true; // no conflict. 373 374 // Collect the preds of BB. 375 SmallPtrSet<const BasicBlock*, 16> BBPreds; 376 if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) { 377 // It is faster to get preds from a PHI than with pred_iterator. 378 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i) 379 BBPreds.insert(BBPN->getIncomingBlock(i)); 380 } else { 381 BBPreds.insert(pred_begin(BB), pred_end(BB)); 382 } 383 384 // Walk the preds of DestBB. 385 for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) { 386 BasicBlock *Pred = DestBBPN->getIncomingBlock(i); 387 if (BBPreds.count(Pred)) { // Common predecessor? 388 BBI = DestBB->begin(); 389 while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) { 390 const Value *V1 = PN->getIncomingValueForBlock(Pred); 391 const Value *V2 = PN->getIncomingValueForBlock(BB); 392 393 // If V2 is a phi node in BB, look up what the mapped value will be. 394 if (const PHINode *V2PN = dyn_cast<PHINode>(V2)) 395 if (V2PN->getParent() == BB) 396 V2 = V2PN->getIncomingValueForBlock(Pred); 397 398 // If there is a conflict, bail out. 399 if (V1 != V2) return false; 400 } 401 } 402 } 403 404 return true; 405 } 406 407 408 /// EliminateMostlyEmptyBlock - Eliminate a basic block that have only phi's and 409 /// an unconditional branch in it. 410 void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { 411 BranchInst *BI = cast<BranchInst>(BB->getTerminator()); 412 BasicBlock *DestBB = BI->getSuccessor(0); 413 414 DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB); 415 416 // If the destination block has a single pred, then this is a trivial edge, 417 // just collapse it. 418 if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) { 419 if (SinglePred != DestBB) { 420 // Remember if SinglePred was the entry block of the function. If so, we 421 // will need to move BB back to the entry position. 422 bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); 423 MergeBasicBlockIntoOnlyPred(DestBB, this); 424 425 if (isEntry && BB != &BB->getParent()->getEntryBlock()) 426 BB->moveBefore(&BB->getParent()->getEntryBlock()); 427 428 DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); 429 return; 430 } 431 } 432 433 // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB 434 // to handle the new incoming edges it is about to have. 435 PHINode *PN; 436 for (BasicBlock::iterator BBI = DestBB->begin(); 437 (PN = dyn_cast<PHINode>(BBI)); ++BBI) { 438 // Remove the incoming value for BB, and remember it. 439 Value *InVal = PN->removeIncomingValue(BB, false); 440 441 // Two options: either the InVal is a phi node defined in BB or it is some 442 // value that dominates BB. 443 PHINode *InValPhi = dyn_cast<PHINode>(InVal); 444 if (InValPhi && InValPhi->getParent() == BB) { 445 // Add all of the input values of the input PHI as inputs of this phi. 446 for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i) 447 PN->addIncoming(InValPhi->getIncomingValue(i), 448 InValPhi->getIncomingBlock(i)); 449 } else { 450 // Otherwise, add one instance of the dominating value for each edge that 451 // we will be adding. 452 if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) { 453 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i) 454 PN->addIncoming(InVal, BBPN->getIncomingBlock(i)); 455 } else { 456 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) 457 PN->addIncoming(InVal, *PI); 458 } 459 } 460 } 461 462 // The PHIs are now updated, change everything that refers to BB to use 463 // DestBB and remove BB. 464 BB->replaceAllUsesWith(DestBB); 465 if (DT && !ModifiedDT) { 466 BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock(); 467 BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock(); 468 BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom); 469 DT->changeImmediateDominator(DestBB, NewIDom); 470 DT->eraseNode(BB); 471 } 472 BB->eraseFromParent(); 473 ++NumBlocksElim; 474 475 DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); 476 } 477 478 /// SinkCast - Sink the specified cast instruction into its user blocks 479 static bool SinkCast(CastInst *CI) { 480 BasicBlock *DefBB = CI->getParent(); 481 482 /// InsertedCasts - Only insert a cast in each block once. 483 DenseMap<BasicBlock*, CastInst*> InsertedCasts; 484 485 bool MadeChange = false; 486 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end(); 487 UI != E; ) { 488 Use &TheUse = UI.getUse(); 489 Instruction *User = cast<Instruction>(*UI); 490 491 // Figure out which BB this cast is used in. For PHI's this is the 492 // appropriate predecessor block. 493 BasicBlock *UserBB = User->getParent(); 494 if (PHINode *PN = dyn_cast<PHINode>(User)) { 495 UserBB = PN->getIncomingBlock(TheUse); 496 } 497 498 // Preincrement use iterator so we don't invalidate it. 499 ++UI; 500 501 // If this user is in the same block as the cast, don't change the cast. 502 if (UserBB == DefBB) continue; 503 504 // If we have already inserted a cast into this block, use it. 505 CastInst *&InsertedCast = InsertedCasts[UserBB]; 506 507 if (!InsertedCast) { 508 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); 509 InsertedCast = 510 CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "", 511 InsertPt); 512 MadeChange = true; 513 } 514 515 // Replace a use of the cast with a use of the new cast. 516 TheUse = InsertedCast; 517 ++NumCastUses; 518 } 519 520 // If we removed all uses, nuke the cast. 521 if (CI->use_empty()) { 522 CI->eraseFromParent(); 523 MadeChange = true; 524 } 525 526 return MadeChange; 527 } 528 529 /// OptimizeNoopCopyExpression - If the specified cast instruction is a noop 530 /// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC), 531 /// sink it into user blocks to reduce the number of virtual 532 /// registers that must be created and coalesced. 533 /// 534 /// Return true if any changes are made. 535 /// 536 static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){ 537 // If this is a noop copy, 538 EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType()); 539 EVT DstVT = TLI.getValueType(CI->getType()); 540 541 // This is an fp<->int conversion? 542 if (SrcVT.isInteger() != DstVT.isInteger()) 543 return false; 544 545 // If this is an extension, it will be a zero or sign extension, which 546 // isn't a noop. 547 if (SrcVT.bitsLT(DstVT)) return false; 548 549 // If these values will be promoted, find out what they will be promoted 550 // to. This helps us consider truncates on PPC as noop copies when they 551 // are. 552 if (TLI.getTypeAction(CI->getContext(), SrcVT) == 553 TargetLowering::TypePromoteInteger) 554 SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT); 555 if (TLI.getTypeAction(CI->getContext(), DstVT) == 556 TargetLowering::TypePromoteInteger) 557 DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT); 558 559 // If, after promotion, these are the same types, this is a noop copy. 560 if (SrcVT != DstVT) 561 return false; 562 563 return SinkCast(CI); 564 } 565 566 /// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce 567 /// the number of virtual registers that must be created and coalesced. This is 568 /// a clear win except on targets with multiple condition code registers 569 /// (PowerPC), where it might lose; some adjustment may be wanted there. 570 /// 571 /// Return true if any changes are made. 572 static bool OptimizeCmpExpression(CmpInst *CI) { 573 BasicBlock *DefBB = CI->getParent(); 574 575 /// InsertedCmp - Only insert a cmp in each block once. 576 DenseMap<BasicBlock*, CmpInst*> InsertedCmps; 577 578 bool MadeChange = false; 579 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end(); 580 UI != E; ) { 581 Use &TheUse = UI.getUse(); 582 Instruction *User = cast<Instruction>(*UI); 583 584 // Preincrement use iterator so we don't invalidate it. 585 ++UI; 586 587 // Don't bother for PHI nodes. 588 if (isa<PHINode>(User)) 589 continue; 590 591 // Figure out which BB this cmp is used in. 592 BasicBlock *UserBB = User->getParent(); 593 594 // If this user is in the same block as the cmp, don't change the cmp. 595 if (UserBB == DefBB) continue; 596 597 // If we have already inserted a cmp into this block, use it. 598 CmpInst *&InsertedCmp = InsertedCmps[UserBB]; 599 600 if (!InsertedCmp) { 601 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); 602 InsertedCmp = 603 CmpInst::Create(CI->getOpcode(), 604 CI->getPredicate(), CI->getOperand(0), 605 CI->getOperand(1), "", InsertPt); 606 MadeChange = true; 607 } 608 609 // Replace a use of the cmp with a use of the new cmp. 610 TheUse = InsertedCmp; 611 ++NumCmpUses; 612 } 613 614 // If we removed all uses, nuke the cmp. 615 if (CI->use_empty()) 616 CI->eraseFromParent(); 617 618 return MadeChange; 619 } 620 621 /// isExtractBitsCandidateUse - Check if the candidates could 622 /// be combined with shift instruction, which includes: 623 /// 1. Truncate instruction 624 /// 2. And instruction and the imm is a mask of the low bits: 625 /// imm & (imm+1) == 0 626 static bool isExtractBitsCandidateUse(Instruction *User) { 627 if (!isa<TruncInst>(User)) { 628 if (User->getOpcode() != Instruction::And || 629 !isa<ConstantInt>(User->getOperand(1))) 630 return false; 631 632 const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue(); 633 634 if ((Cimm & (Cimm + 1)).getBoolValue()) 635 return false; 636 } 637 return true; 638 } 639 640 /// SinkShiftAndTruncate - sink both shift and truncate instruction 641 /// to the use of truncate's BB. 642 static bool 643 SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, 644 DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts, 645 const TargetLowering &TLI) { 646 BasicBlock *UserBB = User->getParent(); 647 DenseMap<BasicBlock *, CastInst *> InsertedTruncs; 648 TruncInst *TruncI = dyn_cast<TruncInst>(User); 649 bool MadeChange = false; 650 651 for (Value::user_iterator TruncUI = TruncI->user_begin(), 652 TruncE = TruncI->user_end(); 653 TruncUI != TruncE;) { 654 655 Use &TruncTheUse = TruncUI.getUse(); 656 Instruction *TruncUser = cast<Instruction>(*TruncUI); 657 // Preincrement use iterator so we don't invalidate it. 658 659 ++TruncUI; 660 661 int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode()); 662 if (!ISDOpcode) 663 continue; 664 665 // If the use is actually a legal node, there will not be an implicit 666 // truncate. 667 if (TLI.isOperationLegalOrCustom(ISDOpcode, 668 EVT::getEVT(TruncUser->getType()))) 669 continue; 670 671 // Don't bother for PHI nodes. 672 if (isa<PHINode>(TruncUser)) 673 continue; 674 675 BasicBlock *TruncUserBB = TruncUser->getParent(); 676 677 if (UserBB == TruncUserBB) 678 continue; 679 680 BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB]; 681 CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB]; 682 683 if (!InsertedShift && !InsertedTrunc) { 684 BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt(); 685 // Sink the shift 686 if (ShiftI->getOpcode() == Instruction::AShr) 687 InsertedShift = 688 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt); 689 else 690 InsertedShift = 691 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt); 692 693 // Sink the trunc 694 BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt(); 695 TruncInsertPt++; 696 697 InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift, 698 TruncI->getType(), "", TruncInsertPt); 699 700 MadeChange = true; 701 702 TruncTheUse = InsertedTrunc; 703 } 704 } 705 return MadeChange; 706 } 707 708 /// OptimizeExtractBits - sink the shift *right* instruction into user blocks if 709 /// the uses could potentially be combined with this shift instruction and 710 /// generate BitExtract instruction. It will only be applied if the architecture 711 /// supports BitExtract instruction. Here is an example: 712 /// BB1: 713 /// %x.extract.shift = lshr i64 %arg1, 32 714 /// BB2: 715 /// %x.extract.trunc = trunc i64 %x.extract.shift to i16 716 /// ==> 717 /// 718 /// BB2: 719 /// %x.extract.shift.1 = lshr i64 %arg1, 32 720 /// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16 721 /// 722 /// CodeGen will recoginze the pattern in BB2 and generate BitExtract 723 /// instruction. 724 /// Return true if any changes are made. 725 static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, 726 const TargetLowering &TLI) { 727 BasicBlock *DefBB = ShiftI->getParent(); 728 729 /// Only insert instructions in each block once. 730 DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts; 731 732 bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(ShiftI->getType())); 733 734 bool MadeChange = false; 735 for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end(); 736 UI != E;) { 737 Use &TheUse = UI.getUse(); 738 Instruction *User = cast<Instruction>(*UI); 739 // Preincrement use iterator so we don't invalidate it. 740 ++UI; 741 742 // Don't bother for PHI nodes. 743 if (isa<PHINode>(User)) 744 continue; 745 746 if (!isExtractBitsCandidateUse(User)) 747 continue; 748 749 BasicBlock *UserBB = User->getParent(); 750 751 if (UserBB == DefBB) { 752 // If the shift and truncate instruction are in the same BB. The use of 753 // the truncate(TruncUse) may still introduce another truncate if not 754 // legal. In this case, we would like to sink both shift and truncate 755 // instruction to the BB of TruncUse. 756 // for example: 757 // BB1: 758 // i64 shift.result = lshr i64 opnd, imm 759 // trunc.result = trunc shift.result to i16 760 // 761 // BB2: 762 // ----> We will have an implicit truncate here if the architecture does 763 // not have i16 compare. 764 // cmp i16 trunc.result, opnd2 765 // 766 if (isa<TruncInst>(User) && shiftIsLegal 767 // If the type of the truncate is legal, no trucate will be 768 // introduced in other basic blocks. 769 && (!TLI.isTypeLegal(TLI.getValueType(User->getType())))) 770 MadeChange = 771 SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI); 772 773 continue; 774 } 775 // If we have already inserted a shift into this block, use it. 776 BinaryOperator *&InsertedShift = InsertedShifts[UserBB]; 777 778 if (!InsertedShift) { 779 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); 780 781 if (ShiftI->getOpcode() == Instruction::AShr) 782 InsertedShift = 783 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt); 784 else 785 InsertedShift = 786 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt); 787 788 MadeChange = true; 789 } 790 791 // Replace a use of the shift with a use of the new shift. 792 TheUse = InsertedShift; 793 } 794 795 // If we removed all uses, nuke the shift. 796 if (ShiftI->use_empty()) 797 ShiftI->eraseFromParent(); 798 799 return MadeChange; 800 } 801 802 namespace { 803 class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls { 804 protected: 805 void replaceCall(Value *With) override { 806 CI->replaceAllUsesWith(With); 807 CI->eraseFromParent(); 808 } 809 bool isFoldable(unsigned SizeCIOp, unsigned, bool) const override { 810 if (ConstantInt *SizeCI = 811 dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) 812 return SizeCI->isAllOnesValue(); 813 return false; 814 } 815 }; 816 } // end anonymous namespace 817 818 bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { 819 BasicBlock *BB = CI->getParent(); 820 821 // Lower inline assembly if we can. 822 // If we found an inline asm expession, and if the target knows how to 823 // lower it to normal LLVM code, do so now. 824 if (TLI && isa<InlineAsm>(CI->getCalledValue())) { 825 if (TLI->ExpandInlineAsm(CI)) { 826 // Avoid invalidating the iterator. 827 CurInstIterator = BB->begin(); 828 // Avoid processing instructions out of order, which could cause 829 // reuse before a value is defined. 830 SunkAddrs.clear(); 831 return true; 832 } 833 // Sink address computing for memory operands into the block. 834 if (OptimizeInlineAsmInst(CI)) 835 return true; 836 } 837 838 // Lower all uses of llvm.objectsize.* 839 IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); 840 if (II && II->getIntrinsicID() == Intrinsic::objectsize) { 841 bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1); 842 Type *ReturnTy = CI->getType(); 843 Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL); 844 845 // Substituting this can cause recursive simplifications, which can 846 // invalidate our iterator. Use a WeakVH to hold onto it in case this 847 // happens. 848 WeakVH IterHandle(CurInstIterator); 849 850 replaceAndRecursivelySimplify(CI, RetVal, 851 TLI ? TLI->getDataLayout() : nullptr, 852 TLInfo, ModifiedDT ? nullptr : DT); 853 854 // If the iterator instruction was recursively deleted, start over at the 855 // start of the block. 856 if (IterHandle != CurInstIterator) { 857 CurInstIterator = BB->begin(); 858 SunkAddrs.clear(); 859 } 860 return true; 861 } 862 863 if (II && TLI) { 864 SmallVector<Value*, 2> PtrOps; 865 Type *AccessTy; 866 if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy)) 867 while (!PtrOps.empty()) 868 if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy)) 869 return true; 870 } 871 872 // From here on out we're working with named functions. 873 if (!CI->getCalledFunction()) return false; 874 875 // We'll need DataLayout from here on out. 876 const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr; 877 if (!TD) return false; 878 879 // Lower all default uses of _chk calls. This is very similar 880 // to what InstCombineCalls does, but here we are only lowering calls 881 // that have the default "don't know" as the objectsize. Anything else 882 // should be left alone. 883 CodeGenPrepareFortifiedLibCalls Simplifier; 884 return Simplifier.fold(CI, TD, TLInfo); 885 } 886 887 /// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return 888 /// instructions to the predecessor to enable tail call optimizations. The 889 /// case it is currently looking for is: 890 /// @code 891 /// bb0: 892 /// %tmp0 = tail call i32 @f0() 893 /// br label %return 894 /// bb1: 895 /// %tmp1 = tail call i32 @f1() 896 /// br label %return 897 /// bb2: 898 /// %tmp2 = tail call i32 @f2() 899 /// br label %return 900 /// return: 901 /// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ] 902 /// ret i32 %retval 903 /// @endcode 904 /// 905 /// => 906 /// 907 /// @code 908 /// bb0: 909 /// %tmp0 = tail call i32 @f0() 910 /// ret i32 %tmp0 911 /// bb1: 912 /// %tmp1 = tail call i32 @f1() 913 /// ret i32 %tmp1 914 /// bb2: 915 /// %tmp2 = tail call i32 @f2() 916 /// ret i32 %tmp2 917 /// @endcode 918 bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) { 919 if (!TLI) 920 return false; 921 922 ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()); 923 if (!RI) 924 return false; 925 926 PHINode *PN = nullptr; 927 BitCastInst *BCI = nullptr; 928 Value *V = RI->getReturnValue(); 929 if (V) { 930 BCI = dyn_cast<BitCastInst>(V); 931 if (BCI) 932 V = BCI->getOperand(0); 933 934 PN = dyn_cast<PHINode>(V); 935 if (!PN) 936 return false; 937 } 938 939 if (PN && PN->getParent() != BB) 940 return false; 941 942 // It's not safe to eliminate the sign / zero extension of the return value. 943 // See llvm::isInTailCallPosition(). 944 const Function *F = BB->getParent(); 945 AttributeSet CallerAttrs = F->getAttributes(); 946 if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) || 947 CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) 948 return false; 949 950 // Make sure there are no instructions between the PHI and return, or that the 951 // return is the first instruction in the block. 952 if (PN) { 953 BasicBlock::iterator BI = BB->begin(); 954 do { ++BI; } while (isa<DbgInfoIntrinsic>(BI)); 955 if (&*BI == BCI) 956 // Also skip over the bitcast. 957 ++BI; 958 if (&*BI != RI) 959 return false; 960 } else { 961 BasicBlock::iterator BI = BB->begin(); 962 while (isa<DbgInfoIntrinsic>(BI)) ++BI; 963 if (&*BI != RI) 964 return false; 965 } 966 967 /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail 968 /// call. 969 SmallVector<CallInst*, 4> TailCalls; 970 if (PN) { 971 for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { 972 CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I)); 973 // Make sure the phi value is indeed produced by the tail call. 974 if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) && 975 TLI->mayBeEmittedAsTailCall(CI)) 976 TailCalls.push_back(CI); 977 } 978 } else { 979 SmallPtrSet<BasicBlock*, 4> VisitedBBs; 980 for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { 981 if (!VisitedBBs.insert(*PI)) 982 continue; 983 984 BasicBlock::InstListType &InstList = (*PI)->getInstList(); 985 BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin(); 986 BasicBlock::InstListType::reverse_iterator RE = InstList.rend(); 987 do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI)); 988 if (RI == RE) 989 continue; 990 991 CallInst *CI = dyn_cast<CallInst>(&*RI); 992 if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI)) 993 TailCalls.push_back(CI); 994 } 995 } 996 997 bool Changed = false; 998 for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) { 999 CallInst *CI = TailCalls[i]; 1000 CallSite CS(CI); 1001 1002 // Conservatively require the attributes of the call to match those of the 1003 // return. Ignore noalias because it doesn't affect the call sequence. 1004 AttributeSet CalleeAttrs = CS.getAttributes(); 1005 if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex). 1006 removeAttribute(Attribute::NoAlias) != 1007 AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex). 1008 removeAttribute(Attribute::NoAlias)) 1009 continue; 1010 1011 // Make sure the call instruction is followed by an unconditional branch to 1012 // the return block. 1013 BasicBlock *CallBB = CI->getParent(); 1014 BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator()); 1015 if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB) 1016 continue; 1017 1018 // Duplicate the return into CallBB. 1019 (void)FoldReturnIntoUncondBranch(RI, BB, CallBB); 1020 ModifiedDT = Changed = true; 1021 ++NumRetsDup; 1022 } 1023 1024 // If we eliminated all predecessors of the block, delete the block now. 1025 if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB)) 1026 BB->eraseFromParent(); 1027 1028 return Changed; 1029 } 1030 1031 //===----------------------------------------------------------------------===// 1032 // Memory Optimization 1033 //===----------------------------------------------------------------------===// 1034 1035 namespace { 1036 1037 /// ExtAddrMode - This is an extended version of TargetLowering::AddrMode 1038 /// which holds actual Value*'s for register values. 1039 struct ExtAddrMode : public TargetLowering::AddrMode { 1040 Value *BaseReg; 1041 Value *ScaledReg; 1042 ExtAddrMode() : BaseReg(nullptr), ScaledReg(nullptr) {} 1043 void print(raw_ostream &OS) const; 1044 void dump() const; 1045 1046 bool operator==(const ExtAddrMode& O) const { 1047 return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) && 1048 (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) && 1049 (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale); 1050 } 1051 }; 1052 1053 #ifndef NDEBUG 1054 static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) { 1055 AM.print(OS); 1056 return OS; 1057 } 1058 #endif 1059 1060 void ExtAddrMode::print(raw_ostream &OS) const { 1061 bool NeedPlus = false; 1062 OS << "["; 1063 if (BaseGV) { 1064 OS << (NeedPlus ? " + " : "") 1065 << "GV:"; 1066 BaseGV->printAsOperand(OS, /*PrintType=*/false); 1067 NeedPlus = true; 1068 } 1069 1070 if (BaseOffs) { 1071 OS << (NeedPlus ? " + " : "") 1072 << BaseOffs; 1073 NeedPlus = true; 1074 } 1075 1076 if (BaseReg) { 1077 OS << (NeedPlus ? " + " : "") 1078 << "Base:"; 1079 BaseReg->printAsOperand(OS, /*PrintType=*/false); 1080 NeedPlus = true; 1081 } 1082 if (Scale) { 1083 OS << (NeedPlus ? " + " : "") 1084 << Scale << "*"; 1085 ScaledReg->printAsOperand(OS, /*PrintType=*/false); 1086 } 1087 1088 OS << ']'; 1089 } 1090 1091 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1092 void ExtAddrMode::dump() const { 1093 print(dbgs()); 1094 dbgs() << '\n'; 1095 } 1096 #endif 1097 1098 /// \brief This class provides transaction based operation on the IR. 1099 /// Every change made through this class is recorded in the internal state and 1100 /// can be undone (rollback) until commit is called. 1101 class TypePromotionTransaction { 1102 1103 /// \brief This represents the common interface of the individual transaction. 1104 /// Each class implements the logic for doing one specific modification on 1105 /// the IR via the TypePromotionTransaction. 1106 class TypePromotionAction { 1107 protected: 1108 /// The Instruction modified. 1109 Instruction *Inst; 1110 1111 public: 1112 /// \brief Constructor of the action. 1113 /// The constructor performs the related action on the IR. 1114 TypePromotionAction(Instruction *Inst) : Inst(Inst) {} 1115 1116 virtual ~TypePromotionAction() {} 1117 1118 /// \brief Undo the modification done by this action. 1119 /// When this method is called, the IR must be in the same state as it was 1120 /// before this action was applied. 1121 /// \pre Undoing the action works if and only if the IR is in the exact same 1122 /// state as it was directly after this action was applied. 1123 virtual void undo() = 0; 1124 1125 /// \brief Advocate every change made by this action. 1126 /// When the results on the IR of the action are to be kept, it is important 1127 /// to call this function, otherwise hidden information may be kept forever. 1128 virtual void commit() { 1129 // Nothing to be done, this action is not doing anything. 1130 } 1131 }; 1132 1133 /// \brief Utility to remember the position of an instruction. 1134 class InsertionHandler { 1135 /// Position of an instruction. 1136 /// Either an instruction: 1137 /// - Is the first in a basic block: BB is used. 1138 /// - Has a previous instructon: PrevInst is used. 1139 union { 1140 Instruction *PrevInst; 1141 BasicBlock *BB; 1142 } Point; 1143 /// Remember whether or not the instruction had a previous instruction. 1144 bool HasPrevInstruction; 1145 1146 public: 1147 /// \brief Record the position of \p Inst. 1148 InsertionHandler(Instruction *Inst) { 1149 BasicBlock::iterator It = Inst; 1150 HasPrevInstruction = (It != (Inst->getParent()->begin())); 1151 if (HasPrevInstruction) 1152 Point.PrevInst = --It; 1153 else 1154 Point.BB = Inst->getParent(); 1155 } 1156 1157 /// \brief Insert \p Inst at the recorded position. 1158 void insert(Instruction *Inst) { 1159 if (HasPrevInstruction) { 1160 if (Inst->getParent()) 1161 Inst->removeFromParent(); 1162 Inst->insertAfter(Point.PrevInst); 1163 } else { 1164 Instruction *Position = Point.BB->getFirstInsertionPt(); 1165 if (Inst->getParent()) 1166 Inst->moveBefore(Position); 1167 else 1168 Inst->insertBefore(Position); 1169 } 1170 } 1171 }; 1172 1173 /// \brief Move an instruction before another. 1174 class InstructionMoveBefore : public TypePromotionAction { 1175 /// Original position of the instruction. 1176 InsertionHandler Position; 1177 1178 public: 1179 /// \brief Move \p Inst before \p Before. 1180 InstructionMoveBefore(Instruction *Inst, Instruction *Before) 1181 : TypePromotionAction(Inst), Position(Inst) { 1182 DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before << "\n"); 1183 Inst->moveBefore(Before); 1184 } 1185 1186 /// \brief Move the instruction back to its original position. 1187 void undo() override { 1188 DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n"); 1189 Position.insert(Inst); 1190 } 1191 }; 1192 1193 /// \brief Set the operand of an instruction with a new value. 1194 class OperandSetter : public TypePromotionAction { 1195 /// Original operand of the instruction. 1196 Value *Origin; 1197 /// Index of the modified instruction. 1198 unsigned Idx; 1199 1200 public: 1201 /// \brief Set \p Idx operand of \p Inst with \p NewVal. 1202 OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal) 1203 : TypePromotionAction(Inst), Idx(Idx) { 1204 DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n" 1205 << "for:" << *Inst << "\n" 1206 << "with:" << *NewVal << "\n"); 1207 Origin = Inst->getOperand(Idx); 1208 Inst->setOperand(Idx, NewVal); 1209 } 1210 1211 /// \brief Restore the original value of the instruction. 1212 void undo() override { 1213 DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n" 1214 << "for: " << *Inst << "\n" 1215 << "with: " << *Origin << "\n"); 1216 Inst->setOperand(Idx, Origin); 1217 } 1218 }; 1219 1220 /// \brief Hide the operands of an instruction. 1221 /// Do as if this instruction was not using any of its operands. 1222 class OperandsHider : public TypePromotionAction { 1223 /// The list of original operands. 1224 SmallVector<Value *, 4> OriginalValues; 1225 1226 public: 1227 /// \brief Remove \p Inst from the uses of the operands of \p Inst. 1228 OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) { 1229 DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n"); 1230 unsigned NumOpnds = Inst->getNumOperands(); 1231 OriginalValues.reserve(NumOpnds); 1232 for (unsigned It = 0; It < NumOpnds; ++It) { 1233 // Save the current operand. 1234 Value *Val = Inst->getOperand(It); 1235 OriginalValues.push_back(Val); 1236 // Set a dummy one. 1237 // We could use OperandSetter here, but that would implied an overhead 1238 // that we are not willing to pay. 1239 Inst->setOperand(It, UndefValue::get(Val->getType())); 1240 } 1241 } 1242 1243 /// \brief Restore the original list of uses. 1244 void undo() override { 1245 DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n"); 1246 for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It) 1247 Inst->setOperand(It, OriginalValues[It]); 1248 } 1249 }; 1250 1251 /// \brief Build a truncate instruction. 1252 class TruncBuilder : public TypePromotionAction { 1253 public: 1254 /// \brief Build a truncate instruction of \p Opnd producing a \p Ty 1255 /// result. 1256 /// trunc Opnd to Ty. 1257 TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) { 1258 IRBuilder<> Builder(Opnd); 1259 Inst = cast<Instruction>(Builder.CreateTrunc(Opnd, Ty, "promoted")); 1260 DEBUG(dbgs() << "Do: TruncBuilder: " << *Inst << "\n"); 1261 } 1262 1263 /// \brief Get the built instruction. 1264 Instruction *getBuiltInstruction() { return Inst; } 1265 1266 /// \brief Remove the built instruction. 1267 void undo() override { 1268 DEBUG(dbgs() << "Undo: TruncBuilder: " << *Inst << "\n"); 1269 Inst->eraseFromParent(); 1270 } 1271 }; 1272 1273 /// \brief Build a sign extension instruction. 1274 class SExtBuilder : public TypePromotionAction { 1275 public: 1276 /// \brief Build a sign extension instruction of \p Opnd producing a \p Ty 1277 /// result. 1278 /// sext Opnd to Ty. 1279 SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty) 1280 : TypePromotionAction(Inst) { 1281 IRBuilder<> Builder(InsertPt); 1282 Inst = cast<Instruction>(Builder.CreateSExt(Opnd, Ty, "promoted")); 1283 DEBUG(dbgs() << "Do: SExtBuilder: " << *Inst << "\n"); 1284 } 1285 1286 /// \brief Get the built instruction. 1287 Instruction *getBuiltInstruction() { return Inst; } 1288 1289 /// \brief Remove the built instruction. 1290 void undo() override { 1291 DEBUG(dbgs() << "Undo: SExtBuilder: " << *Inst << "\n"); 1292 Inst->eraseFromParent(); 1293 } 1294 }; 1295 1296 /// \brief Mutate an instruction to another type. 1297 class TypeMutator : public TypePromotionAction { 1298 /// Record the original type. 1299 Type *OrigTy; 1300 1301 public: 1302 /// \brief Mutate the type of \p Inst into \p NewTy. 1303 TypeMutator(Instruction *Inst, Type *NewTy) 1304 : TypePromotionAction(Inst), OrigTy(Inst->getType()) { 1305 DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy 1306 << "\n"); 1307 Inst->mutateType(NewTy); 1308 } 1309 1310 /// \brief Mutate the instruction back to its original type. 1311 void undo() override { 1312 DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy 1313 << "\n"); 1314 Inst->mutateType(OrigTy); 1315 } 1316 }; 1317 1318 /// \brief Replace the uses of an instruction by another instruction. 1319 class UsesReplacer : public TypePromotionAction { 1320 /// Helper structure to keep track of the replaced uses. 1321 struct InstructionAndIdx { 1322 /// The instruction using the instruction. 1323 Instruction *Inst; 1324 /// The index where this instruction is used for Inst. 1325 unsigned Idx; 1326 InstructionAndIdx(Instruction *Inst, unsigned Idx) 1327 : Inst(Inst), Idx(Idx) {} 1328 }; 1329 1330 /// Keep track of the original uses (pair Instruction, Index). 1331 SmallVector<InstructionAndIdx, 4> OriginalUses; 1332 typedef SmallVectorImpl<InstructionAndIdx>::iterator use_iterator; 1333 1334 public: 1335 /// \brief Replace all the use of \p Inst by \p New. 1336 UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) { 1337 DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New 1338 << "\n"); 1339 // Record the original uses. 1340 for (Use &U : Inst->uses()) { 1341 Instruction *UserI = cast<Instruction>(U.getUser()); 1342 OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo())); 1343 } 1344 // Now, we can replace the uses. 1345 Inst->replaceAllUsesWith(New); 1346 } 1347 1348 /// \brief Reassign the original uses of Inst to Inst. 1349 void undo() override { 1350 DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n"); 1351 for (use_iterator UseIt = OriginalUses.begin(), 1352 EndIt = OriginalUses.end(); 1353 UseIt != EndIt; ++UseIt) { 1354 UseIt->Inst->setOperand(UseIt->Idx, Inst); 1355 } 1356 } 1357 }; 1358 1359 /// \brief Remove an instruction from the IR. 1360 class InstructionRemover : public TypePromotionAction { 1361 /// Original position of the instruction. 1362 InsertionHandler Inserter; 1363 /// Helper structure to hide all the link to the instruction. In other 1364 /// words, this helps to do as if the instruction was removed. 1365 OperandsHider Hider; 1366 /// Keep track of the uses replaced, if any. 1367 UsesReplacer *Replacer; 1368 1369 public: 1370 /// \brief Remove all reference of \p Inst and optinally replace all its 1371 /// uses with New. 1372 /// \pre If !Inst->use_empty(), then New != nullptr 1373 InstructionRemover(Instruction *Inst, Value *New = nullptr) 1374 : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst), 1375 Replacer(nullptr) { 1376 if (New) 1377 Replacer = new UsesReplacer(Inst, New); 1378 DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n"); 1379 Inst->removeFromParent(); 1380 } 1381 1382 ~InstructionRemover() { delete Replacer; } 1383 1384 /// \brief Really remove the instruction. 1385 void commit() override { delete Inst; } 1386 1387 /// \brief Resurrect the instruction and reassign it to the proper uses if 1388 /// new value was provided when build this action. 1389 void undo() override { 1390 DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n"); 1391 Inserter.insert(Inst); 1392 if (Replacer) 1393 Replacer->undo(); 1394 Hider.undo(); 1395 } 1396 }; 1397 1398 public: 1399 /// Restoration point. 1400 /// The restoration point is a pointer to an action instead of an iterator 1401 /// because the iterator may be invalidated but not the pointer. 1402 typedef const TypePromotionAction *ConstRestorationPt; 1403 /// Advocate every changes made in that transaction. 1404 void commit(); 1405 /// Undo all the changes made after the given point. 1406 void rollback(ConstRestorationPt Point); 1407 /// Get the current restoration point. 1408 ConstRestorationPt getRestorationPoint() const; 1409 1410 /// \name API for IR modification with state keeping to support rollback. 1411 /// @{ 1412 /// Same as Instruction::setOperand. 1413 void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal); 1414 /// Same as Instruction::eraseFromParent. 1415 void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr); 1416 /// Same as Value::replaceAllUsesWith. 1417 void replaceAllUsesWith(Instruction *Inst, Value *New); 1418 /// Same as Value::mutateType. 1419 void mutateType(Instruction *Inst, Type *NewTy); 1420 /// Same as IRBuilder::createTrunc. 1421 Instruction *createTrunc(Instruction *Opnd, Type *Ty); 1422 /// Same as IRBuilder::createSExt. 1423 Instruction *createSExt(Instruction *Inst, Value *Opnd, Type *Ty); 1424 /// Same as Instruction::moveBefore. 1425 void moveBefore(Instruction *Inst, Instruction *Before); 1426 /// @} 1427 1428 private: 1429 /// The ordered list of actions made so far. 1430 SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions; 1431 typedef SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator CommitPt; 1432 }; 1433 1434 void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx, 1435 Value *NewVal) { 1436 Actions.push_back( 1437 make_unique<TypePromotionTransaction::OperandSetter>(Inst, Idx, NewVal)); 1438 } 1439 1440 void TypePromotionTransaction::eraseInstruction(Instruction *Inst, 1441 Value *NewVal) { 1442 Actions.push_back( 1443 make_unique<TypePromotionTransaction::InstructionRemover>(Inst, NewVal)); 1444 } 1445 1446 void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst, 1447 Value *New) { 1448 Actions.push_back(make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New)); 1449 } 1450 1451 void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) { 1452 Actions.push_back(make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy)); 1453 } 1454 1455 Instruction *TypePromotionTransaction::createTrunc(Instruction *Opnd, 1456 Type *Ty) { 1457 std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty)); 1458 Instruction *I = Ptr->getBuiltInstruction(); 1459 Actions.push_back(std::move(Ptr)); 1460 return I; 1461 } 1462 1463 Instruction *TypePromotionTransaction::createSExt(Instruction *Inst, 1464 Value *Opnd, Type *Ty) { 1465 std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty)); 1466 Instruction *I = Ptr->getBuiltInstruction(); 1467 Actions.push_back(std::move(Ptr)); 1468 return I; 1469 } 1470 1471 void TypePromotionTransaction::moveBefore(Instruction *Inst, 1472 Instruction *Before) { 1473 Actions.push_back( 1474 make_unique<TypePromotionTransaction::InstructionMoveBefore>(Inst, Before)); 1475 } 1476 1477 TypePromotionTransaction::ConstRestorationPt 1478 TypePromotionTransaction::getRestorationPoint() const { 1479 return !Actions.empty() ? Actions.back().get() : nullptr; 1480 } 1481 1482 void TypePromotionTransaction::commit() { 1483 for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt; 1484 ++It) 1485 (*It)->commit(); 1486 Actions.clear(); 1487 } 1488 1489 void TypePromotionTransaction::rollback( 1490 TypePromotionTransaction::ConstRestorationPt Point) { 1491 while (!Actions.empty() && Point != Actions.back().get()) { 1492 std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val(); 1493 Curr->undo(); 1494 } 1495 } 1496 1497 /// \brief A helper class for matching addressing modes. 1498 /// 1499 /// This encapsulates the logic for matching the target-legal addressing modes. 1500 class AddressingModeMatcher { 1501 SmallVectorImpl<Instruction*> &AddrModeInsts; 1502 const TargetLowering &TLI; 1503 1504 /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and 1505 /// the memory instruction that we're computing this address for. 1506 Type *AccessTy; 1507 Instruction *MemoryInst; 1508 1509 /// AddrMode - This is the addressing mode that we're building up. This is 1510 /// part of the return value of this addressing mode matching stuff. 1511 ExtAddrMode &AddrMode; 1512 1513 /// The truncate instruction inserted by other CodeGenPrepare optimizations. 1514 const SetOfInstrs &InsertedTruncs; 1515 /// A map from the instructions to their type before promotion. 1516 InstrToOrigTy &PromotedInsts; 1517 /// The ongoing transaction where every action should be registered. 1518 TypePromotionTransaction &TPT; 1519 1520 /// IgnoreProfitability - This is set to true when we should not do 1521 /// profitability checks. When true, IsProfitableToFoldIntoAddressingMode 1522 /// always returns true. 1523 bool IgnoreProfitability; 1524 1525 AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI, 1526 const TargetLowering &T, Type *AT, 1527 Instruction *MI, ExtAddrMode &AM, 1528 const SetOfInstrs &InsertedTruncs, 1529 InstrToOrigTy &PromotedInsts, 1530 TypePromotionTransaction &TPT) 1531 : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM), 1532 InsertedTruncs(InsertedTruncs), PromotedInsts(PromotedInsts), TPT(TPT) { 1533 IgnoreProfitability = false; 1534 } 1535 public: 1536 1537 /// Match - Find the maximal addressing mode that a load/store of V can fold, 1538 /// give an access type of AccessTy. This returns a list of involved 1539 /// instructions in AddrModeInsts. 1540 /// \p InsertedTruncs The truncate instruction inserted by other 1541 /// CodeGenPrepare 1542 /// optimizations. 1543 /// \p PromotedInsts maps the instructions to their type before promotion. 1544 /// \p The ongoing transaction where every action should be registered. 1545 static ExtAddrMode Match(Value *V, Type *AccessTy, 1546 Instruction *MemoryInst, 1547 SmallVectorImpl<Instruction*> &AddrModeInsts, 1548 const TargetLowering &TLI, 1549 const SetOfInstrs &InsertedTruncs, 1550 InstrToOrigTy &PromotedInsts, 1551 TypePromotionTransaction &TPT) { 1552 ExtAddrMode Result; 1553 1554 bool Success = AddressingModeMatcher(AddrModeInsts, TLI, AccessTy, 1555 MemoryInst, Result, InsertedTruncs, 1556 PromotedInsts, TPT).MatchAddr(V, 0); 1557 (void)Success; assert(Success && "Couldn't select *anything*?"); 1558 return Result; 1559 } 1560 private: 1561 bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth); 1562 bool MatchAddr(Value *V, unsigned Depth); 1563 bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth, 1564 bool *MovedAway = nullptr); 1565 bool IsProfitableToFoldIntoAddressingMode(Instruction *I, 1566 ExtAddrMode &AMBefore, 1567 ExtAddrMode &AMAfter); 1568 bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2); 1569 bool IsPromotionProfitable(unsigned MatchedSize, unsigned SizeWithPromotion, 1570 Value *PromotedOperand) const; 1571 }; 1572 1573 /// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode. 1574 /// Return true and update AddrMode if this addr mode is legal for the target, 1575 /// false if not. 1576 bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale, 1577 unsigned Depth) { 1578 // If Scale is 1, then this is the same as adding ScaleReg to the addressing 1579 // mode. Just process that directly. 1580 if (Scale == 1) 1581 return MatchAddr(ScaleReg, Depth); 1582 1583 // If the scale is 0, it takes nothing to add this. 1584 if (Scale == 0) 1585 return true; 1586 1587 // If we already have a scale of this value, we can add to it, otherwise, we 1588 // need an available scale field. 1589 if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg) 1590 return false; 1591 1592 ExtAddrMode TestAddrMode = AddrMode; 1593 1594 // Add scale to turn X*4+X*3 -> X*7. This could also do things like 1595 // [A+B + A*7] -> [B+A*8]. 1596 TestAddrMode.Scale += Scale; 1597 TestAddrMode.ScaledReg = ScaleReg; 1598 1599 // If the new address isn't legal, bail out. 1600 if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) 1601 return false; 1602 1603 // It was legal, so commit it. 1604 AddrMode = TestAddrMode; 1605 1606 // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now 1607 // to see if ScaleReg is actually X+C. If so, we can turn this into adding 1608 // X*Scale + C*Scale to addr mode. 1609 ConstantInt *CI = nullptr; Value *AddLHS = nullptr; 1610 if (isa<Instruction>(ScaleReg) && // not a constant expr. 1611 match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) { 1612 TestAddrMode.ScaledReg = AddLHS; 1613 TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale; 1614 1615 // If this addressing mode is legal, commit it and remember that we folded 1616 // this instruction. 1617 if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) { 1618 AddrModeInsts.push_back(cast<Instruction>(ScaleReg)); 1619 AddrMode = TestAddrMode; 1620 return true; 1621 } 1622 } 1623 1624 // Otherwise, not (x+c)*scale, just return what we have. 1625 return true; 1626 } 1627 1628 /// MightBeFoldableInst - This is a little filter, which returns true if an 1629 /// addressing computation involving I might be folded into a load/store 1630 /// accessing it. This doesn't need to be perfect, but needs to accept at least 1631 /// the set of instructions that MatchOperationAddr can. 1632 static bool MightBeFoldableInst(Instruction *I) { 1633 switch (I->getOpcode()) { 1634 case Instruction::BitCast: 1635 case Instruction::AddrSpaceCast: 1636 // Don't touch identity bitcasts. 1637 if (I->getType() == I->getOperand(0)->getType()) 1638 return false; 1639 return I->getType()->isPointerTy() || I->getType()->isIntegerTy(); 1640 case Instruction::PtrToInt: 1641 // PtrToInt is always a noop, as we know that the int type is pointer sized. 1642 return true; 1643 case Instruction::IntToPtr: 1644 // We know the input is intptr_t, so this is foldable. 1645 return true; 1646 case Instruction::Add: 1647 return true; 1648 case Instruction::Mul: 1649 case Instruction::Shl: 1650 // Can only handle X*C and X << C. 1651 return isa<ConstantInt>(I->getOperand(1)); 1652 case Instruction::GetElementPtr: 1653 return true; 1654 default: 1655 return false; 1656 } 1657 } 1658 1659 /// \brief Hepler class to perform type promotion. 1660 class TypePromotionHelper { 1661 /// \brief Utility function to check whether or not a sign extension of 1662 /// \p Inst with \p ConsideredSExtType can be moved through \p Inst by either 1663 /// using the operands of \p Inst or promoting \p Inst. 1664 /// In other words, check if: 1665 /// sext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredSExtType. 1666 /// #1 Promotion applies: 1667 /// ConsideredSExtType Inst (sext opnd1 to ConsideredSExtType, ...). 1668 /// #2 Operand reuses: 1669 /// sext opnd1 to ConsideredSExtType. 1670 /// \p PromotedInsts maps the instructions to their type before promotion. 1671 static bool canGetThrough(const Instruction *Inst, Type *ConsideredSExtType, 1672 const InstrToOrigTy &PromotedInsts); 1673 1674 /// \brief Utility function to determine if \p OpIdx should be promoted when 1675 /// promoting \p Inst. 1676 static bool shouldSExtOperand(const Instruction *Inst, int OpIdx) { 1677 if (isa<SelectInst>(Inst) && OpIdx == 0) 1678 return false; 1679 return true; 1680 } 1681 1682 /// \brief Utility function to promote the operand of \p SExt when this 1683 /// operand is a promotable trunc or sext. 1684 /// \p PromotedInsts maps the instructions to their type before promotion. 1685 /// \p CreatedInsts[out] contains how many non-free instructions have been 1686 /// created to promote the operand of SExt. 1687 /// Should never be called directly. 1688 /// \return The promoted value which is used instead of SExt. 1689 static Value *promoteOperandForTruncAndSExt(Instruction *SExt, 1690 TypePromotionTransaction &TPT, 1691 InstrToOrigTy &PromotedInsts, 1692 unsigned &CreatedInsts); 1693 1694 /// \brief Utility function to promote the operand of \p SExt when this 1695 /// operand is promotable and is not a supported trunc or sext. 1696 /// \p PromotedInsts maps the instructions to their type before promotion. 1697 /// \p CreatedInsts[out] contains how many non-free instructions have been 1698 /// created to promote the operand of SExt. 1699 /// Should never be called directly. 1700 /// \return The promoted value which is used instead of SExt. 1701 static Value *promoteOperandForOther(Instruction *SExt, 1702 TypePromotionTransaction &TPT, 1703 InstrToOrigTy &PromotedInsts, 1704 unsigned &CreatedInsts); 1705 1706 public: 1707 /// Type for the utility function that promotes the operand of SExt. 1708 typedef Value *(*Action)(Instruction *SExt, TypePromotionTransaction &TPT, 1709 InstrToOrigTy &PromotedInsts, 1710 unsigned &CreatedInsts); 1711 /// \brief Given a sign extend instruction \p SExt, return the approriate 1712 /// action to promote the operand of \p SExt instead of using SExt. 1713 /// \return NULL if no promotable action is possible with the current 1714 /// sign extension. 1715 /// \p InsertedTruncs keeps track of all the truncate instructions inserted by 1716 /// the others CodeGenPrepare optimizations. This information is important 1717 /// because we do not want to promote these instructions as CodeGenPrepare 1718 /// will reinsert them later. Thus creating an infinite loop: create/remove. 1719 /// \p PromotedInsts maps the instructions to their type before promotion. 1720 static Action getAction(Instruction *SExt, const SetOfInstrs &InsertedTruncs, 1721 const TargetLowering &TLI, 1722 const InstrToOrigTy &PromotedInsts); 1723 }; 1724 1725 bool TypePromotionHelper::canGetThrough(const Instruction *Inst, 1726 Type *ConsideredSExtType, 1727 const InstrToOrigTy &PromotedInsts) { 1728 // We can always get through sext. 1729 if (isa<SExtInst>(Inst)) 1730 return true; 1731 1732 // We can get through binary operator, if it is legal. In other words, the 1733 // binary operator must have a nuw or nsw flag. 1734 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst); 1735 if (BinOp && isa<OverflowingBinaryOperator>(BinOp) && 1736 (BinOp->hasNoUnsignedWrap() || BinOp->hasNoSignedWrap())) 1737 return true; 1738 1739 // Check if we can do the following simplification. 1740 // sext(trunc(sext)) --> sext 1741 if (!isa<TruncInst>(Inst)) 1742 return false; 1743 1744 Value *OpndVal = Inst->getOperand(0); 1745 // Check if we can use this operand in the sext. 1746 // If the type is larger than the result type of the sign extension, 1747 // we cannot. 1748 if (OpndVal->getType()->getIntegerBitWidth() > 1749 ConsideredSExtType->getIntegerBitWidth()) 1750 return false; 1751 1752 // If the operand of the truncate is not an instruction, we will not have 1753 // any information on the dropped bits. 1754 // (Actually we could for constant but it is not worth the extra logic). 1755 Instruction *Opnd = dyn_cast<Instruction>(OpndVal); 1756 if (!Opnd) 1757 return false; 1758 1759 // Check if the source of the type is narrow enough. 1760 // I.e., check that trunc just drops sign extended bits. 1761 // #1 get the type of the operand. 1762 const Type *OpndType; 1763 InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd); 1764 if (It != PromotedInsts.end()) 1765 OpndType = It->second; 1766 else if (isa<SExtInst>(Opnd)) 1767 OpndType = cast<Instruction>(Opnd)->getOperand(0)->getType(); 1768 else 1769 return false; 1770 1771 // #2 check that the truncate just drop sign extended bits. 1772 if (Inst->getType()->getIntegerBitWidth() >= OpndType->getIntegerBitWidth()) 1773 return true; 1774 1775 return false; 1776 } 1777 1778 TypePromotionHelper::Action TypePromotionHelper::getAction( 1779 Instruction *SExt, const SetOfInstrs &InsertedTruncs, 1780 const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) { 1781 Instruction *SExtOpnd = dyn_cast<Instruction>(SExt->getOperand(0)); 1782 Type *SExtTy = SExt->getType(); 1783 // If the operand of the sign extension is not an instruction, we cannot 1784 // get through. 1785 // If it, check we can get through. 1786 if (!SExtOpnd || !canGetThrough(SExtOpnd, SExtTy, PromotedInsts)) 1787 return nullptr; 1788 1789 // Do not promote if the operand has been added by codegenprepare. 1790 // Otherwise, it means we are undoing an optimization that is likely to be 1791 // redone, thus causing potential infinite loop. 1792 if (isa<TruncInst>(SExtOpnd) && InsertedTruncs.count(SExtOpnd)) 1793 return nullptr; 1794 1795 // SExt or Trunc instructions. 1796 // Return the related handler. 1797 if (isa<SExtInst>(SExtOpnd) || isa<TruncInst>(SExtOpnd)) 1798 return promoteOperandForTruncAndSExt; 1799 1800 // Regular instruction. 1801 // Abort early if we will have to insert non-free instructions. 1802 if (!SExtOpnd->hasOneUse() && 1803 !TLI.isTruncateFree(SExtTy, SExtOpnd->getType())) 1804 return nullptr; 1805 return promoteOperandForOther; 1806 } 1807 1808 Value *TypePromotionHelper::promoteOperandForTruncAndSExt( 1809 llvm::Instruction *SExt, TypePromotionTransaction &TPT, 1810 InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts) { 1811 // By construction, the operand of SExt is an instruction. Otherwise we cannot 1812 // get through it and this method should not be called. 1813 Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0)); 1814 // Replace sext(trunc(opnd)) or sext(sext(opnd)) 1815 // => sext(opnd). 1816 TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0)); 1817 CreatedInsts = 0; 1818 1819 // Remove dead code. 1820 if (SExtOpnd->use_empty()) 1821 TPT.eraseInstruction(SExtOpnd); 1822 1823 // Check if the sext is still needed. 1824 if (SExt->getType() != SExt->getOperand(0)->getType()) 1825 return SExt; 1826 1827 // At this point we have: sext ty opnd to ty. 1828 // Reassign the uses of SExt to the opnd and remove SExt. 1829 Value *NextVal = SExt->getOperand(0); 1830 TPT.eraseInstruction(SExt, NextVal); 1831 return NextVal; 1832 } 1833 1834 Value * 1835 TypePromotionHelper::promoteOperandForOther(Instruction *SExt, 1836 TypePromotionTransaction &TPT, 1837 InstrToOrigTy &PromotedInsts, 1838 unsigned &CreatedInsts) { 1839 // By construction, the operand of SExt is an instruction. Otherwise we cannot 1840 // get through it and this method should not be called. 1841 Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0)); 1842 CreatedInsts = 0; 1843 if (!SExtOpnd->hasOneUse()) { 1844 // SExtOpnd will be promoted. 1845 // All its uses, but SExt, will need to use a truncated value of the 1846 // promoted version. 1847 // Create the truncate now. 1848 Instruction *Trunc = TPT.createTrunc(SExt, SExtOpnd->getType()); 1849 Trunc->removeFromParent(); 1850 // Insert it just after the definition. 1851 Trunc->insertAfter(SExtOpnd); 1852 1853 TPT.replaceAllUsesWith(SExtOpnd, Trunc); 1854 // Restore the operand of SExt (which has been replace by the previous call 1855 // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext. 1856 TPT.setOperand(SExt, 0, SExtOpnd); 1857 } 1858 1859 // Get through the Instruction: 1860 // 1. Update its type. 1861 // 2. Replace the uses of SExt by Inst. 1862 // 3. Sign extend each operand that needs to be sign extended. 1863 1864 // Remember the original type of the instruction before promotion. 1865 // This is useful to know that the high bits are sign extended bits. 1866 PromotedInsts.insert( 1867 std::pair<Instruction *, Type *>(SExtOpnd, SExtOpnd->getType())); 1868 // Step #1. 1869 TPT.mutateType(SExtOpnd, SExt->getType()); 1870 // Step #2. 1871 TPT.replaceAllUsesWith(SExt, SExtOpnd); 1872 // Step #3. 1873 Instruction *SExtForOpnd = SExt; 1874 1875 DEBUG(dbgs() << "Propagate SExt to operands\n"); 1876 for (int OpIdx = 0, EndOpIdx = SExtOpnd->getNumOperands(); OpIdx != EndOpIdx; 1877 ++OpIdx) { 1878 DEBUG(dbgs() << "Operand:\n" << *(SExtOpnd->getOperand(OpIdx)) << '\n'); 1879 if (SExtOpnd->getOperand(OpIdx)->getType() == SExt->getType() || 1880 !shouldSExtOperand(SExtOpnd, OpIdx)) { 1881 DEBUG(dbgs() << "No need to propagate\n"); 1882 continue; 1883 } 1884 // Check if we can statically sign extend the operand. 1885 Value *Opnd = SExtOpnd->getOperand(OpIdx); 1886 if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) { 1887 DEBUG(dbgs() << "Statically sign extend\n"); 1888 TPT.setOperand( 1889 SExtOpnd, OpIdx, 1890 ConstantInt::getSigned(SExt->getType(), Cst->getSExtValue())); 1891 continue; 1892 } 1893 // UndefValue are typed, so we have to statically sign extend them. 1894 if (isa<UndefValue>(Opnd)) { 1895 DEBUG(dbgs() << "Statically sign extend\n"); 1896 TPT.setOperand(SExtOpnd, OpIdx, UndefValue::get(SExt->getType())); 1897 continue; 1898 } 1899 1900 // Otherwise we have to explicity sign extend the operand. 1901 // Check if SExt was reused to sign extend an operand. 1902 if (!SExtForOpnd) { 1903 // If yes, create a new one. 1904 DEBUG(dbgs() << "More operands to sext\n"); 1905 SExtForOpnd = TPT.createSExt(SExt, Opnd, SExt->getType()); 1906 ++CreatedInsts; 1907 } 1908 1909 TPT.setOperand(SExtForOpnd, 0, Opnd); 1910 1911 // Move the sign extension before the insertion point. 1912 TPT.moveBefore(SExtForOpnd, SExtOpnd); 1913 TPT.setOperand(SExtOpnd, OpIdx, SExtForOpnd); 1914 // If more sext are required, new instructions will have to be created. 1915 SExtForOpnd = nullptr; 1916 } 1917 if (SExtForOpnd == SExt) { 1918 DEBUG(dbgs() << "Sign extension is useless now\n"); 1919 TPT.eraseInstruction(SExt); 1920 } 1921 return SExtOpnd; 1922 } 1923 1924 /// IsPromotionProfitable - Check whether or not promoting an instruction 1925 /// to a wider type was profitable. 1926 /// \p MatchedSize gives the number of instructions that have been matched 1927 /// in the addressing mode after the promotion was applied. 1928 /// \p SizeWithPromotion gives the number of created instructions for 1929 /// the promotion plus the number of instructions that have been 1930 /// matched in the addressing mode before the promotion. 1931 /// \p PromotedOperand is the value that has been promoted. 1932 /// \return True if the promotion is profitable, false otherwise. 1933 bool 1934 AddressingModeMatcher::IsPromotionProfitable(unsigned MatchedSize, 1935 unsigned SizeWithPromotion, 1936 Value *PromotedOperand) const { 1937 // We folded less instructions than what we created to promote the operand. 1938 // This is not profitable. 1939 if (MatchedSize < SizeWithPromotion) 1940 return false; 1941 if (MatchedSize > SizeWithPromotion) 1942 return true; 1943 // The promotion is neutral but it may help folding the sign extension in 1944 // loads for instance. 1945 // Check that we did not create an illegal instruction. 1946 Instruction *PromotedInst = dyn_cast<Instruction>(PromotedOperand); 1947 if (!PromotedInst) 1948 return false; 1949 int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode()); 1950 // If the ISDOpcode is undefined, it was undefined before the promotion. 1951 if (!ISDOpcode) 1952 return true; 1953 // Otherwise, check if the promoted instruction is legal or not. 1954 return TLI.isOperationLegalOrCustom(ISDOpcode, 1955 EVT::getEVT(PromotedInst->getType())); 1956 } 1957 1958 /// MatchOperationAddr - Given an instruction or constant expr, see if we can 1959 /// fold the operation into the addressing mode. If so, update the addressing 1960 /// mode and return true, otherwise return false without modifying AddrMode. 1961 /// If \p MovedAway is not NULL, it contains the information of whether or 1962 /// not AddrInst has to be folded into the addressing mode on success. 1963 /// If \p MovedAway == true, \p AddrInst will not be part of the addressing 1964 /// because it has been moved away. 1965 /// Thus AddrInst must not be added in the matched instructions. 1966 /// This state can happen when AddrInst is a sext, since it may be moved away. 1967 /// Therefore, AddrInst may not be valid when MovedAway is true and it must 1968 /// not be referenced anymore. 1969 bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, 1970 unsigned Depth, 1971 bool *MovedAway) { 1972 // Avoid exponential behavior on extremely deep expression trees. 1973 if (Depth >= 5) return false; 1974 1975 // By default, all matched instructions stay in place. 1976 if (MovedAway) 1977 *MovedAway = false; 1978 1979 switch (Opcode) { 1980 case Instruction::PtrToInt: 1981 // PtrToInt is always a noop, as we know that the int type is pointer sized. 1982 return MatchAddr(AddrInst->getOperand(0), Depth); 1983 case Instruction::IntToPtr: 1984 // This inttoptr is a no-op if the integer type is pointer sized. 1985 if (TLI.getValueType(AddrInst->getOperand(0)->getType()) == 1986 TLI.getPointerTy(AddrInst->getType()->getPointerAddressSpace())) 1987 return MatchAddr(AddrInst->getOperand(0), Depth); 1988 return false; 1989 case Instruction::BitCast: 1990 case Instruction::AddrSpaceCast: 1991 // BitCast is always a noop, and we can handle it as long as it is 1992 // int->int or pointer->pointer (we don't want int<->fp or something). 1993 if ((AddrInst->getOperand(0)->getType()->isPointerTy() || 1994 AddrInst->getOperand(0)->getType()->isIntegerTy()) && 1995 // Don't touch identity bitcasts. These were probably put here by LSR, 1996 // and we don't want to mess around with them. Assume it knows what it 1997 // is doing. 1998 AddrInst->getOperand(0)->getType() != AddrInst->getType()) 1999 return MatchAddr(AddrInst->getOperand(0), Depth); 2000 return false; 2001 case Instruction::Add: { 2002 // Check to see if we can merge in the RHS then the LHS. If so, we win. 2003 ExtAddrMode BackupAddrMode = AddrMode; 2004 unsigned OldSize = AddrModeInsts.size(); 2005 // Start a transaction at this point. 2006 // The LHS may match but not the RHS. 2007 // Therefore, we need a higher level restoration point to undo partially 2008 // matched operation. 2009 TypePromotionTransaction::ConstRestorationPt LastKnownGood = 2010 TPT.getRestorationPoint(); 2011 2012 if (MatchAddr(AddrInst->getOperand(1), Depth+1) && 2013 MatchAddr(AddrInst->getOperand(0), Depth+1)) 2014 return true; 2015 2016 // Restore the old addr mode info. 2017 AddrMode = BackupAddrMode; 2018 AddrModeInsts.resize(OldSize); 2019 TPT.rollback(LastKnownGood); 2020 2021 // Otherwise this was over-aggressive. Try merging in the LHS then the RHS. 2022 if (MatchAddr(AddrInst->getOperand(0), Depth+1) && 2023 MatchAddr(AddrInst->getOperand(1), Depth+1)) 2024 return true; 2025 2026 // Otherwise we definitely can't merge the ADD in. 2027 AddrMode = BackupAddrMode; 2028 AddrModeInsts.resize(OldSize); 2029 TPT.rollback(LastKnownGood); 2030 break; 2031 } 2032 //case Instruction::Or: 2033 // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD. 2034 //break; 2035 case Instruction::Mul: 2036 case Instruction::Shl: { 2037 // Can only handle X*C and X << C. 2038 ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1)); 2039 if (!RHS) 2040 return false; 2041 int64_t Scale = RHS->getSExtValue(); 2042 if (Opcode == Instruction::Shl) 2043 Scale = 1LL << Scale; 2044 2045 return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth); 2046 } 2047 case Instruction::GetElementPtr: { 2048 // Scan the GEP. We check it if it contains constant offsets and at most 2049 // one variable offset. 2050 int VariableOperand = -1; 2051 unsigned VariableScale = 0; 2052 2053 int64_t ConstantOffset = 0; 2054 const DataLayout *TD = TLI.getDataLayout(); 2055 gep_type_iterator GTI = gep_type_begin(AddrInst); 2056 for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) { 2057 if (StructType *STy = dyn_cast<StructType>(*GTI)) { 2058 const StructLayout *SL = TD->getStructLayout(STy); 2059 unsigned Idx = 2060 cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue(); 2061 ConstantOffset += SL->getElementOffset(Idx); 2062 } else { 2063 uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType()); 2064 if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) { 2065 ConstantOffset += CI->getSExtValue()*TypeSize; 2066 } else if (TypeSize) { // Scales of zero don't do anything. 2067 // We only allow one variable index at the moment. 2068 if (VariableOperand != -1) 2069 return false; 2070 2071 // Remember the variable index. 2072 VariableOperand = i; 2073 VariableScale = TypeSize; 2074 } 2075 } 2076 } 2077 2078 // A common case is for the GEP to only do a constant offset. In this case, 2079 // just add it to the disp field and check validity. 2080 if (VariableOperand == -1) { 2081 AddrMode.BaseOffs += ConstantOffset; 2082 if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){ 2083 // Check to see if we can fold the base pointer in too. 2084 if (MatchAddr(AddrInst->getOperand(0), Depth+1)) 2085 return true; 2086 } 2087 AddrMode.BaseOffs -= ConstantOffset; 2088 return false; 2089 } 2090 2091 // Save the valid addressing mode in case we can't match. 2092 ExtAddrMode BackupAddrMode = AddrMode; 2093 unsigned OldSize = AddrModeInsts.size(); 2094 2095 // See if the scale and offset amount is valid for this target. 2096 AddrMode.BaseOffs += ConstantOffset; 2097 2098 // Match the base operand of the GEP. 2099 if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) { 2100 // If it couldn't be matched, just stuff the value in a register. 2101 if (AddrMode.HasBaseReg) { 2102 AddrMode = BackupAddrMode; 2103 AddrModeInsts.resize(OldSize); 2104 return false; 2105 } 2106 AddrMode.HasBaseReg = true; 2107 AddrMode.BaseReg = AddrInst->getOperand(0); 2108 } 2109 2110 // Match the remaining variable portion of the GEP. 2111 if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale, 2112 Depth)) { 2113 // If it couldn't be matched, try stuffing the base into a register 2114 // instead of matching it, and retrying the match of the scale. 2115 AddrMode = BackupAddrMode; 2116 AddrModeInsts.resize(OldSize); 2117 if (AddrMode.HasBaseReg) 2118 return false; 2119 AddrMode.HasBaseReg = true; 2120 AddrMode.BaseReg = AddrInst->getOperand(0); 2121 AddrMode.BaseOffs += ConstantOffset; 2122 if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), 2123 VariableScale, Depth)) { 2124 // If even that didn't work, bail. 2125 AddrMode = BackupAddrMode; 2126 AddrModeInsts.resize(OldSize); 2127 return false; 2128 } 2129 } 2130 2131 return true; 2132 } 2133 case Instruction::SExt: { 2134 Instruction *SExt = dyn_cast<Instruction>(AddrInst); 2135 if (!SExt) 2136 return false; 2137 2138 // Try to move this sext out of the way of the addressing mode. 2139 // Ask for a method for doing so. 2140 TypePromotionHelper::Action TPH = TypePromotionHelper::getAction( 2141 SExt, InsertedTruncs, TLI, PromotedInsts); 2142 if (!TPH) 2143 return false; 2144 2145 TypePromotionTransaction::ConstRestorationPt LastKnownGood = 2146 TPT.getRestorationPoint(); 2147 unsigned CreatedInsts = 0; 2148 Value *PromotedOperand = TPH(SExt, TPT, PromotedInsts, CreatedInsts); 2149 // SExt has been moved away. 2150 // Thus either it will be rematched later in the recursive calls or it is 2151 // gone. Anyway, we must not fold it into the addressing mode at this point. 2152 // E.g., 2153 // op = add opnd, 1 2154 // idx = sext op 2155 // addr = gep base, idx 2156 // is now: 2157 // promotedOpnd = sext opnd <- no match here 2158 // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls) 2159 // addr = gep base, op <- match 2160 if (MovedAway) 2161 *MovedAway = true; 2162 2163 assert(PromotedOperand && 2164 "TypePromotionHelper should have filtered out those cases"); 2165 2166 ExtAddrMode BackupAddrMode = AddrMode; 2167 unsigned OldSize = AddrModeInsts.size(); 2168 2169 if (!MatchAddr(PromotedOperand, Depth) || 2170 !IsPromotionProfitable(AddrModeInsts.size(), OldSize + CreatedInsts, 2171 PromotedOperand)) { 2172 AddrMode = BackupAddrMode; 2173 AddrModeInsts.resize(OldSize); 2174 DEBUG(dbgs() << "Sign extension does not pay off: rollback\n"); 2175 TPT.rollback(LastKnownGood); 2176 return false; 2177 } 2178 return true; 2179 } 2180 } 2181 return false; 2182 } 2183 2184 /// MatchAddr - If we can, try to add the value of 'Addr' into the current 2185 /// addressing mode. If Addr can't be added to AddrMode this returns false and 2186 /// leaves AddrMode unmodified. This assumes that Addr is either a pointer type 2187 /// or intptr_t for the target. 2188 /// 2189 bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { 2190 // Start a transaction at this point that we will rollback if the matching 2191 // fails. 2192 TypePromotionTransaction::ConstRestorationPt LastKnownGood = 2193 TPT.getRestorationPoint(); 2194 if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) { 2195 // Fold in immediates if legal for the target. 2196 AddrMode.BaseOffs += CI->getSExtValue(); 2197 if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) 2198 return true; 2199 AddrMode.BaseOffs -= CI->getSExtValue(); 2200 } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) { 2201 // If this is a global variable, try to fold it into the addressing mode. 2202 if (!AddrMode.BaseGV) { 2203 AddrMode.BaseGV = GV; 2204 if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) 2205 return true; 2206 AddrMode.BaseGV = nullptr; 2207 } 2208 } else if (Instruction *I = dyn_cast<Instruction>(Addr)) { 2209 ExtAddrMode BackupAddrMode = AddrMode; 2210 unsigned OldSize = AddrModeInsts.size(); 2211 2212 // Check to see if it is possible to fold this operation. 2213 bool MovedAway = false; 2214 if (MatchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) { 2215 // This instruction may have been move away. If so, there is nothing 2216 // to check here. 2217 if (MovedAway) 2218 return true; 2219 // Okay, it's possible to fold this. Check to see if it is actually 2220 // *profitable* to do so. We use a simple cost model to avoid increasing 2221 // register pressure too much. 2222 if (I->hasOneUse() || 2223 IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) { 2224 AddrModeInsts.push_back(I); 2225 return true; 2226 } 2227 2228 // It isn't profitable to do this, roll back. 2229 //cerr << "NOT FOLDING: " << *I; 2230 AddrMode = BackupAddrMode; 2231 AddrModeInsts.resize(OldSize); 2232 TPT.rollback(LastKnownGood); 2233 } 2234 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) { 2235 if (MatchOperationAddr(CE, CE->getOpcode(), Depth)) 2236 return true; 2237 TPT.rollback(LastKnownGood); 2238 } else if (isa<ConstantPointerNull>(Addr)) { 2239 // Null pointer gets folded without affecting the addressing mode. 2240 return true; 2241 } 2242 2243 // Worse case, the target should support [reg] addressing modes. :) 2244 if (!AddrMode.HasBaseReg) { 2245 AddrMode.HasBaseReg = true; 2246 AddrMode.BaseReg = Addr; 2247 // Still check for legality in case the target supports [imm] but not [i+r]. 2248 if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) 2249 return true; 2250 AddrMode.HasBaseReg = false; 2251 AddrMode.BaseReg = nullptr; 2252 } 2253 2254 // If the base register is already taken, see if we can do [r+r]. 2255 if (AddrMode.Scale == 0) { 2256 AddrMode.Scale = 1; 2257 AddrMode.ScaledReg = Addr; 2258 if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) 2259 return true; 2260 AddrMode.Scale = 0; 2261 AddrMode.ScaledReg = nullptr; 2262 } 2263 // Couldn't match. 2264 TPT.rollback(LastKnownGood); 2265 return false; 2266 } 2267 2268 /// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified 2269 /// inline asm call are due to memory operands. If so, return true, otherwise 2270 /// return false. 2271 static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, 2272 const TargetLowering &TLI) { 2273 TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI)); 2274 for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { 2275 TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; 2276 2277 // Compute the constraint code and ConstraintType to use. 2278 TLI.ComputeConstraintToUse(OpInfo, SDValue()); 2279 2280 // If this asm operand is our Value*, and if it isn't an indirect memory 2281 // operand, we can't fold it! 2282 if (OpInfo.CallOperandVal == OpVal && 2283 (OpInfo.ConstraintType != TargetLowering::C_Memory || 2284 !OpInfo.isIndirect)) 2285 return false; 2286 } 2287 2288 return true; 2289 } 2290 2291 /// FindAllMemoryUses - Recursively walk all the uses of I until we find a 2292 /// memory use. If we find an obviously non-foldable instruction, return true. 2293 /// Add the ultimately found memory instructions to MemoryUses. 2294 static bool FindAllMemoryUses(Instruction *I, 2295 SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses, 2296 SmallPtrSet<Instruction*, 16> &ConsideredInsts, 2297 const TargetLowering &TLI) { 2298 // If we already considered this instruction, we're done. 2299 if (!ConsideredInsts.insert(I)) 2300 return false; 2301 2302 // If this is an obviously unfoldable instruction, bail out. 2303 if (!MightBeFoldableInst(I)) 2304 return true; 2305 2306 // Loop over all the uses, recursively processing them. 2307 for (Use &U : I->uses()) { 2308 Instruction *UserI = cast<Instruction>(U.getUser()); 2309 2310 if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) { 2311 MemoryUses.push_back(std::make_pair(LI, U.getOperandNo())); 2312 continue; 2313 } 2314 2315 if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) { 2316 unsigned opNo = U.getOperandNo(); 2317 if (opNo == 0) return true; // Storing addr, not into addr. 2318 MemoryUses.push_back(std::make_pair(SI, opNo)); 2319 continue; 2320 } 2321 2322 if (CallInst *CI = dyn_cast<CallInst>(UserI)) { 2323 InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue()); 2324 if (!IA) return true; 2325 2326 // If this is a memory operand, we're cool, otherwise bail out. 2327 if (!IsOperandAMemoryOperand(CI, IA, I, TLI)) 2328 return true; 2329 continue; 2330 } 2331 2332 if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI)) 2333 return true; 2334 } 2335 2336 return false; 2337 } 2338 2339 /// ValueAlreadyLiveAtInst - Retrn true if Val is already known to be live at 2340 /// the use site that we're folding it into. If so, there is no cost to 2341 /// include it in the addressing mode. KnownLive1 and KnownLive2 are two values 2342 /// that we know are live at the instruction already. 2343 bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1, 2344 Value *KnownLive2) { 2345 // If Val is either of the known-live values, we know it is live! 2346 if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2) 2347 return true; 2348 2349 // All values other than instructions and arguments (e.g. constants) are live. 2350 if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true; 2351 2352 // If Val is a constant sized alloca in the entry block, it is live, this is 2353 // true because it is just a reference to the stack/frame pointer, which is 2354 // live for the whole function. 2355 if (AllocaInst *AI = dyn_cast<AllocaInst>(Val)) 2356 if (AI->isStaticAlloca()) 2357 return true; 2358 2359 // Check to see if this value is already used in the memory instruction's 2360 // block. If so, it's already live into the block at the very least, so we 2361 // can reasonably fold it. 2362 return Val->isUsedInBasicBlock(MemoryInst->getParent()); 2363 } 2364 2365 /// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing 2366 /// mode of the machine to fold the specified instruction into a load or store 2367 /// that ultimately uses it. However, the specified instruction has multiple 2368 /// uses. Given this, it may actually increase register pressure to fold it 2369 /// into the load. For example, consider this code: 2370 /// 2371 /// X = ... 2372 /// Y = X+1 2373 /// use(Y) -> nonload/store 2374 /// Z = Y+1 2375 /// load Z 2376 /// 2377 /// In this case, Y has multiple uses, and can be folded into the load of Z 2378 /// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to 2379 /// be live at the use(Y) line. If we don't fold Y into load Z, we use one 2380 /// fewer register. Since Y can't be folded into "use(Y)" we don't increase the 2381 /// number of computations either. 2382 /// 2383 /// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If 2384 /// X was live across 'load Z' for other reasons, we actually *would* want to 2385 /// fold the addressing mode in the Z case. This would make Y die earlier. 2386 bool AddressingModeMatcher:: 2387 IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, 2388 ExtAddrMode &AMAfter) { 2389 if (IgnoreProfitability) return true; 2390 2391 // AMBefore is the addressing mode before this instruction was folded into it, 2392 // and AMAfter is the addressing mode after the instruction was folded. Get 2393 // the set of registers referenced by AMAfter and subtract out those 2394 // referenced by AMBefore: this is the set of values which folding in this 2395 // address extends the lifetime of. 2396 // 2397 // Note that there are only two potential values being referenced here, 2398 // BaseReg and ScaleReg (global addresses are always available, as are any 2399 // folded immediates). 2400 Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg; 2401 2402 // If the BaseReg or ScaledReg was referenced by the previous addrmode, their 2403 // lifetime wasn't extended by adding this instruction. 2404 if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg)) 2405 BaseReg = nullptr; 2406 if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg)) 2407 ScaledReg = nullptr; 2408 2409 // If folding this instruction (and it's subexprs) didn't extend any live 2410 // ranges, we're ok with it. 2411 if (!BaseReg && !ScaledReg) 2412 return true; 2413 2414 // If all uses of this instruction are ultimately load/store/inlineasm's, 2415 // check to see if their addressing modes will include this instruction. If 2416 // so, we can fold it into all uses, so it doesn't matter if it has multiple 2417 // uses. 2418 SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses; 2419 SmallPtrSet<Instruction*, 16> ConsideredInsts; 2420 if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI)) 2421 return false; // Has a non-memory, non-foldable use! 2422 2423 // Now that we know that all uses of this instruction are part of a chain of 2424 // computation involving only operations that could theoretically be folded 2425 // into a memory use, loop over each of these uses and see if they could 2426 // *actually* fold the instruction. 2427 SmallVector<Instruction*, 32> MatchedAddrModeInsts; 2428 for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) { 2429 Instruction *User = MemoryUses[i].first; 2430 unsigned OpNo = MemoryUses[i].second; 2431 2432 // Get the access type of this use. If the use isn't a pointer, we don't 2433 // know what it accesses. 2434 Value *Address = User->getOperand(OpNo); 2435 if (!Address->getType()->isPointerTy()) 2436 return false; 2437 Type *AddressAccessTy = Address->getType()->getPointerElementType(); 2438 2439 // Do a match against the root of this address, ignoring profitability. This 2440 // will tell us if the addressing mode for the memory operation will 2441 // *actually* cover the shared instruction. 2442 ExtAddrMode Result; 2443 TypePromotionTransaction::ConstRestorationPt LastKnownGood = 2444 TPT.getRestorationPoint(); 2445 AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy, 2446 MemoryInst, Result, InsertedTruncs, 2447 PromotedInsts, TPT); 2448 Matcher.IgnoreProfitability = true; 2449 bool Success = Matcher.MatchAddr(Address, 0); 2450 (void)Success; assert(Success && "Couldn't select *anything*?"); 2451 2452 // The match was to check the profitability, the changes made are not 2453 // part of the original matcher. Therefore, they should be dropped 2454 // otherwise the original matcher will not present the right state. 2455 TPT.rollback(LastKnownGood); 2456 2457 // If the match didn't cover I, then it won't be shared by it. 2458 if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(), 2459 I) == MatchedAddrModeInsts.end()) 2460 return false; 2461 2462 MatchedAddrModeInsts.clear(); 2463 } 2464 2465 return true; 2466 } 2467 2468 } // end anonymous namespace 2469 2470 /// IsNonLocalValue - Return true if the specified values are defined in a 2471 /// different basic block than BB. 2472 static bool IsNonLocalValue(Value *V, BasicBlock *BB) { 2473 if (Instruction *I = dyn_cast<Instruction>(V)) 2474 return I->getParent() != BB; 2475 return false; 2476 } 2477 2478 /// OptimizeMemoryInst - Load and Store Instructions often have 2479 /// addressing modes that can do significant amounts of computation. As such, 2480 /// instruction selection will try to get the load or store to do as much 2481 /// computation as possible for the program. The problem is that isel can only 2482 /// see within a single block. As such, we sink as much legal addressing mode 2483 /// stuff into the block as possible. 2484 /// 2485 /// This method is used to optimize both load/store and inline asms with memory 2486 /// operands. 2487 bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, 2488 Type *AccessTy) { 2489 Value *Repl = Addr; 2490 2491 // Try to collapse single-value PHI nodes. This is necessary to undo 2492 // unprofitable PRE transformations. 2493 SmallVector<Value*, 8> worklist; 2494 SmallPtrSet<Value*, 16> Visited; 2495 worklist.push_back(Addr); 2496 2497 // Use a worklist to iteratively look through PHI nodes, and ensure that 2498 // the addressing mode obtained from the non-PHI roots of the graph 2499 // are equivalent. 2500 Value *Consensus = nullptr; 2501 unsigned NumUsesConsensus = 0; 2502 bool IsNumUsesConsensusValid = false; 2503 SmallVector<Instruction*, 16> AddrModeInsts; 2504 ExtAddrMode AddrMode; 2505 TypePromotionTransaction TPT; 2506 TypePromotionTransaction::ConstRestorationPt LastKnownGood = 2507 TPT.getRestorationPoint(); 2508 while (!worklist.empty()) { 2509 Value *V = worklist.back(); 2510 worklist.pop_back(); 2511 2512 // Break use-def graph loops. 2513 if (!Visited.insert(V)) { 2514 Consensus = nullptr; 2515 break; 2516 } 2517 2518 // For a PHI node, push all of its incoming values. 2519 if (PHINode *P = dyn_cast<PHINode>(V)) { 2520 for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) 2521 worklist.push_back(P->getIncomingValue(i)); 2522 continue; 2523 } 2524 2525 // For non-PHIs, determine the addressing mode being computed. 2526 SmallVector<Instruction*, 16> NewAddrModeInsts; 2527 ExtAddrMode NewAddrMode = AddressingModeMatcher::Match( 2528 V, AccessTy, MemoryInst, NewAddrModeInsts, *TLI, InsertedTruncsSet, 2529 PromotedInsts, TPT); 2530 2531 // This check is broken into two cases with very similar code to avoid using 2532 // getNumUses() as much as possible. Some values have a lot of uses, so 2533 // calling getNumUses() unconditionally caused a significant compile-time 2534 // regression. 2535 if (!Consensus) { 2536 Consensus = V; 2537 AddrMode = NewAddrMode; 2538 AddrModeInsts = NewAddrModeInsts; 2539 continue; 2540 } else if (NewAddrMode == AddrMode) { 2541 if (!IsNumUsesConsensusValid) { 2542 NumUsesConsensus = Consensus->getNumUses(); 2543 IsNumUsesConsensusValid = true; 2544 } 2545 2546 // Ensure that the obtained addressing mode is equivalent to that obtained 2547 // for all other roots of the PHI traversal. Also, when choosing one 2548 // such root as representative, select the one with the most uses in order 2549 // to keep the cost modeling heuristics in AddressingModeMatcher 2550 // applicable. 2551 unsigned NumUses = V->getNumUses(); 2552 if (NumUses > NumUsesConsensus) { 2553 Consensus = V; 2554 NumUsesConsensus = NumUses; 2555 AddrModeInsts = NewAddrModeInsts; 2556 } 2557 continue; 2558 } 2559 2560 Consensus = nullptr; 2561 break; 2562 } 2563 2564 // If the addressing mode couldn't be determined, or if multiple different 2565 // ones were determined, bail out now. 2566 if (!Consensus) { 2567 TPT.rollback(LastKnownGood); 2568 return false; 2569 } 2570 TPT.commit(); 2571 2572 // Check to see if any of the instructions supersumed by this addr mode are 2573 // non-local to I's BB. 2574 bool AnyNonLocal = false; 2575 for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) { 2576 if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) { 2577 AnyNonLocal = true; 2578 break; 2579 } 2580 } 2581 2582 // If all the instructions matched are already in this BB, don't do anything. 2583 if (!AnyNonLocal) { 2584 DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n"); 2585 return false; 2586 } 2587 2588 // Insert this computation right after this user. Since our caller is 2589 // scanning from the top of the BB to the bottom, reuse of the expr are 2590 // guaranteed to happen later. 2591 IRBuilder<> Builder(MemoryInst); 2592 2593 // Now that we determined the addressing expression we want to use and know 2594 // that we have to sink it into this block. Check to see if we have already 2595 // done this for some other load/store instr in this block. If so, reuse the 2596 // computation. 2597 Value *&SunkAddr = SunkAddrs[Addr]; 2598 if (SunkAddr) { 2599 DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " 2600 << *MemoryInst << "\n"); 2601 if (SunkAddr->getType() != Addr->getType()) 2602 SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType()); 2603 } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() && 2604 TM && TM->getSubtarget<TargetSubtargetInfo>().useAA())) { 2605 // By default, we use the GEP-based method when AA is used later. This 2606 // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. 2607 DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " 2608 << *MemoryInst << "\n"); 2609 Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType()); 2610 Value *ResultPtr = nullptr, *ResultIndex = nullptr; 2611 2612 // First, find the pointer. 2613 if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) { 2614 ResultPtr = AddrMode.BaseReg; 2615 AddrMode.BaseReg = nullptr; 2616 } 2617 2618 if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) { 2619 // We can't add more than one pointer together, nor can we scale a 2620 // pointer (both of which seem meaningless). 2621 if (ResultPtr || AddrMode.Scale != 1) 2622 return false; 2623 2624 ResultPtr = AddrMode.ScaledReg; 2625 AddrMode.Scale = 0; 2626 } 2627 2628 if (AddrMode.BaseGV) { 2629 if (ResultPtr) 2630 return false; 2631 2632 ResultPtr = AddrMode.BaseGV; 2633 } 2634 2635 // If the real base value actually came from an inttoptr, then the matcher 2636 // will look through it and provide only the integer value. In that case, 2637 // use it here. 2638 if (!ResultPtr && AddrMode.BaseReg) { 2639 ResultPtr = 2640 Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), "sunkaddr"); 2641 AddrMode.BaseReg = nullptr; 2642 } else if (!ResultPtr && AddrMode.Scale == 1) { 2643 ResultPtr = 2644 Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), "sunkaddr"); 2645 AddrMode.Scale = 0; 2646 } 2647 2648 if (!ResultPtr && 2649 !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) { 2650 SunkAddr = Constant::getNullValue(Addr->getType()); 2651 } else if (!ResultPtr) { 2652 return false; 2653 } else { 2654 Type *I8PtrTy = 2655 Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace()); 2656 2657 // Start with the base register. Do this first so that subsequent address 2658 // matching finds it last, which will prevent it from trying to match it 2659 // as the scaled value in case it happens to be a mul. That would be 2660 // problematic if we've sunk a different mul for the scale, because then 2661 // we'd end up sinking both muls. 2662 if (AddrMode.BaseReg) { 2663 Value *V = AddrMode.BaseReg; 2664 if (V->getType() != IntPtrTy) 2665 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr"); 2666 2667 ResultIndex = V; 2668 } 2669 2670 // Add the scale value. 2671 if (AddrMode.Scale) { 2672 Value *V = AddrMode.ScaledReg; 2673 if (V->getType() == IntPtrTy) { 2674 // done. 2675 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() < 2676 cast<IntegerType>(V->getType())->getBitWidth()) { 2677 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); 2678 } else { 2679 // It is only safe to sign extend the BaseReg if we know that the math 2680 // required to create it did not overflow before we extend it. Since 2681 // the original IR value was tossed in favor of a constant back when 2682 // the AddrMode was created we need to bail out gracefully if widths 2683 // do not match instead of extending it. 2684 Instruction *I = dyn_cast_or_null<Instruction>(ResultIndex); 2685 if (I && (ResultIndex != AddrMode.BaseReg)) 2686 I->eraseFromParent(); 2687 return false; 2688 } 2689 2690 if (AddrMode.Scale != 1) 2691 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), 2692 "sunkaddr"); 2693 if (ResultIndex) 2694 ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr"); 2695 else 2696 ResultIndex = V; 2697 } 2698 2699 // Add in the Base Offset if present. 2700 if (AddrMode.BaseOffs) { 2701 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); 2702 if (ResultIndex) { 2703 // We need to add this separately from the scale above to help with 2704 // SDAG consecutive load/store merging. 2705 if (ResultPtr->getType() != I8PtrTy) 2706 ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); 2707 ResultPtr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr"); 2708 } 2709 2710 ResultIndex = V; 2711 } 2712 2713 if (!ResultIndex) { 2714 SunkAddr = ResultPtr; 2715 } else { 2716 if (ResultPtr->getType() != I8PtrTy) 2717 ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); 2718 SunkAddr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr"); 2719 } 2720 2721 if (SunkAddr->getType() != Addr->getType()) 2722 SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType()); 2723 } 2724 } else { 2725 DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " 2726 << *MemoryInst << "\n"); 2727 Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType()); 2728 Value *Result = nullptr; 2729 2730 // Start with the base register. Do this first so that subsequent address 2731 // matching finds it last, which will prevent it from trying to match it 2732 // as the scaled value in case it happens to be a mul. That would be 2733 // problematic if we've sunk a different mul for the scale, because then 2734 // we'd end up sinking both muls. 2735 if (AddrMode.BaseReg) { 2736 Value *V = AddrMode.BaseReg; 2737 if (V->getType()->isPointerTy()) 2738 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); 2739 if (V->getType() != IntPtrTy) 2740 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr"); 2741 Result = V; 2742 } 2743 2744 // Add the scale value. 2745 if (AddrMode.Scale) { 2746 Value *V = AddrMode.ScaledReg; 2747 if (V->getType() == IntPtrTy) { 2748 // done. 2749 } else if (V->getType()->isPointerTy()) { 2750 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); 2751 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() < 2752 cast<IntegerType>(V->getType())->getBitWidth()) { 2753 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); 2754 } else { 2755 // It is only safe to sign extend the BaseReg if we know that the math 2756 // required to create it did not overflow before we extend it. Since 2757 // the original IR value was tossed in favor of a constant back when 2758 // the AddrMode was created we need to bail out gracefully if widths 2759 // do not match instead of extending it. 2760 Instruction *I = dyn_cast_or_null<Instruction>(Result); 2761 if (I && (Result != AddrMode.BaseReg)) 2762 I->eraseFromParent(); 2763 return false; 2764 } 2765 if (AddrMode.Scale != 1) 2766 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), 2767 "sunkaddr"); 2768 if (Result) 2769 Result = Builder.CreateAdd(Result, V, "sunkaddr"); 2770 else 2771 Result = V; 2772 } 2773 2774 // Add in the BaseGV if present. 2775 if (AddrMode.BaseGV) { 2776 Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr"); 2777 if (Result) 2778 Result = Builder.CreateAdd(Result, V, "sunkaddr"); 2779 else 2780 Result = V; 2781 } 2782 2783 // Add in the Base Offset if present. 2784 if (AddrMode.BaseOffs) { 2785 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); 2786 if (Result) 2787 Result = Builder.CreateAdd(Result, V, "sunkaddr"); 2788 else 2789 Result = V; 2790 } 2791 2792 if (!Result) 2793 SunkAddr = Constant::getNullValue(Addr->getType()); 2794 else 2795 SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr"); 2796 } 2797 2798 MemoryInst->replaceUsesOfWith(Repl, SunkAddr); 2799 2800 // If we have no uses, recursively delete the value and all dead instructions 2801 // using it. 2802 if (Repl->use_empty()) { 2803 // This can cause recursive deletion, which can invalidate our iterator. 2804 // Use a WeakVH to hold onto it in case this happens. 2805 WeakVH IterHandle(CurInstIterator); 2806 BasicBlock *BB = CurInstIterator->getParent(); 2807 2808 RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo); 2809 2810 if (IterHandle != CurInstIterator) { 2811 // If the iterator instruction was recursively deleted, start over at the 2812 // start of the block. 2813 CurInstIterator = BB->begin(); 2814 SunkAddrs.clear(); 2815 } 2816 } 2817 ++NumMemoryInsts; 2818 return true; 2819 } 2820 2821 /// OptimizeInlineAsmInst - If there are any memory operands, use 2822 /// OptimizeMemoryInst to sink their address computing into the block when 2823 /// possible / profitable. 2824 bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) { 2825 bool MadeChange = false; 2826 2827 TargetLowering::AsmOperandInfoVector 2828 TargetConstraints = TLI->ParseConstraints(CS); 2829 unsigned ArgNo = 0; 2830 for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { 2831 TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; 2832 2833 // Compute the constraint code and ConstraintType to use. 2834 TLI->ComputeConstraintToUse(OpInfo, SDValue()); 2835 2836 if (OpInfo.ConstraintType == TargetLowering::C_Memory && 2837 OpInfo.isIndirect) { 2838 Value *OpVal = CS->getArgOperand(ArgNo++); 2839 MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType()); 2840 } else if (OpInfo.Type == InlineAsm::isInput) 2841 ArgNo++; 2842 } 2843 2844 return MadeChange; 2845 } 2846 2847 /// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same 2848 /// basic block as the load, unless conditions are unfavorable. This allows 2849 /// SelectionDAG to fold the extend into the load. 2850 /// 2851 bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) { 2852 // Look for a load being extended. 2853 LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0)); 2854 if (!LI) return false; 2855 2856 // If they're already in the same block, there's nothing to do. 2857 if (LI->getParent() == I->getParent()) 2858 return false; 2859 2860 // If the load has other users and the truncate is not free, this probably 2861 // isn't worthwhile. 2862 if (!LI->hasOneUse() && 2863 TLI && (TLI->isTypeLegal(TLI->getValueType(LI->getType())) || 2864 !TLI->isTypeLegal(TLI->getValueType(I->getType()))) && 2865 !TLI->isTruncateFree(I->getType(), LI->getType())) 2866 return false; 2867 2868 // Check whether the target supports casts folded into loads. 2869 unsigned LType; 2870 if (isa<ZExtInst>(I)) 2871 LType = ISD::ZEXTLOAD; 2872 else { 2873 assert(isa<SExtInst>(I) && "Unexpected ext type!"); 2874 LType = ISD::SEXTLOAD; 2875 } 2876 if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType()))) 2877 return false; 2878 2879 // Move the extend into the same block as the load, so that SelectionDAG 2880 // can fold it. 2881 I->removeFromParent(); 2882 I->insertAfter(LI); 2883 ++NumExtsMoved; 2884 return true; 2885 } 2886 2887 bool CodeGenPrepare::OptimizeExtUses(Instruction *I) { 2888 BasicBlock *DefBB = I->getParent(); 2889 2890 // If the result of a {s|z}ext and its source are both live out, rewrite all 2891 // other uses of the source with result of extension. 2892 Value *Src = I->getOperand(0); 2893 if (Src->hasOneUse()) 2894 return false; 2895 2896 // Only do this xform if truncating is free. 2897 if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType())) 2898 return false; 2899 2900 // Only safe to perform the optimization if the source is also defined in 2901 // this block. 2902 if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent()) 2903 return false; 2904 2905 bool DefIsLiveOut = false; 2906 for (User *U : I->users()) { 2907 Instruction *UI = cast<Instruction>(U); 2908 2909 // Figure out which BB this ext is used in. 2910 BasicBlock *UserBB = UI->getParent(); 2911 if (UserBB == DefBB) continue; 2912 DefIsLiveOut = true; 2913 break; 2914 } 2915 if (!DefIsLiveOut) 2916 return false; 2917 2918 // Make sure none of the uses are PHI nodes. 2919 for (User *U : Src->users()) { 2920 Instruction *UI = cast<Instruction>(U); 2921 BasicBlock *UserBB = UI->getParent(); 2922 if (UserBB == DefBB) continue; 2923 // Be conservative. We don't want this xform to end up introducing 2924 // reloads just before load / store instructions. 2925 if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI)) 2926 return false; 2927 } 2928 2929 // InsertedTruncs - Only insert one trunc in each block once. 2930 DenseMap<BasicBlock*, Instruction*> InsertedTruncs; 2931 2932 bool MadeChange = false; 2933 for (Use &U : Src->uses()) { 2934 Instruction *User = cast<Instruction>(U.getUser()); 2935 2936 // Figure out which BB this ext is used in. 2937 BasicBlock *UserBB = User->getParent(); 2938 if (UserBB == DefBB) continue; 2939 2940 // Both src and def are live in this block. Rewrite the use. 2941 Instruction *&InsertedTrunc = InsertedTruncs[UserBB]; 2942 2943 if (!InsertedTrunc) { 2944 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); 2945 InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt); 2946 InsertedTruncsSet.insert(InsertedTrunc); 2947 } 2948 2949 // Replace a use of the {s|z}ext source with a use of the result. 2950 U = InsertedTrunc; 2951 ++NumExtUses; 2952 MadeChange = true; 2953 } 2954 2955 return MadeChange; 2956 } 2957 2958 /// isFormingBranchFromSelectProfitable - Returns true if a SelectInst should be 2959 /// turned into an explicit branch. 2960 static bool isFormingBranchFromSelectProfitable(SelectInst *SI) { 2961 // FIXME: This should use the same heuristics as IfConversion to determine 2962 // whether a select is better represented as a branch. This requires that 2963 // branch probability metadata is preserved for the select, which is not the 2964 // case currently. 2965 2966 CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition()); 2967 2968 // If the branch is predicted right, an out of order CPU can avoid blocking on 2969 // the compare. Emit cmovs on compares with a memory operand as branches to 2970 // avoid stalls on the load from memory. If the compare has more than one use 2971 // there's probably another cmov or setcc around so it's not worth emitting a 2972 // branch. 2973 if (!Cmp) 2974 return false; 2975 2976 Value *CmpOp0 = Cmp->getOperand(0); 2977 Value *CmpOp1 = Cmp->getOperand(1); 2978 2979 // We check that the memory operand has one use to avoid uses of the loaded 2980 // value directly after the compare, making branches unprofitable. 2981 return Cmp->hasOneUse() && 2982 ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) || 2983 (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse())); 2984 } 2985 2986 2987 /// If we have a SelectInst that will likely profit from branch prediction, 2988 /// turn it into a branch. 2989 bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) { 2990 bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); 2991 2992 // Can we convert the 'select' to CF ? 2993 if (DisableSelectToBranch || OptSize || !TLI || VectorCond) 2994 return false; 2995 2996 TargetLowering::SelectSupportKind SelectKind; 2997 if (VectorCond) 2998 SelectKind = TargetLowering::VectorMaskSelect; 2999 else if (SI->getType()->isVectorTy()) 3000 SelectKind = TargetLowering::ScalarCondVectorVal; 3001 else 3002 SelectKind = TargetLowering::ScalarValSelect; 3003 3004 // Do we have efficient codegen support for this kind of 'selects' ? 3005 if (TLI->isSelectSupported(SelectKind)) { 3006 // We have efficient codegen support for the select instruction. 3007 // Check if it is profitable to keep this 'select'. 3008 if (!TLI->isPredictableSelectExpensive() || 3009 !isFormingBranchFromSelectProfitable(SI)) 3010 return false; 3011 } 3012 3013 ModifiedDT = true; 3014 3015 // First, we split the block containing the select into 2 blocks. 3016 BasicBlock *StartBlock = SI->getParent(); 3017 BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI)); 3018 BasicBlock *NextBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); 3019 3020 // Create a new block serving as the landing pad for the branch. 3021 BasicBlock *SmallBlock = BasicBlock::Create(SI->getContext(), "select.mid", 3022 NextBlock->getParent(), NextBlock); 3023 3024 // Move the unconditional branch from the block with the select in it into our 3025 // landing pad block. 3026 StartBlock->getTerminator()->eraseFromParent(); 3027 BranchInst::Create(NextBlock, SmallBlock); 3028 3029 // Insert the real conditional branch based on the original condition. 3030 BranchInst::Create(NextBlock, SmallBlock, SI->getCondition(), SI); 3031 3032 // The select itself is replaced with a PHI Node. 3033 PHINode *PN = PHINode::Create(SI->getType(), 2, "", NextBlock->begin()); 3034 PN->takeName(SI); 3035 PN->addIncoming(SI->getTrueValue(), StartBlock); 3036 PN->addIncoming(SI->getFalseValue(), SmallBlock); 3037 SI->replaceAllUsesWith(PN); 3038 SI->eraseFromParent(); 3039 3040 // Instruct OptimizeBlock to skip to the next block. 3041 CurInstIterator = StartBlock->end(); 3042 ++NumSelectsExpanded; 3043 return true; 3044 } 3045 3046 static bool isBroadcastShuffle(ShuffleVectorInst *SVI) { 3047 SmallVector<int, 16> Mask(SVI->getShuffleMask()); 3048 int SplatElem = -1; 3049 for (unsigned i = 0; i < Mask.size(); ++i) { 3050 if (SplatElem != -1 && Mask[i] != -1 && Mask[i] != SplatElem) 3051 return false; 3052 SplatElem = Mask[i]; 3053 } 3054 3055 return true; 3056 } 3057 3058 /// Some targets have expensive vector shifts if the lanes aren't all the same 3059 /// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases 3060 /// it's often worth sinking a shufflevector splat down to its use so that 3061 /// codegen can spot all lanes are identical. 3062 bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) { 3063 BasicBlock *DefBB = SVI->getParent(); 3064 3065 // Only do this xform if variable vector shifts are particularly expensive. 3066 if (!TLI || !TLI->isVectorShiftByScalarCheap(SVI->getType())) 3067 return false; 3068 3069 // We only expect better codegen by sinking a shuffle if we can recognise a 3070 // constant splat. 3071 if (!isBroadcastShuffle(SVI)) 3072 return false; 3073 3074 // InsertedShuffles - Only insert a shuffle in each block once. 3075 DenseMap<BasicBlock*, Instruction*> InsertedShuffles; 3076 3077 bool MadeChange = false; 3078 for (User *U : SVI->users()) { 3079 Instruction *UI = cast<Instruction>(U); 3080 3081 // Figure out which BB this ext is used in. 3082 BasicBlock *UserBB = UI->getParent(); 3083 if (UserBB == DefBB) continue; 3084 3085 // For now only apply this when the splat is used by a shift instruction. 3086 if (!UI->isShift()) continue; 3087 3088 // Everything checks out, sink the shuffle if the user's block doesn't 3089 // already have a copy. 3090 Instruction *&InsertedShuffle = InsertedShuffles[UserBB]; 3091 3092 if (!InsertedShuffle) { 3093 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); 3094 InsertedShuffle = new ShuffleVectorInst(SVI->getOperand(0), 3095 SVI->getOperand(1), 3096 SVI->getOperand(2), "", InsertPt); 3097 } 3098 3099 UI->replaceUsesOfWith(SVI, InsertedShuffle); 3100 MadeChange = true; 3101 } 3102 3103 // If we removed all uses, nuke the shuffle. 3104 if (SVI->use_empty()) { 3105 SVI->eraseFromParent(); 3106 MadeChange = true; 3107 } 3108 3109 return MadeChange; 3110 } 3111 3112 bool CodeGenPrepare::OptimizeInst(Instruction *I) { 3113 if (PHINode *P = dyn_cast<PHINode>(I)) { 3114 // It is possible for very late stage optimizations (such as SimplifyCFG) 3115 // to introduce PHI nodes too late to be cleaned up. If we detect such a 3116 // trivial PHI, go ahead and zap it here. 3117 if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : nullptr, 3118 TLInfo, DT)) { 3119 P->replaceAllUsesWith(V); 3120 P->eraseFromParent(); 3121 ++NumPHIsElim; 3122 return true; 3123 } 3124 return false; 3125 } 3126 3127 if (CastInst *CI = dyn_cast<CastInst>(I)) { 3128 // If the source of the cast is a constant, then this should have 3129 // already been constant folded. The only reason NOT to constant fold 3130 // it is if something (e.g. LSR) was careful to place the constant 3131 // evaluation in a block other than then one that uses it (e.g. to hoist 3132 // the address of globals out of a loop). If this is the case, we don't 3133 // want to forward-subst the cast. 3134 if (isa<Constant>(CI->getOperand(0))) 3135 return false; 3136 3137 if (TLI && OptimizeNoopCopyExpression(CI, *TLI)) 3138 return true; 3139 3140 if (isa<ZExtInst>(I) || isa<SExtInst>(I)) { 3141 /// Sink a zext or sext into its user blocks if the target type doesn't 3142 /// fit in one register 3143 if (TLI && TLI->getTypeAction(CI->getContext(), 3144 TLI->getValueType(CI->getType())) == 3145 TargetLowering::TypeExpandInteger) { 3146 return SinkCast(CI); 3147 } else { 3148 bool MadeChange = MoveExtToFormExtLoad(I); 3149 return MadeChange | OptimizeExtUses(I); 3150 } 3151 } 3152 return false; 3153 } 3154 3155 if (CmpInst *CI = dyn_cast<CmpInst>(I)) 3156 if (!TLI || !TLI->hasMultipleConditionRegisters()) 3157 return OptimizeCmpExpression(CI); 3158 3159 if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 3160 if (TLI) 3161 return OptimizeMemoryInst(I, I->getOperand(0), LI->getType()); 3162 return false; 3163 } 3164 3165 if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 3166 if (TLI) 3167 return OptimizeMemoryInst(I, SI->getOperand(1), 3168 SI->getOperand(0)->getType()); 3169 return false; 3170 } 3171 3172 BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I); 3173 3174 if (BinOp && (BinOp->getOpcode() == Instruction::AShr || 3175 BinOp->getOpcode() == Instruction::LShr)) { 3176 ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1)); 3177 if (TLI && CI && TLI->hasExtractBitsInsn()) 3178 return OptimizeExtractBits(BinOp, CI, *TLI); 3179 3180 return false; 3181 } 3182 3183 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) { 3184 if (GEPI->hasAllZeroIndices()) { 3185 /// The GEP operand must be a pointer, so must its result -> BitCast 3186 Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(), 3187 GEPI->getName(), GEPI); 3188 GEPI->replaceAllUsesWith(NC); 3189 GEPI->eraseFromParent(); 3190 ++NumGEPsElim; 3191 OptimizeInst(NC); 3192 return true; 3193 } 3194 return false; 3195 } 3196 3197 if (CallInst *CI = dyn_cast<CallInst>(I)) 3198 return OptimizeCallInst(CI); 3199 3200 if (SelectInst *SI = dyn_cast<SelectInst>(I)) 3201 return OptimizeSelectInst(SI); 3202 3203 if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) 3204 return OptimizeShuffleVectorInst(SVI); 3205 3206 return false; 3207 } 3208 3209 // In this pass we look for GEP and cast instructions that are used 3210 // across basic blocks and rewrite them to improve basic-block-at-a-time 3211 // selection. 3212 bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) { 3213 SunkAddrs.clear(); 3214 bool MadeChange = false; 3215 3216 CurInstIterator = BB.begin(); 3217 while (CurInstIterator != BB.end()) 3218 MadeChange |= OptimizeInst(CurInstIterator++); 3219 3220 MadeChange |= DupRetToEnableTailCallOpts(&BB); 3221 3222 return MadeChange; 3223 } 3224 3225 // llvm.dbg.value is far away from the value then iSel may not be able 3226 // handle it properly. iSel will drop llvm.dbg.value if it can not 3227 // find a node corresponding to the value. 3228 bool CodeGenPrepare::PlaceDbgValues(Function &F) { 3229 bool MadeChange = false; 3230 for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { 3231 Instruction *PrevNonDbgInst = nullptr; 3232 for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) { 3233 Instruction *Insn = BI; ++BI; 3234 DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn); 3235 // Leave dbg.values that refer to an alloca alone. These 3236 // instrinsics describe the address of a variable (= the alloca) 3237 // being taken. They should not be moved next to the alloca 3238 // (and to the beginning of the scope), but rather stay close to 3239 // where said address is used. 3240 if (!DVI || (DVI->getValue() && isa<AllocaInst>(DVI->getValue()))) { 3241 PrevNonDbgInst = Insn; 3242 continue; 3243 } 3244 3245 Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue()); 3246 if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) { 3247 DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI); 3248 DVI->removeFromParent(); 3249 if (isa<PHINode>(VI)) 3250 DVI->insertBefore(VI->getParent()->getFirstInsertionPt()); 3251 else 3252 DVI->insertAfter(VI); 3253 MadeChange = true; 3254 ++NumDbgValueMoved; 3255 } 3256 } 3257 } 3258 return MadeChange; 3259 } 3260 3261 // If there is a sequence that branches based on comparing a single bit 3262 // against zero that can be combined into a single instruction, and the 3263 // target supports folding these into a single instruction, sink the 3264 // mask and compare into the branch uses. Do this before OptimizeBlock -> 3265 // OptimizeInst -> OptimizeCmpExpression, which perturbs the pattern being 3266 // searched for. 3267 bool CodeGenPrepare::sinkAndCmp(Function &F) { 3268 if (!EnableAndCmpSinking) 3269 return false; 3270 if (!TLI || !TLI->isMaskAndBranchFoldingLegal()) 3271 return false; 3272 bool MadeChange = false; 3273 for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { 3274 BasicBlock *BB = I++; 3275 3276 // Does this BB end with the following? 3277 // %andVal = and %val, #single-bit-set 3278 // %icmpVal = icmp %andResult, 0 3279 // br i1 %cmpVal label %dest1, label %dest2" 3280 BranchInst *Brcc = dyn_cast<BranchInst>(BB->getTerminator()); 3281 if (!Brcc || !Brcc->isConditional()) 3282 continue; 3283 ICmpInst *Cmp = dyn_cast<ICmpInst>(Brcc->getOperand(0)); 3284 if (!Cmp || Cmp->getParent() != BB) 3285 continue; 3286 ConstantInt *Zero = dyn_cast<ConstantInt>(Cmp->getOperand(1)); 3287 if (!Zero || !Zero->isZero()) 3288 continue; 3289 Instruction *And = dyn_cast<Instruction>(Cmp->getOperand(0)); 3290 if (!And || And->getOpcode() != Instruction::And || And->getParent() != BB) 3291 continue; 3292 ConstantInt* Mask = dyn_cast<ConstantInt>(And->getOperand(1)); 3293 if (!Mask || !Mask->getUniqueInteger().isPowerOf2()) 3294 continue; 3295 DEBUG(dbgs() << "found and; icmp ?,0; brcc\n"); DEBUG(BB->dump()); 3296 3297 // Push the "and; icmp" for any users that are conditional branches. 3298 // Since there can only be one branch use per BB, we don't need to keep 3299 // track of which BBs we insert into. 3300 for (Value::use_iterator UI = Cmp->use_begin(), E = Cmp->use_end(); 3301 UI != E; ) { 3302 Use &TheUse = *UI; 3303 // Find brcc use. 3304 BranchInst *BrccUser = dyn_cast<BranchInst>(*UI); 3305 ++UI; 3306 if (!BrccUser || !BrccUser->isConditional()) 3307 continue; 3308 BasicBlock *UserBB = BrccUser->getParent(); 3309 if (UserBB == BB) continue; 3310 DEBUG(dbgs() << "found Brcc use\n"); 3311 3312 // Sink the "and; icmp" to use. 3313 MadeChange = true; 3314 BinaryOperator *NewAnd = 3315 BinaryOperator::CreateAnd(And->getOperand(0), And->getOperand(1), "", 3316 BrccUser); 3317 CmpInst *NewCmp = 3318 CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(), NewAnd, Zero, 3319 "", BrccUser); 3320 TheUse = NewCmp; 3321 ++NumAndCmpsMoved; 3322 DEBUG(BrccUser->getParent()->dump()); 3323 } 3324 } 3325 return MadeChange; 3326 } 3327