1 //===- CodeGenPrepare.cpp - Prepare a function for code generation --------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass munges the code in the input function to better prepare it for 11 // SelectionDAG-based code generation. This works around limitations in it's 12 // basic-block-at-a-time approach. It should eventually be removed. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #define DEBUG_TYPE "codegenprepare" 17 #include "llvm/CodeGen/Passes.h" 18 #include "llvm/ADT/DenseMap.h" 19 #include "llvm/ADT/SmallSet.h" 20 #include "llvm/ADT/Statistic.h" 21 #include "llvm/Analysis/InstructionSimplify.h" 22 #include "llvm/IR/CallSite.h" 23 #include "llvm/IR/Constants.h" 24 #include "llvm/IR/DataLayout.h" 25 #include "llvm/IR/DerivedTypes.h" 26 #include "llvm/IR/Dominators.h" 27 #include "llvm/IR/Function.h" 28 #include "llvm/IR/GetElementPtrTypeIterator.h" 29 #include "llvm/IR/IRBuilder.h" 30 #include "llvm/IR/InlineAsm.h" 31 #include "llvm/IR/Instructions.h" 32 #include "llvm/IR/IntrinsicInst.h" 33 #include "llvm/IR/PatternMatch.h" 34 #include "llvm/IR/ValueHandle.h" 35 #include "llvm/IR/ValueMap.h" 36 #include "llvm/Pass.h" 37 #include "llvm/Support/CommandLine.h" 38 #include "llvm/Support/Debug.h" 39 #include "llvm/Support/raw_ostream.h" 40 #include "llvm/Target/TargetLibraryInfo.h" 41 #include "llvm/Target/TargetLowering.h" 42 #include "llvm/Target/TargetSubtargetInfo.h" 43 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 44 #include "llvm/Transforms/Utils/BuildLibCalls.h" 45 #include "llvm/Transforms/Utils/BypassSlowDivision.h" 46 #include "llvm/Transforms/Utils/Local.h" 47 using namespace llvm; 48 using namespace llvm::PatternMatch; 49 50 STATISTIC(NumBlocksElim, "Number of blocks eliminated"); 51 STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated"); 52 STATISTIC(NumGEPsElim, "Number of GEPs converted to casts"); 53 STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of " 54 "sunken Cmps"); 55 STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses " 56 "of sunken Casts"); 57 STATISTIC(NumMemoryInsts, "Number of memory instructions whose address " 58 "computations were sunk"); 59 STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads"); 60 STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized"); 61 STATISTIC(NumRetsDup, "Number of return instructions duplicated"); 62 STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved"); 63 STATISTIC(NumSelectsExpanded, "Number of selects turned into branches"); 64 STATISTIC(NumAndCmpsMoved, "Number of and/cmp's pushed into branches"); 65 66 static cl::opt<bool> DisableBranchOpts( 67 "disable-cgp-branch-opts", cl::Hidden, cl::init(false), 68 cl::desc("Disable branch optimizations in CodeGenPrepare")); 69 70 static cl::opt<bool> DisableSelectToBranch( 71 "disable-cgp-select2branch", cl::Hidden, cl::init(false), 72 cl::desc("Disable select to branch conversion.")); 73 74 static cl::opt<bool> AddrSinkUsingGEPs( 75 "addr-sink-using-gep", cl::Hidden, cl::init(false), 76 cl::desc("Address sinking in CGP using GEPs.")); 77 78 static cl::opt<bool> EnableAndCmpSinking( 79 "enable-andcmp-sinking", cl::Hidden, cl::init(true), 80 cl::desc("Enable sinkinig and/cmp into branches.")); 81 82 namespace { 83 typedef SmallPtrSet<Instruction *, 16> SetOfInstrs; 84 typedef DenseMap<Instruction *, Type *> InstrToOrigTy; 85 86 class CodeGenPrepare : public FunctionPass { 87 /// TLI - Keep a pointer of a TargetLowering to consult for determining 88 /// transformation profitability. 89 const TargetMachine *TM; 90 const TargetLowering *TLI; 91 const TargetLibraryInfo *TLInfo; 92 DominatorTree *DT; 93 94 /// CurInstIterator - As we scan instructions optimizing them, this is the 95 /// next instruction to optimize. Xforms that can invalidate this should 96 /// update it. 97 BasicBlock::iterator CurInstIterator; 98 99 /// Keeps track of non-local addresses that have been sunk into a block. 100 /// This allows us to avoid inserting duplicate code for blocks with 101 /// multiple load/stores of the same address. 102 ValueMap<Value*, Value*> SunkAddrs; 103 104 /// Keeps track of all truncates inserted for the current function. 105 SetOfInstrs InsertedTruncsSet; 106 /// Keeps track of the type of the related instruction before their 107 /// promotion for the current function. 108 InstrToOrigTy PromotedInsts; 109 110 /// ModifiedDT - If CFG is modified in anyway, dominator tree may need to 111 /// be updated. 112 bool ModifiedDT; 113 114 /// OptSize - True if optimizing for size. 115 bool OptSize; 116 117 public: 118 static char ID; // Pass identification, replacement for typeid 119 explicit CodeGenPrepare(const TargetMachine *TM = nullptr) 120 : FunctionPass(ID), TM(TM), TLI(nullptr) { 121 initializeCodeGenPreparePass(*PassRegistry::getPassRegistry()); 122 } 123 bool runOnFunction(Function &F) override; 124 125 const char *getPassName() const override { return "CodeGen Prepare"; } 126 127 void getAnalysisUsage(AnalysisUsage &AU) const override { 128 AU.addPreserved<DominatorTreeWrapperPass>(); 129 AU.addRequired<TargetLibraryInfo>(); 130 } 131 132 private: 133 bool EliminateFallThrough(Function &F); 134 bool EliminateMostlyEmptyBlocks(Function &F); 135 bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; 136 void EliminateMostlyEmptyBlock(BasicBlock *BB); 137 bool OptimizeBlock(BasicBlock &BB); 138 bool OptimizeInst(Instruction *I); 139 bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy); 140 bool OptimizeInlineAsmInst(CallInst *CS); 141 bool OptimizeCallInst(CallInst *CI); 142 bool MoveExtToFormExtLoad(Instruction *I); 143 bool OptimizeExtUses(Instruction *I); 144 bool OptimizeSelectInst(SelectInst *SI); 145 bool OptimizeShuffleVectorInst(ShuffleVectorInst *SI); 146 bool DupRetToEnableTailCallOpts(BasicBlock *BB); 147 bool PlaceDbgValues(Function &F); 148 bool sinkAndCmp(Function &F); 149 }; 150 } 151 152 char CodeGenPrepare::ID = 0; 153 static void *initializeCodeGenPreparePassOnce(PassRegistry &Registry) { 154 initializeTargetLibraryInfoPass(Registry); 155 PassInfo *PI = new PassInfo( 156 "Optimize for code generation", "codegenprepare", &CodeGenPrepare::ID, 157 PassInfo::NormalCtor_t(callDefaultCtor<CodeGenPrepare>), false, false, 158 PassInfo::TargetMachineCtor_t(callTargetMachineCtor<CodeGenPrepare>)); 159 Registry.registerPass(*PI, true); 160 return PI; 161 } 162 163 void llvm::initializeCodeGenPreparePass(PassRegistry &Registry) { 164 CALL_ONCE_INITIALIZATION(initializeCodeGenPreparePassOnce) 165 } 166 167 FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) { 168 return new CodeGenPrepare(TM); 169 } 170 171 bool CodeGenPrepare::runOnFunction(Function &F) { 172 if (skipOptnoneFunction(F)) 173 return false; 174 175 bool EverMadeChange = false; 176 // Clear per function information. 177 InsertedTruncsSet.clear(); 178 PromotedInsts.clear(); 179 180 ModifiedDT = false; 181 if (TM) TLI = TM->getTargetLowering(); 182 TLInfo = &getAnalysis<TargetLibraryInfo>(); 183 DominatorTreeWrapperPass *DTWP = 184 getAnalysisIfAvailable<DominatorTreeWrapperPass>(); 185 DT = DTWP ? &DTWP->getDomTree() : nullptr; 186 OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, 187 Attribute::OptimizeForSize); 188 189 /// This optimization identifies DIV instructions that can be 190 /// profitably bypassed and carried out with a shorter, faster divide. 191 if (!OptSize && TLI && TLI->isSlowDivBypassed()) { 192 const DenseMap<unsigned int, unsigned int> &BypassWidths = 193 TLI->getBypassSlowDivWidths(); 194 for (Function::iterator I = F.begin(); I != F.end(); I++) 195 EverMadeChange |= bypassSlowDivision(F, I, BypassWidths); 196 } 197 198 // Eliminate blocks that contain only PHI nodes and an 199 // unconditional branch. 200 EverMadeChange |= EliminateMostlyEmptyBlocks(F); 201 202 // llvm.dbg.value is far away from the value then iSel may not be able 203 // handle it properly. iSel will drop llvm.dbg.value if it can not 204 // find a node corresponding to the value. 205 EverMadeChange |= PlaceDbgValues(F); 206 207 // If there is a mask, compare against zero, and branch that can be combined 208 // into a single target instruction, push the mask and compare into branch 209 // users. Do this before OptimizeBlock -> OptimizeInst -> 210 // OptimizeCmpExpression, which perturbs the pattern being searched for. 211 if (!DisableBranchOpts) 212 EverMadeChange |= sinkAndCmp(F); 213 214 bool MadeChange = true; 215 while (MadeChange) { 216 MadeChange = false; 217 for (Function::iterator I = F.begin(); I != F.end(); ) { 218 BasicBlock *BB = I++; 219 MadeChange |= OptimizeBlock(*BB); 220 } 221 EverMadeChange |= MadeChange; 222 } 223 224 SunkAddrs.clear(); 225 226 if (!DisableBranchOpts) { 227 MadeChange = false; 228 SmallPtrSet<BasicBlock*, 8> WorkList; 229 for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { 230 SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB)); 231 MadeChange |= ConstantFoldTerminator(BB, true); 232 if (!MadeChange) continue; 233 234 for (SmallVectorImpl<BasicBlock*>::iterator 235 II = Successors.begin(), IE = Successors.end(); II != IE; ++II) 236 if (pred_begin(*II) == pred_end(*II)) 237 WorkList.insert(*II); 238 } 239 240 // Delete the dead blocks and any of their dead successors. 241 MadeChange |= !WorkList.empty(); 242 while (!WorkList.empty()) { 243 BasicBlock *BB = *WorkList.begin(); 244 WorkList.erase(BB); 245 SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB)); 246 247 DeleteDeadBlock(BB); 248 249 for (SmallVectorImpl<BasicBlock*>::iterator 250 II = Successors.begin(), IE = Successors.end(); II != IE; ++II) 251 if (pred_begin(*II) == pred_end(*II)) 252 WorkList.insert(*II); 253 } 254 255 // Merge pairs of basic blocks with unconditional branches, connected by 256 // a single edge. 257 if (EverMadeChange || MadeChange) 258 MadeChange |= EliminateFallThrough(F); 259 260 if (MadeChange) 261 ModifiedDT = true; 262 EverMadeChange |= MadeChange; 263 } 264 265 if (ModifiedDT && DT) 266 DT->recalculate(F); 267 268 return EverMadeChange; 269 } 270 271 /// EliminateFallThrough - Merge basic blocks which are connected 272 /// by a single edge, where one of the basic blocks has a single successor 273 /// pointing to the other basic block, which has a single predecessor. 274 bool CodeGenPrepare::EliminateFallThrough(Function &F) { 275 bool Changed = false; 276 // Scan all of the blocks in the function, except for the entry block. 277 for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { 278 BasicBlock *BB = I++; 279 // If the destination block has a single pred, then this is a trivial 280 // edge, just collapse it. 281 BasicBlock *SinglePred = BB->getSinglePredecessor(); 282 283 // Don't merge if BB's address is taken. 284 if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue; 285 286 BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator()); 287 if (Term && !Term->isConditional()) { 288 Changed = true; 289 DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n"); 290 // Remember if SinglePred was the entry block of the function. 291 // If so, we will need to move BB back to the entry position. 292 bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); 293 MergeBasicBlockIntoOnlyPred(BB, this); 294 295 if (isEntry && BB != &BB->getParent()->getEntryBlock()) 296 BB->moveBefore(&BB->getParent()->getEntryBlock()); 297 298 // We have erased a block. Update the iterator. 299 I = BB; 300 } 301 } 302 return Changed; 303 } 304 305 /// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes, 306 /// debug info directives, and an unconditional branch. Passes before isel 307 /// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for 308 /// isel. Start by eliminating these blocks so we can split them the way we 309 /// want them. 310 bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) { 311 bool MadeChange = false; 312 // Note that this intentionally skips the entry block. 313 for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { 314 BasicBlock *BB = I++; 315 316 // If this block doesn't end with an uncond branch, ignore it. 317 BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); 318 if (!BI || !BI->isUnconditional()) 319 continue; 320 321 // If the instruction before the branch (skipping debug info) isn't a phi 322 // node, then other stuff is happening here. 323 BasicBlock::iterator BBI = BI; 324 if (BBI != BB->begin()) { 325 --BBI; 326 while (isa<DbgInfoIntrinsic>(BBI)) { 327 if (BBI == BB->begin()) 328 break; 329 --BBI; 330 } 331 if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI)) 332 continue; 333 } 334 335 // Do not break infinite loops. 336 BasicBlock *DestBB = BI->getSuccessor(0); 337 if (DestBB == BB) 338 continue; 339 340 if (!CanMergeBlocks(BB, DestBB)) 341 continue; 342 343 EliminateMostlyEmptyBlock(BB); 344 MadeChange = true; 345 } 346 return MadeChange; 347 } 348 349 /// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a 350 /// single uncond branch between them, and BB contains no other non-phi 351 /// instructions. 352 bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB, 353 const BasicBlock *DestBB) const { 354 // We only want to eliminate blocks whose phi nodes are used by phi nodes in 355 // the successor. If there are more complex condition (e.g. preheaders), 356 // don't mess around with them. 357 BasicBlock::const_iterator BBI = BB->begin(); 358 while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) { 359 for (const User *U : PN->users()) { 360 const Instruction *UI = cast<Instruction>(U); 361 if (UI->getParent() != DestBB || !isa<PHINode>(UI)) 362 return false; 363 // If User is inside DestBB block and it is a PHINode then check 364 // incoming value. If incoming value is not from BB then this is 365 // a complex condition (e.g. preheaders) we want to avoid here. 366 if (UI->getParent() == DestBB) { 367 if (const PHINode *UPN = dyn_cast<PHINode>(UI)) 368 for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) { 369 Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I)); 370 if (Insn && Insn->getParent() == BB && 371 Insn->getParent() != UPN->getIncomingBlock(I)) 372 return false; 373 } 374 } 375 } 376 } 377 378 // If BB and DestBB contain any common predecessors, then the phi nodes in BB 379 // and DestBB may have conflicting incoming values for the block. If so, we 380 // can't merge the block. 381 const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin()); 382 if (!DestBBPN) return true; // no conflict. 383 384 // Collect the preds of BB. 385 SmallPtrSet<const BasicBlock*, 16> BBPreds; 386 if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) { 387 // It is faster to get preds from a PHI than with pred_iterator. 388 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i) 389 BBPreds.insert(BBPN->getIncomingBlock(i)); 390 } else { 391 BBPreds.insert(pred_begin(BB), pred_end(BB)); 392 } 393 394 // Walk the preds of DestBB. 395 for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) { 396 BasicBlock *Pred = DestBBPN->getIncomingBlock(i); 397 if (BBPreds.count(Pred)) { // Common predecessor? 398 BBI = DestBB->begin(); 399 while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) { 400 const Value *V1 = PN->getIncomingValueForBlock(Pred); 401 const Value *V2 = PN->getIncomingValueForBlock(BB); 402 403 // If V2 is a phi node in BB, look up what the mapped value will be. 404 if (const PHINode *V2PN = dyn_cast<PHINode>(V2)) 405 if (V2PN->getParent() == BB) 406 V2 = V2PN->getIncomingValueForBlock(Pred); 407 408 // If there is a conflict, bail out. 409 if (V1 != V2) return false; 410 } 411 } 412 } 413 414 return true; 415 } 416 417 418 /// EliminateMostlyEmptyBlock - Eliminate a basic block that have only phi's and 419 /// an unconditional branch in it. 420 void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { 421 BranchInst *BI = cast<BranchInst>(BB->getTerminator()); 422 BasicBlock *DestBB = BI->getSuccessor(0); 423 424 DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB); 425 426 // If the destination block has a single pred, then this is a trivial edge, 427 // just collapse it. 428 if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) { 429 if (SinglePred != DestBB) { 430 // Remember if SinglePred was the entry block of the function. If so, we 431 // will need to move BB back to the entry position. 432 bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); 433 MergeBasicBlockIntoOnlyPred(DestBB, this); 434 435 if (isEntry && BB != &BB->getParent()->getEntryBlock()) 436 BB->moveBefore(&BB->getParent()->getEntryBlock()); 437 438 DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); 439 return; 440 } 441 } 442 443 // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB 444 // to handle the new incoming edges it is about to have. 445 PHINode *PN; 446 for (BasicBlock::iterator BBI = DestBB->begin(); 447 (PN = dyn_cast<PHINode>(BBI)); ++BBI) { 448 // Remove the incoming value for BB, and remember it. 449 Value *InVal = PN->removeIncomingValue(BB, false); 450 451 // Two options: either the InVal is a phi node defined in BB or it is some 452 // value that dominates BB. 453 PHINode *InValPhi = dyn_cast<PHINode>(InVal); 454 if (InValPhi && InValPhi->getParent() == BB) { 455 // Add all of the input values of the input PHI as inputs of this phi. 456 for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i) 457 PN->addIncoming(InValPhi->getIncomingValue(i), 458 InValPhi->getIncomingBlock(i)); 459 } else { 460 // Otherwise, add one instance of the dominating value for each edge that 461 // we will be adding. 462 if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) { 463 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i) 464 PN->addIncoming(InVal, BBPN->getIncomingBlock(i)); 465 } else { 466 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) 467 PN->addIncoming(InVal, *PI); 468 } 469 } 470 } 471 472 // The PHIs are now updated, change everything that refers to BB to use 473 // DestBB and remove BB. 474 BB->replaceAllUsesWith(DestBB); 475 if (DT && !ModifiedDT) { 476 BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock(); 477 BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock(); 478 BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom); 479 DT->changeImmediateDominator(DestBB, NewIDom); 480 DT->eraseNode(BB); 481 } 482 BB->eraseFromParent(); 483 ++NumBlocksElim; 484 485 DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); 486 } 487 488 /// SinkCast - Sink the specified cast instruction into its user blocks 489 static bool SinkCast(CastInst *CI) { 490 BasicBlock *DefBB = CI->getParent(); 491 492 /// InsertedCasts - Only insert a cast in each block once. 493 DenseMap<BasicBlock*, CastInst*> InsertedCasts; 494 495 bool MadeChange = false; 496 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end(); 497 UI != E; ) { 498 Use &TheUse = UI.getUse(); 499 Instruction *User = cast<Instruction>(*UI); 500 501 // Figure out which BB this cast is used in. For PHI's this is the 502 // appropriate predecessor block. 503 BasicBlock *UserBB = User->getParent(); 504 if (PHINode *PN = dyn_cast<PHINode>(User)) { 505 UserBB = PN->getIncomingBlock(TheUse); 506 } 507 508 // Preincrement use iterator so we don't invalidate it. 509 ++UI; 510 511 // If this user is in the same block as the cast, don't change the cast. 512 if (UserBB == DefBB) continue; 513 514 // If we have already inserted a cast into this block, use it. 515 CastInst *&InsertedCast = InsertedCasts[UserBB]; 516 517 if (!InsertedCast) { 518 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); 519 InsertedCast = 520 CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "", 521 InsertPt); 522 MadeChange = true; 523 } 524 525 // Replace a use of the cast with a use of the new cast. 526 TheUse = InsertedCast; 527 ++NumCastUses; 528 } 529 530 // If we removed all uses, nuke the cast. 531 if (CI->use_empty()) { 532 CI->eraseFromParent(); 533 MadeChange = true; 534 } 535 536 return MadeChange; 537 } 538 539 /// OptimizeNoopCopyExpression - If the specified cast instruction is a noop 540 /// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC), 541 /// sink it into user blocks to reduce the number of virtual 542 /// registers that must be created and coalesced. 543 /// 544 /// Return true if any changes are made. 545 /// 546 static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){ 547 // If this is a noop copy, 548 EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType()); 549 EVT DstVT = TLI.getValueType(CI->getType()); 550 551 // This is an fp<->int conversion? 552 if (SrcVT.isInteger() != DstVT.isInteger()) 553 return false; 554 555 // If this is an extension, it will be a zero or sign extension, which 556 // isn't a noop. 557 if (SrcVT.bitsLT(DstVT)) return false; 558 559 // If these values will be promoted, find out what they will be promoted 560 // to. This helps us consider truncates on PPC as noop copies when they 561 // are. 562 if (TLI.getTypeAction(CI->getContext(), SrcVT) == 563 TargetLowering::TypePromoteInteger) 564 SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT); 565 if (TLI.getTypeAction(CI->getContext(), DstVT) == 566 TargetLowering::TypePromoteInteger) 567 DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT); 568 569 // If, after promotion, these are the same types, this is a noop copy. 570 if (SrcVT != DstVT) 571 return false; 572 573 return SinkCast(CI); 574 } 575 576 /// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce 577 /// the number of virtual registers that must be created and coalesced. This is 578 /// a clear win except on targets with multiple condition code registers 579 /// (PowerPC), where it might lose; some adjustment may be wanted there. 580 /// 581 /// Return true if any changes are made. 582 static bool OptimizeCmpExpression(CmpInst *CI) { 583 BasicBlock *DefBB = CI->getParent(); 584 585 /// InsertedCmp - Only insert a cmp in each block once. 586 DenseMap<BasicBlock*, CmpInst*> InsertedCmps; 587 588 bool MadeChange = false; 589 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end(); 590 UI != E; ) { 591 Use &TheUse = UI.getUse(); 592 Instruction *User = cast<Instruction>(*UI); 593 594 // Preincrement use iterator so we don't invalidate it. 595 ++UI; 596 597 // Don't bother for PHI nodes. 598 if (isa<PHINode>(User)) 599 continue; 600 601 // Figure out which BB this cmp is used in. 602 BasicBlock *UserBB = User->getParent(); 603 604 // If this user is in the same block as the cmp, don't change the cmp. 605 if (UserBB == DefBB) continue; 606 607 // If we have already inserted a cmp into this block, use it. 608 CmpInst *&InsertedCmp = InsertedCmps[UserBB]; 609 610 if (!InsertedCmp) { 611 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); 612 InsertedCmp = 613 CmpInst::Create(CI->getOpcode(), 614 CI->getPredicate(), CI->getOperand(0), 615 CI->getOperand(1), "", InsertPt); 616 MadeChange = true; 617 } 618 619 // Replace a use of the cmp with a use of the new cmp. 620 TheUse = InsertedCmp; 621 ++NumCmpUses; 622 } 623 624 // If we removed all uses, nuke the cmp. 625 if (CI->use_empty()) 626 CI->eraseFromParent(); 627 628 return MadeChange; 629 } 630 631 /// isExtractBitsCandidateUse - Check if the candidates could 632 /// be combined with shift instruction, which includes: 633 /// 1. Truncate instruction 634 /// 2. And instruction and the imm is a mask of the low bits: 635 /// imm & (imm+1) == 0 636 bool isExtractBitsCandidateUse(Instruction *User) { 637 if (!isa<TruncInst>(User)) { 638 if (User->getOpcode() != Instruction::And || 639 !isa<ConstantInt>(User->getOperand(1))) 640 return false; 641 642 unsigned Cimm = dyn_cast<ConstantInt>(User->getOperand(1))->getZExtValue(); 643 644 if (Cimm & (Cimm + 1)) 645 return false; 646 } 647 return true; 648 } 649 650 /// SinkShiftAndTruncate - sink both shift and truncate instruction 651 /// to the use of truncate's BB. 652 bool 653 SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, 654 DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts, 655 const TargetLowering &TLI) { 656 BasicBlock *UserBB = User->getParent(); 657 DenseMap<BasicBlock *, CastInst *> InsertedTruncs; 658 TruncInst *TruncI = dyn_cast<TruncInst>(User); 659 bool MadeChange = false; 660 661 for (Value::user_iterator TruncUI = TruncI->user_begin(), 662 TruncE = TruncI->user_end(); 663 TruncUI != TruncE;) { 664 665 Use &TruncTheUse = TruncUI.getUse(); 666 Instruction *TruncUser = cast<Instruction>(*TruncUI); 667 // Preincrement use iterator so we don't invalidate it. 668 669 ++TruncUI; 670 671 int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode()); 672 if (!ISDOpcode) 673 continue; 674 675 // If the use is actually a legal node, there will not be an implicit 676 // truncate. 677 if (TLI.isOperationLegalOrCustom(ISDOpcode, 678 EVT::getEVT(TruncUser->getType()))) 679 continue; 680 681 // Don't bother for PHI nodes. 682 if (isa<PHINode>(TruncUser)) 683 continue; 684 685 BasicBlock *TruncUserBB = TruncUser->getParent(); 686 687 if (UserBB == TruncUserBB) 688 continue; 689 690 BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB]; 691 CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB]; 692 693 if (!InsertedShift && !InsertedTrunc) { 694 BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt(); 695 // Sink the shift 696 if (ShiftI->getOpcode() == Instruction::AShr) 697 InsertedShift = 698 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt); 699 else 700 InsertedShift = 701 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt); 702 703 // Sink the trunc 704 BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt(); 705 TruncInsertPt++; 706 707 InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift, 708 TruncI->getType(), "", TruncInsertPt); 709 710 MadeChange = true; 711 712 TruncTheUse = InsertedTrunc; 713 } 714 } 715 return MadeChange; 716 } 717 718 /// OptimizeExtractBits - sink the shift *right* instruction into user blocks if 719 /// the uses could potentially be combined with this shift instruction and 720 /// generate BitExtract instruction. It will only be applied if the architecture 721 /// supports BitExtract instruction. Here is an example: 722 /// BB1: 723 /// %x.extract.shift = lshr i64 %arg1, 32 724 /// BB2: 725 /// %x.extract.trunc = trunc i64 %x.extract.shift to i16 726 /// ==> 727 /// 728 /// BB2: 729 /// %x.extract.shift.1 = lshr i64 %arg1, 32 730 /// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16 731 /// 732 /// CodeGen will recoginze the pattern in BB2 and generate BitExtract 733 /// instruction. 734 /// Return true if any changes are made. 735 static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, 736 const TargetLowering &TLI) { 737 BasicBlock *DefBB = ShiftI->getParent(); 738 739 /// Only insert instructions in each block once. 740 DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts; 741 742 bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(ShiftI->getType())); 743 744 bool MadeChange = false; 745 for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end(); 746 UI != E;) { 747 Use &TheUse = UI.getUse(); 748 Instruction *User = cast<Instruction>(*UI); 749 // Preincrement use iterator so we don't invalidate it. 750 ++UI; 751 752 // Don't bother for PHI nodes. 753 if (isa<PHINode>(User)) 754 continue; 755 756 if (!isExtractBitsCandidateUse(User)) 757 continue; 758 759 BasicBlock *UserBB = User->getParent(); 760 761 if (UserBB == DefBB) { 762 // If the shift and truncate instruction are in the same BB. The use of 763 // the truncate(TruncUse) may still introduce another truncate if not 764 // legal. In this case, we would like to sink both shift and truncate 765 // instruction to the BB of TruncUse. 766 // for example: 767 // BB1: 768 // i64 shift.result = lshr i64 opnd, imm 769 // trunc.result = trunc shift.result to i16 770 // 771 // BB2: 772 // ----> We will have an implicit truncate here if the architecture does 773 // not have i16 compare. 774 // cmp i16 trunc.result, opnd2 775 // 776 if (isa<TruncInst>(User) && shiftIsLegal 777 // If the type of the truncate is legal, no trucate will be 778 // introduced in other basic blocks. 779 && (!TLI.isTypeLegal(TLI.getValueType(User->getType())))) 780 MadeChange = 781 SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI); 782 783 continue; 784 } 785 // If we have already inserted a shift into this block, use it. 786 BinaryOperator *&InsertedShift = InsertedShifts[UserBB]; 787 788 if (!InsertedShift) { 789 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); 790 791 if (ShiftI->getOpcode() == Instruction::AShr) 792 InsertedShift = 793 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt); 794 else 795 InsertedShift = 796 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt); 797 798 MadeChange = true; 799 } 800 801 // Replace a use of the shift with a use of the new shift. 802 TheUse = InsertedShift; 803 } 804 805 // If we removed all uses, nuke the shift. 806 if (ShiftI->use_empty()) 807 ShiftI->eraseFromParent(); 808 809 return MadeChange; 810 } 811 812 namespace { 813 class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls { 814 protected: 815 void replaceCall(Value *With) override { 816 CI->replaceAllUsesWith(With); 817 CI->eraseFromParent(); 818 } 819 bool isFoldable(unsigned SizeCIOp, unsigned, bool) const override { 820 if (ConstantInt *SizeCI = 821 dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) 822 return SizeCI->isAllOnesValue(); 823 return false; 824 } 825 }; 826 } // end anonymous namespace 827 828 bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { 829 BasicBlock *BB = CI->getParent(); 830 831 // Lower inline assembly if we can. 832 // If we found an inline asm expession, and if the target knows how to 833 // lower it to normal LLVM code, do so now. 834 if (TLI && isa<InlineAsm>(CI->getCalledValue())) { 835 if (TLI->ExpandInlineAsm(CI)) { 836 // Avoid invalidating the iterator. 837 CurInstIterator = BB->begin(); 838 // Avoid processing instructions out of order, which could cause 839 // reuse before a value is defined. 840 SunkAddrs.clear(); 841 return true; 842 } 843 // Sink address computing for memory operands into the block. 844 if (OptimizeInlineAsmInst(CI)) 845 return true; 846 } 847 848 // Lower all uses of llvm.objectsize.* 849 IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); 850 if (II && II->getIntrinsicID() == Intrinsic::objectsize) { 851 bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1); 852 Type *ReturnTy = CI->getType(); 853 Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL); 854 855 // Substituting this can cause recursive simplifications, which can 856 // invalidate our iterator. Use a WeakVH to hold onto it in case this 857 // happens. 858 WeakVH IterHandle(CurInstIterator); 859 860 replaceAndRecursivelySimplify(CI, RetVal, 861 TLI ? TLI->getDataLayout() : nullptr, 862 TLInfo, ModifiedDT ? nullptr : DT); 863 864 // If the iterator instruction was recursively deleted, start over at the 865 // start of the block. 866 if (IterHandle != CurInstIterator) { 867 CurInstIterator = BB->begin(); 868 SunkAddrs.clear(); 869 } 870 return true; 871 } 872 // Lower all uses of llvm.safe.[us]{div|rem}... 873 if (II && 874 (II->getIntrinsicID() == Intrinsic::safe_sdiv || 875 II->getIntrinsicID() == Intrinsic::safe_udiv || 876 II->getIntrinsicID() == Intrinsic::safe_srem || 877 II->getIntrinsicID() == Intrinsic::safe_urem)) { 878 // Given 879 // result_struct = type {iN, i1} 880 // %R = call result_struct llvm.safe.sdiv.iN(iN %x, iN %y) 881 // Expand it to actual IR, which produces result to the same variable %R. 882 // First element of the result %R.1 is the result of division, second 883 // element shows whether the division was correct or not. 884 // If %y is 0, %R.1 is 0, %R.2 is 1. (1) 885 // If %x is minSignedValue and %y is -1, %R.1 is %x, %R.2 is 1. (2) 886 // In other cases %R.1 is (sdiv %x, %y), %R.2 is 0. (3) 887 // 888 // Similar applies to srem, udiv, and urem builtins, except that in unsigned 889 // variants we don't check condition (2). 890 891 bool IsSigned; 892 BinaryOperator::BinaryOps Op; 893 switch (II->getIntrinsicID()) { 894 case Intrinsic::safe_sdiv: 895 IsSigned = true; 896 Op = Instruction::SDiv; 897 break; 898 case Intrinsic::safe_udiv: 899 IsSigned = false; 900 Op = Instruction::UDiv; 901 break; 902 case Intrinsic::safe_srem: 903 IsSigned = true; 904 Op = Instruction::SRem; 905 break; 906 case Intrinsic::safe_urem: 907 IsSigned = false; 908 Op = Instruction::URem; 909 break; 910 default: 911 llvm_unreachable("Only Div/Rem intrinsics are handled here."); 912 } 913 914 Value *LHS = II->getOperand(0), *RHS = II->getOperand(1); 915 bool DivWellDefined = TLI && TLI->isDivWellDefined(); 916 917 bool ResultNeeded[2] = {false, false}; 918 SmallVector<User*, 1> ResultsUsers[2]; 919 bool BadCase = false; 920 for (User *U: II->users()) { 921 ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(U); 922 if (!EVI || EVI->getNumIndices() > 1 || EVI->getIndices()[0] > 1) { 923 BadCase = true; 924 break; 925 } 926 ResultNeeded[EVI->getIndices()[0]] = true; 927 ResultsUsers[EVI->getIndices()[0]].push_back(U); 928 } 929 // Behave conservatively, if there is an unusual user of the results. 930 if (BadCase) 931 ResultNeeded[0] = ResultNeeded[1] = true; 932 933 // Early exit if non of the results is ever used. 934 if (!ResultNeeded[0] && !ResultNeeded[1]) { 935 II->eraseFromParent(); 936 return true; 937 } 938 939 // Early exit if the second result (flag) isn't used and target 940 // div-instruction computes exactly what we want to get as the first result 941 // and never traps. 942 if (ResultNeeded[0] && !ResultNeeded[1] && DivWellDefined) { 943 BinaryOperator *Div = BinaryOperator::Create(Op, LHS, RHS); 944 Div->insertAfter(II); 945 for (User *U: ResultsUsers[0]) { 946 Instruction *UserInst = dyn_cast<Instruction>(U); 947 assert(UserInst && "Unexpected null-instruction"); 948 UserInst->replaceAllUsesWith(Div); 949 UserInst->eraseFromParent(); 950 } 951 II->eraseFromParent(); 952 CurInstIterator = Div; 953 ModifiedDT = true; 954 return true; 955 } 956 957 Value *MinusOne = Constant::getAllOnesValue(LHS->getType()); 958 Value *Zero = Constant::getNullValue(LHS->getType()); 959 960 // Split the original BB and create other basic blocks that will be used 961 // for checks. 962 BasicBlock *StartBB = II->getParent(); 963 BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(II)); 964 BasicBlock *NextBB = StartBB->splitBasicBlock(SplitPt, "div.end"); 965 966 BasicBlock *DivByZeroBB; 967 DivByZeroBB = BasicBlock::Create(II->getContext(), "div.divz", 968 NextBB->getParent(), NextBB); 969 BranchInst::Create(NextBB, DivByZeroBB); 970 BasicBlock *DivBB = BasicBlock::Create(II->getContext(), "div.div", 971 NextBB->getParent(), NextBB); 972 BranchInst::Create(NextBB, DivBB); 973 974 // For signed variants, check the condition (2): 975 // LHS == SignedMinValue, RHS == -1. 976 Value *CmpMinusOne; 977 Value *CmpMinValue; 978 BasicBlock *ChkDivMinBB; 979 BasicBlock *DivMinBB; 980 Value *MinValue; 981 if (IsSigned) { 982 APInt SignedMinValue = 983 APInt::getSignedMinValue(LHS->getType()->getPrimitiveSizeInBits()); 984 MinValue = Constant::getIntegerValue(LHS->getType(), SignedMinValue); 985 ChkDivMinBB = BasicBlock::Create(II->getContext(), "div.chkdivmin", 986 NextBB->getParent(), NextBB); 987 BranchInst::Create(NextBB, ChkDivMinBB); 988 DivMinBB = BasicBlock::Create(II->getContext(), "div.divmin", 989 NextBB->getParent(), NextBB); 990 BranchInst::Create(NextBB, DivMinBB); 991 CmpMinusOne = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, 992 RHS, MinusOne, "cmp.rhs.minus.one", 993 ChkDivMinBB->getTerminator()); 994 CmpMinValue = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, 995 LHS, MinValue, "cmp.lhs.signed.min", 996 ChkDivMinBB->getTerminator()); 997 BinaryOperator *CmpSignedOvf = BinaryOperator::Create(Instruction::And, 998 CmpMinusOne, 999 CmpMinValue); 1000 // Here we're interested in the case when both %x is TMin and %y is -1. 1001 // In this case the result will overflow. 1002 // If that's not the case, we can perform usual division. These blocks 1003 // will be inserted after DivByZero, so the division will be safe. 1004 CmpSignedOvf->insertBefore(ChkDivMinBB->getTerminator()); 1005 BranchInst::Create(DivMinBB, DivBB, CmpSignedOvf, 1006 ChkDivMinBB->getTerminator()); 1007 ChkDivMinBB->getTerminator()->eraseFromParent(); 1008 } 1009 1010 // Check the condition (1): 1011 // RHS == 0. 1012 Value *CmpDivZero = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, 1013 RHS, Zero, "cmp.rhs.zero", 1014 StartBB->getTerminator()); 1015 1016 // If RHS != 0, we want to check condition (2) in signed case, or proceed 1017 // to usual division in unsigned case. 1018 BranchInst::Create(DivByZeroBB, IsSigned ? ChkDivMinBB : DivBB, CmpDivZero, 1019 StartBB->getTerminator()); 1020 StartBB->getTerminator()->eraseFromParent(); 1021 1022 // At the moment we have all the control flow created. We just need to 1023 // insert DIV and PHI (if needed) to get the result value. 1024 Instruction *DivRes, *FlagRes; 1025 Instruction *InsPoint = nullptr; 1026 if (ResultNeeded[0]) { 1027 BinaryOperator *Div = BinaryOperator::Create(Op, LHS, RHS); 1028 if (DivWellDefined) { 1029 // The result value is the result of DIV operation placed right at the 1030 // original place of the intrinsic. 1031 Div->insertAfter(II); 1032 DivRes = Div; 1033 } else { 1034 // The result is a PHI-node. 1035 Div->insertBefore(DivBB->getTerminator()); 1036 PHINode *DivResPN = 1037 PHINode::Create(LHS->getType(), IsSigned ? 3 : 2, "div.res.phi", 1038 NextBB->begin()); 1039 DivResPN->addIncoming(Div, DivBB); 1040 DivResPN->addIncoming(Zero, DivByZeroBB); 1041 if (IsSigned) 1042 DivResPN->addIncoming(MinValue, DivMinBB); 1043 DivRes = DivResPN; 1044 InsPoint = DivResPN; 1045 } 1046 } 1047 1048 // Prepare a value for the second result (flag) if it is needed. 1049 if (ResultNeeded[1]) { 1050 Type *FlagTy = II->getType()->getStructElementType(1); 1051 PHINode *FlagResPN = 1052 PHINode::Create(FlagTy, IsSigned ? 3 : 2, "div.flag.phi", 1053 NextBB->begin()); 1054 FlagResPN->addIncoming(Constant::getNullValue(FlagTy), DivBB); 1055 FlagResPN->addIncoming(Constant::getAllOnesValue(FlagTy), DivByZeroBB); 1056 if (IsSigned) 1057 FlagResPN->addIncoming(Constant::getAllOnesValue(FlagTy), DivMinBB); 1058 FlagRes = FlagResPN; 1059 if (!InsPoint) 1060 InsPoint = FlagRes; 1061 } 1062 1063 // If possible, propagate the results to the user. Otherwise, create alloca, 1064 // and create a struct with the results on stack. 1065 if (!BadCase) { 1066 if (ResultNeeded[0]) { 1067 for (User *U: ResultsUsers[0]) { 1068 Instruction *UserInst = dyn_cast<Instruction>(U); 1069 assert(UserInst && "Unexpected null-instruction"); 1070 UserInst->replaceAllUsesWith(DivRes); 1071 UserInst->eraseFromParent(); 1072 } 1073 } 1074 if (ResultNeeded[1]) { 1075 for (User *FlagU: ResultsUsers[1]) { 1076 Instruction *FlagUInst = dyn_cast<Instruction>(FlagU); 1077 FlagUInst->replaceAllUsesWith(FlagRes); 1078 FlagUInst->eraseFromParent(); 1079 } 1080 } 1081 } else { 1082 // Create alloca, store our new values to it, and then load the final 1083 // result from it. 1084 Constant *Idx0 = ConstantInt::get(Type::getInt32Ty(II->getContext()), 0); 1085 Constant *Idx1 = ConstantInt::get(Type::getInt32Ty(II->getContext()), 1); 1086 Value *Idxs_DivRes[2] = {Idx0, Idx0}; 1087 Value *Idxs_FlagRes[2] = {Idx0, Idx1}; 1088 Value *NewRes = new llvm::AllocaInst(II->getType(), 0, "div.res.ptr", II); 1089 Instruction *ResDivAddr = GetElementPtrInst::Create(NewRes, Idxs_DivRes); 1090 Instruction *ResFlagAddr = 1091 GetElementPtrInst::Create(NewRes, Idxs_FlagRes); 1092 ResDivAddr->insertAfter(InsPoint); 1093 ResFlagAddr->insertAfter(ResDivAddr); 1094 StoreInst *StoreResDiv = new StoreInst(DivRes, ResDivAddr); 1095 StoreInst *StoreResFlag = new StoreInst(FlagRes, ResFlagAddr); 1096 StoreResDiv->insertAfter(ResFlagAddr); 1097 StoreResFlag->insertAfter(StoreResDiv); 1098 LoadInst *LoadRes = new LoadInst(NewRes, "div.res"); 1099 LoadRes->insertAfter(StoreResFlag); 1100 II->replaceAllUsesWith(LoadRes); 1101 } 1102 1103 II->eraseFromParent(); 1104 CurInstIterator = StartBB->end(); 1105 ModifiedDT = true; 1106 return true; 1107 } 1108 1109 if (II && TLI) { 1110 SmallVector<Value*, 2> PtrOps; 1111 Type *AccessTy; 1112 if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy)) 1113 while (!PtrOps.empty()) 1114 if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy)) 1115 return true; 1116 } 1117 1118 // From here on out we're working with named functions. 1119 if (!CI->getCalledFunction()) return false; 1120 1121 // We'll need DataLayout from here on out. 1122 const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr; 1123 if (!TD) return false; 1124 1125 // Lower all default uses of _chk calls. This is very similar 1126 // to what InstCombineCalls does, but here we are only lowering calls 1127 // that have the default "don't know" as the objectsize. Anything else 1128 // should be left alone. 1129 CodeGenPrepareFortifiedLibCalls Simplifier; 1130 return Simplifier.fold(CI, TD, TLInfo); 1131 } 1132 1133 /// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return 1134 /// instructions to the predecessor to enable tail call optimizations. The 1135 /// case it is currently looking for is: 1136 /// @code 1137 /// bb0: 1138 /// %tmp0 = tail call i32 @f0() 1139 /// br label %return 1140 /// bb1: 1141 /// %tmp1 = tail call i32 @f1() 1142 /// br label %return 1143 /// bb2: 1144 /// %tmp2 = tail call i32 @f2() 1145 /// br label %return 1146 /// return: 1147 /// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ] 1148 /// ret i32 %retval 1149 /// @endcode 1150 /// 1151 /// => 1152 /// 1153 /// @code 1154 /// bb0: 1155 /// %tmp0 = tail call i32 @f0() 1156 /// ret i32 %tmp0 1157 /// bb1: 1158 /// %tmp1 = tail call i32 @f1() 1159 /// ret i32 %tmp1 1160 /// bb2: 1161 /// %tmp2 = tail call i32 @f2() 1162 /// ret i32 %tmp2 1163 /// @endcode 1164 bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) { 1165 if (!TLI) 1166 return false; 1167 1168 ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()); 1169 if (!RI) 1170 return false; 1171 1172 PHINode *PN = nullptr; 1173 BitCastInst *BCI = nullptr; 1174 Value *V = RI->getReturnValue(); 1175 if (V) { 1176 BCI = dyn_cast<BitCastInst>(V); 1177 if (BCI) 1178 V = BCI->getOperand(0); 1179 1180 PN = dyn_cast<PHINode>(V); 1181 if (!PN) 1182 return false; 1183 } 1184 1185 if (PN && PN->getParent() != BB) 1186 return false; 1187 1188 // It's not safe to eliminate the sign / zero extension of the return value. 1189 // See llvm::isInTailCallPosition(). 1190 const Function *F = BB->getParent(); 1191 AttributeSet CallerAttrs = F->getAttributes(); 1192 if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) || 1193 CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) 1194 return false; 1195 1196 // Make sure there are no instructions between the PHI and return, or that the 1197 // return is the first instruction in the block. 1198 if (PN) { 1199 BasicBlock::iterator BI = BB->begin(); 1200 do { ++BI; } while (isa<DbgInfoIntrinsic>(BI)); 1201 if (&*BI == BCI) 1202 // Also skip over the bitcast. 1203 ++BI; 1204 if (&*BI != RI) 1205 return false; 1206 } else { 1207 BasicBlock::iterator BI = BB->begin(); 1208 while (isa<DbgInfoIntrinsic>(BI)) ++BI; 1209 if (&*BI != RI) 1210 return false; 1211 } 1212 1213 /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail 1214 /// call. 1215 SmallVector<CallInst*, 4> TailCalls; 1216 if (PN) { 1217 for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { 1218 CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I)); 1219 // Make sure the phi value is indeed produced by the tail call. 1220 if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) && 1221 TLI->mayBeEmittedAsTailCall(CI)) 1222 TailCalls.push_back(CI); 1223 } 1224 } else { 1225 SmallPtrSet<BasicBlock*, 4> VisitedBBs; 1226 for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { 1227 if (!VisitedBBs.insert(*PI)) 1228 continue; 1229 1230 BasicBlock::InstListType &InstList = (*PI)->getInstList(); 1231 BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin(); 1232 BasicBlock::InstListType::reverse_iterator RE = InstList.rend(); 1233 do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI)); 1234 if (RI == RE) 1235 continue; 1236 1237 CallInst *CI = dyn_cast<CallInst>(&*RI); 1238 if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI)) 1239 TailCalls.push_back(CI); 1240 } 1241 } 1242 1243 bool Changed = false; 1244 for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) { 1245 CallInst *CI = TailCalls[i]; 1246 CallSite CS(CI); 1247 1248 // Conservatively require the attributes of the call to match those of the 1249 // return. Ignore noalias because it doesn't affect the call sequence. 1250 AttributeSet CalleeAttrs = CS.getAttributes(); 1251 if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex). 1252 removeAttribute(Attribute::NoAlias) != 1253 AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex). 1254 removeAttribute(Attribute::NoAlias)) 1255 continue; 1256 1257 // Make sure the call instruction is followed by an unconditional branch to 1258 // the return block. 1259 BasicBlock *CallBB = CI->getParent(); 1260 BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator()); 1261 if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB) 1262 continue; 1263 1264 // Duplicate the return into CallBB. 1265 (void)FoldReturnIntoUncondBranch(RI, BB, CallBB); 1266 ModifiedDT = Changed = true; 1267 ++NumRetsDup; 1268 } 1269 1270 // If we eliminated all predecessors of the block, delete the block now. 1271 if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB)) 1272 BB->eraseFromParent(); 1273 1274 return Changed; 1275 } 1276 1277 //===----------------------------------------------------------------------===// 1278 // Memory Optimization 1279 //===----------------------------------------------------------------------===// 1280 1281 namespace { 1282 1283 /// ExtAddrMode - This is an extended version of TargetLowering::AddrMode 1284 /// which holds actual Value*'s for register values. 1285 struct ExtAddrMode : public TargetLowering::AddrMode { 1286 Value *BaseReg; 1287 Value *ScaledReg; 1288 ExtAddrMode() : BaseReg(nullptr), ScaledReg(nullptr) {} 1289 void print(raw_ostream &OS) const; 1290 void dump() const; 1291 1292 bool operator==(const ExtAddrMode& O) const { 1293 return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) && 1294 (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) && 1295 (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale); 1296 } 1297 }; 1298 1299 #ifndef NDEBUG 1300 static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) { 1301 AM.print(OS); 1302 return OS; 1303 } 1304 #endif 1305 1306 void ExtAddrMode::print(raw_ostream &OS) const { 1307 bool NeedPlus = false; 1308 OS << "["; 1309 if (BaseGV) { 1310 OS << (NeedPlus ? " + " : "") 1311 << "GV:"; 1312 BaseGV->printAsOperand(OS, /*PrintType=*/false); 1313 NeedPlus = true; 1314 } 1315 1316 if (BaseOffs) 1317 OS << (NeedPlus ? " + " : "") << BaseOffs, NeedPlus = true; 1318 1319 if (BaseReg) { 1320 OS << (NeedPlus ? " + " : "") 1321 << "Base:"; 1322 BaseReg->printAsOperand(OS, /*PrintType=*/false); 1323 NeedPlus = true; 1324 } 1325 if (Scale) { 1326 OS << (NeedPlus ? " + " : "") 1327 << Scale << "*"; 1328 ScaledReg->printAsOperand(OS, /*PrintType=*/false); 1329 } 1330 1331 OS << ']'; 1332 } 1333 1334 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1335 void ExtAddrMode::dump() const { 1336 print(dbgs()); 1337 dbgs() << '\n'; 1338 } 1339 #endif 1340 1341 /// \brief This class provides transaction based operation on the IR. 1342 /// Every change made through this class is recorded in the internal state and 1343 /// can be undone (rollback) until commit is called. 1344 class TypePromotionTransaction { 1345 1346 /// \brief This represents the common interface of the individual transaction. 1347 /// Each class implements the logic for doing one specific modification on 1348 /// the IR via the TypePromotionTransaction. 1349 class TypePromotionAction { 1350 protected: 1351 /// The Instruction modified. 1352 Instruction *Inst; 1353 1354 public: 1355 /// \brief Constructor of the action. 1356 /// The constructor performs the related action on the IR. 1357 TypePromotionAction(Instruction *Inst) : Inst(Inst) {} 1358 1359 virtual ~TypePromotionAction() {} 1360 1361 /// \brief Undo the modification done by this action. 1362 /// When this method is called, the IR must be in the same state as it was 1363 /// before this action was applied. 1364 /// \pre Undoing the action works if and only if the IR is in the exact same 1365 /// state as it was directly after this action was applied. 1366 virtual void undo() = 0; 1367 1368 /// \brief Advocate every change made by this action. 1369 /// When the results on the IR of the action are to be kept, it is important 1370 /// to call this function, otherwise hidden information may be kept forever. 1371 virtual void commit() { 1372 // Nothing to be done, this action is not doing anything. 1373 } 1374 }; 1375 1376 /// \brief Utility to remember the position of an instruction. 1377 class InsertionHandler { 1378 /// Position of an instruction. 1379 /// Either an instruction: 1380 /// - Is the first in a basic block: BB is used. 1381 /// - Has a previous instructon: PrevInst is used. 1382 union { 1383 Instruction *PrevInst; 1384 BasicBlock *BB; 1385 } Point; 1386 /// Remember whether or not the instruction had a previous instruction. 1387 bool HasPrevInstruction; 1388 1389 public: 1390 /// \brief Record the position of \p Inst. 1391 InsertionHandler(Instruction *Inst) { 1392 BasicBlock::iterator It = Inst; 1393 HasPrevInstruction = (It != (Inst->getParent()->begin())); 1394 if (HasPrevInstruction) 1395 Point.PrevInst = --It; 1396 else 1397 Point.BB = Inst->getParent(); 1398 } 1399 1400 /// \brief Insert \p Inst at the recorded position. 1401 void insert(Instruction *Inst) { 1402 if (HasPrevInstruction) { 1403 if (Inst->getParent()) 1404 Inst->removeFromParent(); 1405 Inst->insertAfter(Point.PrevInst); 1406 } else { 1407 Instruction *Position = Point.BB->getFirstInsertionPt(); 1408 if (Inst->getParent()) 1409 Inst->moveBefore(Position); 1410 else 1411 Inst->insertBefore(Position); 1412 } 1413 } 1414 }; 1415 1416 /// \brief Move an instruction before another. 1417 class InstructionMoveBefore : public TypePromotionAction { 1418 /// Original position of the instruction. 1419 InsertionHandler Position; 1420 1421 public: 1422 /// \brief Move \p Inst before \p Before. 1423 InstructionMoveBefore(Instruction *Inst, Instruction *Before) 1424 : TypePromotionAction(Inst), Position(Inst) { 1425 DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before << "\n"); 1426 Inst->moveBefore(Before); 1427 } 1428 1429 /// \brief Move the instruction back to its original position. 1430 void undo() override { 1431 DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n"); 1432 Position.insert(Inst); 1433 } 1434 }; 1435 1436 /// \brief Set the operand of an instruction with a new value. 1437 class OperandSetter : public TypePromotionAction { 1438 /// Original operand of the instruction. 1439 Value *Origin; 1440 /// Index of the modified instruction. 1441 unsigned Idx; 1442 1443 public: 1444 /// \brief Set \p Idx operand of \p Inst with \p NewVal. 1445 OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal) 1446 : TypePromotionAction(Inst), Idx(Idx) { 1447 DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n" 1448 << "for:" << *Inst << "\n" 1449 << "with:" << *NewVal << "\n"); 1450 Origin = Inst->getOperand(Idx); 1451 Inst->setOperand(Idx, NewVal); 1452 } 1453 1454 /// \brief Restore the original value of the instruction. 1455 void undo() override { 1456 DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n" 1457 << "for: " << *Inst << "\n" 1458 << "with: " << *Origin << "\n"); 1459 Inst->setOperand(Idx, Origin); 1460 } 1461 }; 1462 1463 /// \brief Hide the operands of an instruction. 1464 /// Do as if this instruction was not using any of its operands. 1465 class OperandsHider : public TypePromotionAction { 1466 /// The list of original operands. 1467 SmallVector<Value *, 4> OriginalValues; 1468 1469 public: 1470 /// \brief Remove \p Inst from the uses of the operands of \p Inst. 1471 OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) { 1472 DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n"); 1473 unsigned NumOpnds = Inst->getNumOperands(); 1474 OriginalValues.reserve(NumOpnds); 1475 for (unsigned It = 0; It < NumOpnds; ++It) { 1476 // Save the current operand. 1477 Value *Val = Inst->getOperand(It); 1478 OriginalValues.push_back(Val); 1479 // Set a dummy one. 1480 // We could use OperandSetter here, but that would implied an overhead 1481 // that we are not willing to pay. 1482 Inst->setOperand(It, UndefValue::get(Val->getType())); 1483 } 1484 } 1485 1486 /// \brief Restore the original list of uses. 1487 void undo() override { 1488 DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n"); 1489 for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It) 1490 Inst->setOperand(It, OriginalValues[It]); 1491 } 1492 }; 1493 1494 /// \brief Build a truncate instruction. 1495 class TruncBuilder : public TypePromotionAction { 1496 public: 1497 /// \brief Build a truncate instruction of \p Opnd producing a \p Ty 1498 /// result. 1499 /// trunc Opnd to Ty. 1500 TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) { 1501 IRBuilder<> Builder(Opnd); 1502 Inst = cast<Instruction>(Builder.CreateTrunc(Opnd, Ty, "promoted")); 1503 DEBUG(dbgs() << "Do: TruncBuilder: " << *Inst << "\n"); 1504 } 1505 1506 /// \brief Get the built instruction. 1507 Instruction *getBuiltInstruction() { return Inst; } 1508 1509 /// \brief Remove the built instruction. 1510 void undo() override { 1511 DEBUG(dbgs() << "Undo: TruncBuilder: " << *Inst << "\n"); 1512 Inst->eraseFromParent(); 1513 } 1514 }; 1515 1516 /// \brief Build a sign extension instruction. 1517 class SExtBuilder : public TypePromotionAction { 1518 public: 1519 /// \brief Build a sign extension instruction of \p Opnd producing a \p Ty 1520 /// result. 1521 /// sext Opnd to Ty. 1522 SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty) 1523 : TypePromotionAction(Inst) { 1524 IRBuilder<> Builder(InsertPt); 1525 Inst = cast<Instruction>(Builder.CreateSExt(Opnd, Ty, "promoted")); 1526 DEBUG(dbgs() << "Do: SExtBuilder: " << *Inst << "\n"); 1527 } 1528 1529 /// \brief Get the built instruction. 1530 Instruction *getBuiltInstruction() { return Inst; } 1531 1532 /// \brief Remove the built instruction. 1533 void undo() override { 1534 DEBUG(dbgs() << "Undo: SExtBuilder: " << *Inst << "\n"); 1535 Inst->eraseFromParent(); 1536 } 1537 }; 1538 1539 /// \brief Mutate an instruction to another type. 1540 class TypeMutator : public TypePromotionAction { 1541 /// Record the original type. 1542 Type *OrigTy; 1543 1544 public: 1545 /// \brief Mutate the type of \p Inst into \p NewTy. 1546 TypeMutator(Instruction *Inst, Type *NewTy) 1547 : TypePromotionAction(Inst), OrigTy(Inst->getType()) { 1548 DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy 1549 << "\n"); 1550 Inst->mutateType(NewTy); 1551 } 1552 1553 /// \brief Mutate the instruction back to its original type. 1554 void undo() override { 1555 DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy 1556 << "\n"); 1557 Inst->mutateType(OrigTy); 1558 } 1559 }; 1560 1561 /// \brief Replace the uses of an instruction by another instruction. 1562 class UsesReplacer : public TypePromotionAction { 1563 /// Helper structure to keep track of the replaced uses. 1564 struct InstructionAndIdx { 1565 /// The instruction using the instruction. 1566 Instruction *Inst; 1567 /// The index where this instruction is used for Inst. 1568 unsigned Idx; 1569 InstructionAndIdx(Instruction *Inst, unsigned Idx) 1570 : Inst(Inst), Idx(Idx) {} 1571 }; 1572 1573 /// Keep track of the original uses (pair Instruction, Index). 1574 SmallVector<InstructionAndIdx, 4> OriginalUses; 1575 typedef SmallVectorImpl<InstructionAndIdx>::iterator use_iterator; 1576 1577 public: 1578 /// \brief Replace all the use of \p Inst by \p New. 1579 UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) { 1580 DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New 1581 << "\n"); 1582 // Record the original uses. 1583 for (Use &U : Inst->uses()) { 1584 Instruction *UserI = cast<Instruction>(U.getUser()); 1585 OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo())); 1586 } 1587 // Now, we can replace the uses. 1588 Inst->replaceAllUsesWith(New); 1589 } 1590 1591 /// \brief Reassign the original uses of Inst to Inst. 1592 void undo() override { 1593 DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n"); 1594 for (use_iterator UseIt = OriginalUses.begin(), 1595 EndIt = OriginalUses.end(); 1596 UseIt != EndIt; ++UseIt) { 1597 UseIt->Inst->setOperand(UseIt->Idx, Inst); 1598 } 1599 } 1600 }; 1601 1602 /// \brief Remove an instruction from the IR. 1603 class InstructionRemover : public TypePromotionAction { 1604 /// Original position of the instruction. 1605 InsertionHandler Inserter; 1606 /// Helper structure to hide all the link to the instruction. In other 1607 /// words, this helps to do as if the instruction was removed. 1608 OperandsHider Hider; 1609 /// Keep track of the uses replaced, if any. 1610 UsesReplacer *Replacer; 1611 1612 public: 1613 /// \brief Remove all reference of \p Inst and optinally replace all its 1614 /// uses with New. 1615 /// \pre If !Inst->use_empty(), then New != nullptr 1616 InstructionRemover(Instruction *Inst, Value *New = nullptr) 1617 : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst), 1618 Replacer(nullptr) { 1619 if (New) 1620 Replacer = new UsesReplacer(Inst, New); 1621 DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n"); 1622 Inst->removeFromParent(); 1623 } 1624 1625 ~InstructionRemover() { delete Replacer; } 1626 1627 /// \brief Really remove the instruction. 1628 void commit() override { delete Inst; } 1629 1630 /// \brief Resurrect the instruction and reassign it to the proper uses if 1631 /// new value was provided when build this action. 1632 void undo() override { 1633 DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n"); 1634 Inserter.insert(Inst); 1635 if (Replacer) 1636 Replacer->undo(); 1637 Hider.undo(); 1638 } 1639 }; 1640 1641 public: 1642 /// Restoration point. 1643 /// The restoration point is a pointer to an action instead of an iterator 1644 /// because the iterator may be invalidated but not the pointer. 1645 typedef const TypePromotionAction *ConstRestorationPt; 1646 /// Advocate every changes made in that transaction. 1647 void commit(); 1648 /// Undo all the changes made after the given point. 1649 void rollback(ConstRestorationPt Point); 1650 /// Get the current restoration point. 1651 ConstRestorationPt getRestorationPoint() const; 1652 1653 /// \name API for IR modification with state keeping to support rollback. 1654 /// @{ 1655 /// Same as Instruction::setOperand. 1656 void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal); 1657 /// Same as Instruction::eraseFromParent. 1658 void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr); 1659 /// Same as Value::replaceAllUsesWith. 1660 void replaceAllUsesWith(Instruction *Inst, Value *New); 1661 /// Same as Value::mutateType. 1662 void mutateType(Instruction *Inst, Type *NewTy); 1663 /// Same as IRBuilder::createTrunc. 1664 Instruction *createTrunc(Instruction *Opnd, Type *Ty); 1665 /// Same as IRBuilder::createSExt. 1666 Instruction *createSExt(Instruction *Inst, Value *Opnd, Type *Ty); 1667 /// Same as Instruction::moveBefore. 1668 void moveBefore(Instruction *Inst, Instruction *Before); 1669 /// @} 1670 1671 private: 1672 /// The ordered list of actions made so far. 1673 SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions; 1674 typedef SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator CommitPt; 1675 }; 1676 1677 void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx, 1678 Value *NewVal) { 1679 Actions.push_back( 1680 make_unique<TypePromotionTransaction::OperandSetter>(Inst, Idx, NewVal)); 1681 } 1682 1683 void TypePromotionTransaction::eraseInstruction(Instruction *Inst, 1684 Value *NewVal) { 1685 Actions.push_back( 1686 make_unique<TypePromotionTransaction::InstructionRemover>(Inst, NewVal)); 1687 } 1688 1689 void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst, 1690 Value *New) { 1691 Actions.push_back(make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New)); 1692 } 1693 1694 void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) { 1695 Actions.push_back(make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy)); 1696 } 1697 1698 Instruction *TypePromotionTransaction::createTrunc(Instruction *Opnd, 1699 Type *Ty) { 1700 std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty)); 1701 Instruction *I = Ptr->getBuiltInstruction(); 1702 Actions.push_back(std::move(Ptr)); 1703 return I; 1704 } 1705 1706 Instruction *TypePromotionTransaction::createSExt(Instruction *Inst, 1707 Value *Opnd, Type *Ty) { 1708 std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty)); 1709 Instruction *I = Ptr->getBuiltInstruction(); 1710 Actions.push_back(std::move(Ptr)); 1711 return I; 1712 } 1713 1714 void TypePromotionTransaction::moveBefore(Instruction *Inst, 1715 Instruction *Before) { 1716 Actions.push_back( 1717 make_unique<TypePromotionTransaction::InstructionMoveBefore>(Inst, Before)); 1718 } 1719 1720 TypePromotionTransaction::ConstRestorationPt 1721 TypePromotionTransaction::getRestorationPoint() const { 1722 return !Actions.empty() ? Actions.back().get() : nullptr; 1723 } 1724 1725 void TypePromotionTransaction::commit() { 1726 for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt; 1727 ++It) 1728 (*It)->commit(); 1729 Actions.clear(); 1730 } 1731 1732 void TypePromotionTransaction::rollback( 1733 TypePromotionTransaction::ConstRestorationPt Point) { 1734 while (!Actions.empty() && Point != Actions.back().get()) { 1735 std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val(); 1736 Curr->undo(); 1737 } 1738 } 1739 1740 /// \brief A helper class for matching addressing modes. 1741 /// 1742 /// This encapsulates the logic for matching the target-legal addressing modes. 1743 class AddressingModeMatcher { 1744 SmallVectorImpl<Instruction*> &AddrModeInsts; 1745 const TargetLowering &TLI; 1746 1747 /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and 1748 /// the memory instruction that we're computing this address for. 1749 Type *AccessTy; 1750 Instruction *MemoryInst; 1751 1752 /// AddrMode - This is the addressing mode that we're building up. This is 1753 /// part of the return value of this addressing mode matching stuff. 1754 ExtAddrMode &AddrMode; 1755 1756 /// The truncate instruction inserted by other CodeGenPrepare optimizations. 1757 const SetOfInstrs &InsertedTruncs; 1758 /// A map from the instructions to their type before promotion. 1759 InstrToOrigTy &PromotedInsts; 1760 /// The ongoing transaction where every action should be registered. 1761 TypePromotionTransaction &TPT; 1762 1763 /// IgnoreProfitability - This is set to true when we should not do 1764 /// profitability checks. When true, IsProfitableToFoldIntoAddressingMode 1765 /// always returns true. 1766 bool IgnoreProfitability; 1767 1768 AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI, 1769 const TargetLowering &T, Type *AT, 1770 Instruction *MI, ExtAddrMode &AM, 1771 const SetOfInstrs &InsertedTruncs, 1772 InstrToOrigTy &PromotedInsts, 1773 TypePromotionTransaction &TPT) 1774 : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM), 1775 InsertedTruncs(InsertedTruncs), PromotedInsts(PromotedInsts), TPT(TPT) { 1776 IgnoreProfitability = false; 1777 } 1778 public: 1779 1780 /// Match - Find the maximal addressing mode that a load/store of V can fold, 1781 /// give an access type of AccessTy. This returns a list of involved 1782 /// instructions in AddrModeInsts. 1783 /// \p InsertedTruncs The truncate instruction inserted by other 1784 /// CodeGenPrepare 1785 /// optimizations. 1786 /// \p PromotedInsts maps the instructions to their type before promotion. 1787 /// \p The ongoing transaction where every action should be registered. 1788 static ExtAddrMode Match(Value *V, Type *AccessTy, 1789 Instruction *MemoryInst, 1790 SmallVectorImpl<Instruction*> &AddrModeInsts, 1791 const TargetLowering &TLI, 1792 const SetOfInstrs &InsertedTruncs, 1793 InstrToOrigTy &PromotedInsts, 1794 TypePromotionTransaction &TPT) { 1795 ExtAddrMode Result; 1796 1797 bool Success = AddressingModeMatcher(AddrModeInsts, TLI, AccessTy, 1798 MemoryInst, Result, InsertedTruncs, 1799 PromotedInsts, TPT).MatchAddr(V, 0); 1800 (void)Success; assert(Success && "Couldn't select *anything*?"); 1801 return Result; 1802 } 1803 private: 1804 bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth); 1805 bool MatchAddr(Value *V, unsigned Depth); 1806 bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth, 1807 bool *MovedAway = nullptr); 1808 bool IsProfitableToFoldIntoAddressingMode(Instruction *I, 1809 ExtAddrMode &AMBefore, 1810 ExtAddrMode &AMAfter); 1811 bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2); 1812 bool IsPromotionProfitable(unsigned MatchedSize, unsigned SizeWithPromotion, 1813 Value *PromotedOperand) const; 1814 }; 1815 1816 /// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode. 1817 /// Return true and update AddrMode if this addr mode is legal for the target, 1818 /// false if not. 1819 bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale, 1820 unsigned Depth) { 1821 // If Scale is 1, then this is the same as adding ScaleReg to the addressing 1822 // mode. Just process that directly. 1823 if (Scale == 1) 1824 return MatchAddr(ScaleReg, Depth); 1825 1826 // If the scale is 0, it takes nothing to add this. 1827 if (Scale == 0) 1828 return true; 1829 1830 // If we already have a scale of this value, we can add to it, otherwise, we 1831 // need an available scale field. 1832 if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg) 1833 return false; 1834 1835 ExtAddrMode TestAddrMode = AddrMode; 1836 1837 // Add scale to turn X*4+X*3 -> X*7. This could also do things like 1838 // [A+B + A*7] -> [B+A*8]. 1839 TestAddrMode.Scale += Scale; 1840 TestAddrMode.ScaledReg = ScaleReg; 1841 1842 // If the new address isn't legal, bail out. 1843 if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) 1844 return false; 1845 1846 // It was legal, so commit it. 1847 AddrMode = TestAddrMode; 1848 1849 // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now 1850 // to see if ScaleReg is actually X+C. If so, we can turn this into adding 1851 // X*Scale + C*Scale to addr mode. 1852 ConstantInt *CI = nullptr; Value *AddLHS = nullptr; 1853 if (isa<Instruction>(ScaleReg) && // not a constant expr. 1854 match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) { 1855 TestAddrMode.ScaledReg = AddLHS; 1856 TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale; 1857 1858 // If this addressing mode is legal, commit it and remember that we folded 1859 // this instruction. 1860 if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) { 1861 AddrModeInsts.push_back(cast<Instruction>(ScaleReg)); 1862 AddrMode = TestAddrMode; 1863 return true; 1864 } 1865 } 1866 1867 // Otherwise, not (x+c)*scale, just return what we have. 1868 return true; 1869 } 1870 1871 /// MightBeFoldableInst - This is a little filter, which returns true if an 1872 /// addressing computation involving I might be folded into a load/store 1873 /// accessing it. This doesn't need to be perfect, but needs to accept at least 1874 /// the set of instructions that MatchOperationAddr can. 1875 static bool MightBeFoldableInst(Instruction *I) { 1876 switch (I->getOpcode()) { 1877 case Instruction::BitCast: 1878 // Don't touch identity bitcasts. 1879 if (I->getType() == I->getOperand(0)->getType()) 1880 return false; 1881 return I->getType()->isPointerTy() || I->getType()->isIntegerTy(); 1882 case Instruction::PtrToInt: 1883 // PtrToInt is always a noop, as we know that the int type is pointer sized. 1884 return true; 1885 case Instruction::IntToPtr: 1886 // We know the input is intptr_t, so this is foldable. 1887 return true; 1888 case Instruction::Add: 1889 return true; 1890 case Instruction::Mul: 1891 case Instruction::Shl: 1892 // Can only handle X*C and X << C. 1893 return isa<ConstantInt>(I->getOperand(1)); 1894 case Instruction::GetElementPtr: 1895 return true; 1896 default: 1897 return false; 1898 } 1899 } 1900 1901 /// \brief Hepler class to perform type promotion. 1902 class TypePromotionHelper { 1903 /// \brief Utility function to check whether or not a sign extension of 1904 /// \p Inst with \p ConsideredSExtType can be moved through \p Inst by either 1905 /// using the operands of \p Inst or promoting \p Inst. 1906 /// In other words, check if: 1907 /// sext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredSExtType. 1908 /// #1 Promotion applies: 1909 /// ConsideredSExtType Inst (sext opnd1 to ConsideredSExtType, ...). 1910 /// #2 Operand reuses: 1911 /// sext opnd1 to ConsideredSExtType. 1912 /// \p PromotedInsts maps the instructions to their type before promotion. 1913 static bool canGetThrough(const Instruction *Inst, Type *ConsideredSExtType, 1914 const InstrToOrigTy &PromotedInsts); 1915 1916 /// \brief Utility function to determine if \p OpIdx should be promoted when 1917 /// promoting \p Inst. 1918 static bool shouldSExtOperand(const Instruction *Inst, int OpIdx) { 1919 if (isa<SelectInst>(Inst) && OpIdx == 0) 1920 return false; 1921 return true; 1922 } 1923 1924 /// \brief Utility function to promote the operand of \p SExt when this 1925 /// operand is a promotable trunc or sext. 1926 /// \p PromotedInsts maps the instructions to their type before promotion. 1927 /// \p CreatedInsts[out] contains how many non-free instructions have been 1928 /// created to promote the operand of SExt. 1929 /// Should never be called directly. 1930 /// \return The promoted value which is used instead of SExt. 1931 static Value *promoteOperandForTruncAndSExt(Instruction *SExt, 1932 TypePromotionTransaction &TPT, 1933 InstrToOrigTy &PromotedInsts, 1934 unsigned &CreatedInsts); 1935 1936 /// \brief Utility function to promote the operand of \p SExt when this 1937 /// operand is promotable and is not a supported trunc or sext. 1938 /// \p PromotedInsts maps the instructions to their type before promotion. 1939 /// \p CreatedInsts[out] contains how many non-free instructions have been 1940 /// created to promote the operand of SExt. 1941 /// Should never be called directly. 1942 /// \return The promoted value which is used instead of SExt. 1943 static Value *promoteOperandForOther(Instruction *SExt, 1944 TypePromotionTransaction &TPT, 1945 InstrToOrigTy &PromotedInsts, 1946 unsigned &CreatedInsts); 1947 1948 public: 1949 /// Type for the utility function that promotes the operand of SExt. 1950 typedef Value *(*Action)(Instruction *SExt, TypePromotionTransaction &TPT, 1951 InstrToOrigTy &PromotedInsts, 1952 unsigned &CreatedInsts); 1953 /// \brief Given a sign extend instruction \p SExt, return the approriate 1954 /// action to promote the operand of \p SExt instead of using SExt. 1955 /// \return NULL if no promotable action is possible with the current 1956 /// sign extension. 1957 /// \p InsertedTruncs keeps track of all the truncate instructions inserted by 1958 /// the others CodeGenPrepare optimizations. This information is important 1959 /// because we do not want to promote these instructions as CodeGenPrepare 1960 /// will reinsert them later. Thus creating an infinite loop: create/remove. 1961 /// \p PromotedInsts maps the instructions to their type before promotion. 1962 static Action getAction(Instruction *SExt, const SetOfInstrs &InsertedTruncs, 1963 const TargetLowering &TLI, 1964 const InstrToOrigTy &PromotedInsts); 1965 }; 1966 1967 bool TypePromotionHelper::canGetThrough(const Instruction *Inst, 1968 Type *ConsideredSExtType, 1969 const InstrToOrigTy &PromotedInsts) { 1970 // We can always get through sext. 1971 if (isa<SExtInst>(Inst)) 1972 return true; 1973 1974 // We can get through binary operator, if it is legal. In other words, the 1975 // binary operator must have a nuw or nsw flag. 1976 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst); 1977 if (BinOp && isa<OverflowingBinaryOperator>(BinOp) && 1978 (BinOp->hasNoUnsignedWrap() || BinOp->hasNoSignedWrap())) 1979 return true; 1980 1981 // Check if we can do the following simplification. 1982 // sext(trunc(sext)) --> sext 1983 if (!isa<TruncInst>(Inst)) 1984 return false; 1985 1986 Value *OpndVal = Inst->getOperand(0); 1987 // Check if we can use this operand in the sext. 1988 // If the type is larger than the result type of the sign extension, 1989 // we cannot. 1990 if (OpndVal->getType()->getIntegerBitWidth() > 1991 ConsideredSExtType->getIntegerBitWidth()) 1992 return false; 1993 1994 // If the operand of the truncate is not an instruction, we will not have 1995 // any information on the dropped bits. 1996 // (Actually we could for constant but it is not worth the extra logic). 1997 Instruction *Opnd = dyn_cast<Instruction>(OpndVal); 1998 if (!Opnd) 1999 return false; 2000 2001 // Check if the source of the type is narrow enough. 2002 // I.e., check that trunc just drops sign extended bits. 2003 // #1 get the type of the operand. 2004 const Type *OpndType; 2005 InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd); 2006 if (It != PromotedInsts.end()) 2007 OpndType = It->second; 2008 else if (isa<SExtInst>(Opnd)) 2009 OpndType = cast<Instruction>(Opnd)->getOperand(0)->getType(); 2010 else 2011 return false; 2012 2013 // #2 check that the truncate just drop sign extended bits. 2014 if (Inst->getType()->getIntegerBitWidth() >= OpndType->getIntegerBitWidth()) 2015 return true; 2016 2017 return false; 2018 } 2019 2020 TypePromotionHelper::Action TypePromotionHelper::getAction( 2021 Instruction *SExt, const SetOfInstrs &InsertedTruncs, 2022 const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) { 2023 Instruction *SExtOpnd = dyn_cast<Instruction>(SExt->getOperand(0)); 2024 Type *SExtTy = SExt->getType(); 2025 // If the operand of the sign extension is not an instruction, we cannot 2026 // get through. 2027 // If it, check we can get through. 2028 if (!SExtOpnd || !canGetThrough(SExtOpnd, SExtTy, PromotedInsts)) 2029 return nullptr; 2030 2031 // Do not promote if the operand has been added by codegenprepare. 2032 // Otherwise, it means we are undoing an optimization that is likely to be 2033 // redone, thus causing potential infinite loop. 2034 if (isa<TruncInst>(SExtOpnd) && InsertedTruncs.count(SExtOpnd)) 2035 return nullptr; 2036 2037 // SExt or Trunc instructions. 2038 // Return the related handler. 2039 if (isa<SExtInst>(SExtOpnd) || isa<TruncInst>(SExtOpnd)) 2040 return promoteOperandForTruncAndSExt; 2041 2042 // Regular instruction. 2043 // Abort early if we will have to insert non-free instructions. 2044 if (!SExtOpnd->hasOneUse() && 2045 !TLI.isTruncateFree(SExtTy, SExtOpnd->getType())) 2046 return nullptr; 2047 return promoteOperandForOther; 2048 } 2049 2050 Value *TypePromotionHelper::promoteOperandForTruncAndSExt( 2051 llvm::Instruction *SExt, TypePromotionTransaction &TPT, 2052 InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts) { 2053 // By construction, the operand of SExt is an instruction. Otherwise we cannot 2054 // get through it and this method should not be called. 2055 Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0)); 2056 // Replace sext(trunc(opnd)) or sext(sext(opnd)) 2057 // => sext(opnd). 2058 TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0)); 2059 CreatedInsts = 0; 2060 2061 // Remove dead code. 2062 if (SExtOpnd->use_empty()) 2063 TPT.eraseInstruction(SExtOpnd); 2064 2065 // Check if the sext is still needed. 2066 if (SExt->getType() != SExt->getOperand(0)->getType()) 2067 return SExt; 2068 2069 // At this point we have: sext ty opnd to ty. 2070 // Reassign the uses of SExt to the opnd and remove SExt. 2071 Value *NextVal = SExt->getOperand(0); 2072 TPT.eraseInstruction(SExt, NextVal); 2073 return NextVal; 2074 } 2075 2076 Value * 2077 TypePromotionHelper::promoteOperandForOther(Instruction *SExt, 2078 TypePromotionTransaction &TPT, 2079 InstrToOrigTy &PromotedInsts, 2080 unsigned &CreatedInsts) { 2081 // By construction, the operand of SExt is an instruction. Otherwise we cannot 2082 // get through it and this method should not be called. 2083 Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0)); 2084 CreatedInsts = 0; 2085 if (!SExtOpnd->hasOneUse()) { 2086 // SExtOpnd will be promoted. 2087 // All its uses, but SExt, will need to use a truncated value of the 2088 // promoted version. 2089 // Create the truncate now. 2090 Instruction *Trunc = TPT.createTrunc(SExt, SExtOpnd->getType()); 2091 Trunc->removeFromParent(); 2092 // Insert it just after the definition. 2093 Trunc->insertAfter(SExtOpnd); 2094 2095 TPT.replaceAllUsesWith(SExtOpnd, Trunc); 2096 // Restore the operand of SExt (which has been replace by the previous call 2097 // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext. 2098 TPT.setOperand(SExt, 0, SExtOpnd); 2099 } 2100 2101 // Get through the Instruction: 2102 // 1. Update its type. 2103 // 2. Replace the uses of SExt by Inst. 2104 // 3. Sign extend each operand that needs to be sign extended. 2105 2106 // Remember the original type of the instruction before promotion. 2107 // This is useful to know that the high bits are sign extended bits. 2108 PromotedInsts.insert( 2109 std::pair<Instruction *, Type *>(SExtOpnd, SExtOpnd->getType())); 2110 // Step #1. 2111 TPT.mutateType(SExtOpnd, SExt->getType()); 2112 // Step #2. 2113 TPT.replaceAllUsesWith(SExt, SExtOpnd); 2114 // Step #3. 2115 Instruction *SExtForOpnd = SExt; 2116 2117 DEBUG(dbgs() << "Propagate SExt to operands\n"); 2118 for (int OpIdx = 0, EndOpIdx = SExtOpnd->getNumOperands(); OpIdx != EndOpIdx; 2119 ++OpIdx) { 2120 DEBUG(dbgs() << "Operand:\n" << *(SExtOpnd->getOperand(OpIdx)) << '\n'); 2121 if (SExtOpnd->getOperand(OpIdx)->getType() == SExt->getType() || 2122 !shouldSExtOperand(SExtOpnd, OpIdx)) { 2123 DEBUG(dbgs() << "No need to propagate\n"); 2124 continue; 2125 } 2126 // Check if we can statically sign extend the operand. 2127 Value *Opnd = SExtOpnd->getOperand(OpIdx); 2128 if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) { 2129 DEBUG(dbgs() << "Statically sign extend\n"); 2130 TPT.setOperand( 2131 SExtOpnd, OpIdx, 2132 ConstantInt::getSigned(SExt->getType(), Cst->getSExtValue())); 2133 continue; 2134 } 2135 // UndefValue are typed, so we have to statically sign extend them. 2136 if (isa<UndefValue>(Opnd)) { 2137 DEBUG(dbgs() << "Statically sign extend\n"); 2138 TPT.setOperand(SExtOpnd, OpIdx, UndefValue::get(SExt->getType())); 2139 continue; 2140 } 2141 2142 // Otherwise we have to explicity sign extend the operand. 2143 // Check if SExt was reused to sign extend an operand. 2144 if (!SExtForOpnd) { 2145 // If yes, create a new one. 2146 DEBUG(dbgs() << "More operands to sext\n"); 2147 SExtForOpnd = TPT.createSExt(SExt, Opnd, SExt->getType()); 2148 ++CreatedInsts; 2149 } 2150 2151 TPT.setOperand(SExtForOpnd, 0, Opnd); 2152 2153 // Move the sign extension before the insertion point. 2154 TPT.moveBefore(SExtForOpnd, SExtOpnd); 2155 TPT.setOperand(SExtOpnd, OpIdx, SExtForOpnd); 2156 // If more sext are required, new instructions will have to be created. 2157 SExtForOpnd = nullptr; 2158 } 2159 if (SExtForOpnd == SExt) { 2160 DEBUG(dbgs() << "Sign extension is useless now\n"); 2161 TPT.eraseInstruction(SExt); 2162 } 2163 return SExtOpnd; 2164 } 2165 2166 /// IsPromotionProfitable - Check whether or not promoting an instruction 2167 /// to a wider type was profitable. 2168 /// \p MatchedSize gives the number of instructions that have been matched 2169 /// in the addressing mode after the promotion was applied. 2170 /// \p SizeWithPromotion gives the number of created instructions for 2171 /// the promotion plus the number of instructions that have been 2172 /// matched in the addressing mode before the promotion. 2173 /// \p PromotedOperand is the value that has been promoted. 2174 /// \return True if the promotion is profitable, false otherwise. 2175 bool 2176 AddressingModeMatcher::IsPromotionProfitable(unsigned MatchedSize, 2177 unsigned SizeWithPromotion, 2178 Value *PromotedOperand) const { 2179 // We folded less instructions than what we created to promote the operand. 2180 // This is not profitable. 2181 if (MatchedSize < SizeWithPromotion) 2182 return false; 2183 if (MatchedSize > SizeWithPromotion) 2184 return true; 2185 // The promotion is neutral but it may help folding the sign extension in 2186 // loads for instance. 2187 // Check that we did not create an illegal instruction. 2188 Instruction *PromotedInst = dyn_cast<Instruction>(PromotedOperand); 2189 if (!PromotedInst) 2190 return false; 2191 int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode()); 2192 // If the ISDOpcode is undefined, it was undefined before the promotion. 2193 if (!ISDOpcode) 2194 return true; 2195 // Otherwise, check if the promoted instruction is legal or not. 2196 return TLI.isOperationLegalOrCustom(ISDOpcode, 2197 EVT::getEVT(PromotedInst->getType())); 2198 } 2199 2200 /// MatchOperationAddr - Given an instruction or constant expr, see if we can 2201 /// fold the operation into the addressing mode. If so, update the addressing 2202 /// mode and return true, otherwise return false without modifying AddrMode. 2203 /// If \p MovedAway is not NULL, it contains the information of whether or 2204 /// not AddrInst has to be folded into the addressing mode on success. 2205 /// If \p MovedAway == true, \p AddrInst will not be part of the addressing 2206 /// because it has been moved away. 2207 /// Thus AddrInst must not be added in the matched instructions. 2208 /// This state can happen when AddrInst is a sext, since it may be moved away. 2209 /// Therefore, AddrInst may not be valid when MovedAway is true and it must 2210 /// not be referenced anymore. 2211 bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, 2212 unsigned Depth, 2213 bool *MovedAway) { 2214 // Avoid exponential behavior on extremely deep expression trees. 2215 if (Depth >= 5) return false; 2216 2217 // By default, all matched instructions stay in place. 2218 if (MovedAway) 2219 *MovedAway = false; 2220 2221 switch (Opcode) { 2222 case Instruction::PtrToInt: 2223 // PtrToInt is always a noop, as we know that the int type is pointer sized. 2224 return MatchAddr(AddrInst->getOperand(0), Depth); 2225 case Instruction::IntToPtr: 2226 // This inttoptr is a no-op if the integer type is pointer sized. 2227 if (TLI.getValueType(AddrInst->getOperand(0)->getType()) == 2228 TLI.getPointerTy(AddrInst->getType()->getPointerAddressSpace())) 2229 return MatchAddr(AddrInst->getOperand(0), Depth); 2230 return false; 2231 case Instruction::BitCast: 2232 // BitCast is always a noop, and we can handle it as long as it is 2233 // int->int or pointer->pointer (we don't want int<->fp or something). 2234 if ((AddrInst->getOperand(0)->getType()->isPointerTy() || 2235 AddrInst->getOperand(0)->getType()->isIntegerTy()) && 2236 // Don't touch identity bitcasts. These were probably put here by LSR, 2237 // and we don't want to mess around with them. Assume it knows what it 2238 // is doing. 2239 AddrInst->getOperand(0)->getType() != AddrInst->getType()) 2240 return MatchAddr(AddrInst->getOperand(0), Depth); 2241 return false; 2242 case Instruction::Add: { 2243 // Check to see if we can merge in the RHS then the LHS. If so, we win. 2244 ExtAddrMode BackupAddrMode = AddrMode; 2245 unsigned OldSize = AddrModeInsts.size(); 2246 // Start a transaction at this point. 2247 // The LHS may match but not the RHS. 2248 // Therefore, we need a higher level restoration point to undo partially 2249 // matched operation. 2250 TypePromotionTransaction::ConstRestorationPt LastKnownGood = 2251 TPT.getRestorationPoint(); 2252 2253 if (MatchAddr(AddrInst->getOperand(1), Depth+1) && 2254 MatchAddr(AddrInst->getOperand(0), Depth+1)) 2255 return true; 2256 2257 // Restore the old addr mode info. 2258 AddrMode = BackupAddrMode; 2259 AddrModeInsts.resize(OldSize); 2260 TPT.rollback(LastKnownGood); 2261 2262 // Otherwise this was over-aggressive. Try merging in the LHS then the RHS. 2263 if (MatchAddr(AddrInst->getOperand(0), Depth+1) && 2264 MatchAddr(AddrInst->getOperand(1), Depth+1)) 2265 return true; 2266 2267 // Otherwise we definitely can't merge the ADD in. 2268 AddrMode = BackupAddrMode; 2269 AddrModeInsts.resize(OldSize); 2270 TPT.rollback(LastKnownGood); 2271 break; 2272 } 2273 //case Instruction::Or: 2274 // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD. 2275 //break; 2276 case Instruction::Mul: 2277 case Instruction::Shl: { 2278 // Can only handle X*C and X << C. 2279 ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1)); 2280 if (!RHS) return false; 2281 int64_t Scale = RHS->getSExtValue(); 2282 if (Opcode == Instruction::Shl) 2283 Scale = 1LL << Scale; 2284 2285 return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth); 2286 } 2287 case Instruction::GetElementPtr: { 2288 // Scan the GEP. We check it if it contains constant offsets and at most 2289 // one variable offset. 2290 int VariableOperand = -1; 2291 unsigned VariableScale = 0; 2292 2293 int64_t ConstantOffset = 0; 2294 const DataLayout *TD = TLI.getDataLayout(); 2295 gep_type_iterator GTI = gep_type_begin(AddrInst); 2296 for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) { 2297 if (StructType *STy = dyn_cast<StructType>(*GTI)) { 2298 const StructLayout *SL = TD->getStructLayout(STy); 2299 unsigned Idx = 2300 cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue(); 2301 ConstantOffset += SL->getElementOffset(Idx); 2302 } else { 2303 uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType()); 2304 if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) { 2305 ConstantOffset += CI->getSExtValue()*TypeSize; 2306 } else if (TypeSize) { // Scales of zero don't do anything. 2307 // We only allow one variable index at the moment. 2308 if (VariableOperand != -1) 2309 return false; 2310 2311 // Remember the variable index. 2312 VariableOperand = i; 2313 VariableScale = TypeSize; 2314 } 2315 } 2316 } 2317 2318 // A common case is for the GEP to only do a constant offset. In this case, 2319 // just add it to the disp field and check validity. 2320 if (VariableOperand == -1) { 2321 AddrMode.BaseOffs += ConstantOffset; 2322 if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){ 2323 // Check to see if we can fold the base pointer in too. 2324 if (MatchAddr(AddrInst->getOperand(0), Depth+1)) 2325 return true; 2326 } 2327 AddrMode.BaseOffs -= ConstantOffset; 2328 return false; 2329 } 2330 2331 // Save the valid addressing mode in case we can't match. 2332 ExtAddrMode BackupAddrMode = AddrMode; 2333 unsigned OldSize = AddrModeInsts.size(); 2334 2335 // See if the scale and offset amount is valid for this target. 2336 AddrMode.BaseOffs += ConstantOffset; 2337 2338 // Match the base operand of the GEP. 2339 if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) { 2340 // If it couldn't be matched, just stuff the value in a register. 2341 if (AddrMode.HasBaseReg) { 2342 AddrMode = BackupAddrMode; 2343 AddrModeInsts.resize(OldSize); 2344 return false; 2345 } 2346 AddrMode.HasBaseReg = true; 2347 AddrMode.BaseReg = AddrInst->getOperand(0); 2348 } 2349 2350 // Match the remaining variable portion of the GEP. 2351 if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale, 2352 Depth)) { 2353 // If it couldn't be matched, try stuffing the base into a register 2354 // instead of matching it, and retrying the match of the scale. 2355 AddrMode = BackupAddrMode; 2356 AddrModeInsts.resize(OldSize); 2357 if (AddrMode.HasBaseReg) 2358 return false; 2359 AddrMode.HasBaseReg = true; 2360 AddrMode.BaseReg = AddrInst->getOperand(0); 2361 AddrMode.BaseOffs += ConstantOffset; 2362 if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), 2363 VariableScale, Depth)) { 2364 // If even that didn't work, bail. 2365 AddrMode = BackupAddrMode; 2366 AddrModeInsts.resize(OldSize); 2367 return false; 2368 } 2369 } 2370 2371 return true; 2372 } 2373 case Instruction::SExt: { 2374 // Try to move this sext out of the way of the addressing mode. 2375 Instruction *SExt = cast<Instruction>(AddrInst); 2376 // Ask for a method for doing so. 2377 TypePromotionHelper::Action TPH = TypePromotionHelper::getAction( 2378 SExt, InsertedTruncs, TLI, PromotedInsts); 2379 if (!TPH) 2380 return false; 2381 2382 TypePromotionTransaction::ConstRestorationPt LastKnownGood = 2383 TPT.getRestorationPoint(); 2384 unsigned CreatedInsts = 0; 2385 Value *PromotedOperand = TPH(SExt, TPT, PromotedInsts, CreatedInsts); 2386 // SExt has been moved away. 2387 // Thus either it will be rematched later in the recursive calls or it is 2388 // gone. Anyway, we must not fold it into the addressing mode at this point. 2389 // E.g., 2390 // op = add opnd, 1 2391 // idx = sext op 2392 // addr = gep base, idx 2393 // is now: 2394 // promotedOpnd = sext opnd <- no match here 2395 // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls) 2396 // addr = gep base, op <- match 2397 if (MovedAway) 2398 *MovedAway = true; 2399 2400 assert(PromotedOperand && 2401 "TypePromotionHelper should have filtered out those cases"); 2402 2403 ExtAddrMode BackupAddrMode = AddrMode; 2404 unsigned OldSize = AddrModeInsts.size(); 2405 2406 if (!MatchAddr(PromotedOperand, Depth) || 2407 !IsPromotionProfitable(AddrModeInsts.size(), OldSize + CreatedInsts, 2408 PromotedOperand)) { 2409 AddrMode = BackupAddrMode; 2410 AddrModeInsts.resize(OldSize); 2411 DEBUG(dbgs() << "Sign extension does not pay off: rollback\n"); 2412 TPT.rollback(LastKnownGood); 2413 return false; 2414 } 2415 return true; 2416 } 2417 } 2418 return false; 2419 } 2420 2421 /// MatchAddr - If we can, try to add the value of 'Addr' into the current 2422 /// addressing mode. If Addr can't be added to AddrMode this returns false and 2423 /// leaves AddrMode unmodified. This assumes that Addr is either a pointer type 2424 /// or intptr_t for the target. 2425 /// 2426 bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { 2427 // Start a transaction at this point that we will rollback if the matching 2428 // fails. 2429 TypePromotionTransaction::ConstRestorationPt LastKnownGood = 2430 TPT.getRestorationPoint(); 2431 if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) { 2432 // Fold in immediates if legal for the target. 2433 AddrMode.BaseOffs += CI->getSExtValue(); 2434 if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) 2435 return true; 2436 AddrMode.BaseOffs -= CI->getSExtValue(); 2437 } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) { 2438 // If this is a global variable, try to fold it into the addressing mode. 2439 if (!AddrMode.BaseGV) { 2440 AddrMode.BaseGV = GV; 2441 if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) 2442 return true; 2443 AddrMode.BaseGV = nullptr; 2444 } 2445 } else if (Instruction *I = dyn_cast<Instruction>(Addr)) { 2446 ExtAddrMode BackupAddrMode = AddrMode; 2447 unsigned OldSize = AddrModeInsts.size(); 2448 2449 // Check to see if it is possible to fold this operation. 2450 bool MovedAway = false; 2451 if (MatchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) { 2452 // This instruction may have been move away. If so, there is nothing 2453 // to check here. 2454 if (MovedAway) 2455 return true; 2456 // Okay, it's possible to fold this. Check to see if it is actually 2457 // *profitable* to do so. We use a simple cost model to avoid increasing 2458 // register pressure too much. 2459 if (I->hasOneUse() || 2460 IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) { 2461 AddrModeInsts.push_back(I); 2462 return true; 2463 } 2464 2465 // It isn't profitable to do this, roll back. 2466 //cerr << "NOT FOLDING: " << *I; 2467 AddrMode = BackupAddrMode; 2468 AddrModeInsts.resize(OldSize); 2469 TPT.rollback(LastKnownGood); 2470 } 2471 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) { 2472 if (MatchOperationAddr(CE, CE->getOpcode(), Depth)) 2473 return true; 2474 TPT.rollback(LastKnownGood); 2475 } else if (isa<ConstantPointerNull>(Addr)) { 2476 // Null pointer gets folded without affecting the addressing mode. 2477 return true; 2478 } 2479 2480 // Worse case, the target should support [reg] addressing modes. :) 2481 if (!AddrMode.HasBaseReg) { 2482 AddrMode.HasBaseReg = true; 2483 AddrMode.BaseReg = Addr; 2484 // Still check for legality in case the target supports [imm] but not [i+r]. 2485 if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) 2486 return true; 2487 AddrMode.HasBaseReg = false; 2488 AddrMode.BaseReg = nullptr; 2489 } 2490 2491 // If the base register is already taken, see if we can do [r+r]. 2492 if (AddrMode.Scale == 0) { 2493 AddrMode.Scale = 1; 2494 AddrMode.ScaledReg = Addr; 2495 if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) 2496 return true; 2497 AddrMode.Scale = 0; 2498 AddrMode.ScaledReg = nullptr; 2499 } 2500 // Couldn't match. 2501 TPT.rollback(LastKnownGood); 2502 return false; 2503 } 2504 2505 /// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified 2506 /// inline asm call are due to memory operands. If so, return true, otherwise 2507 /// return false. 2508 static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, 2509 const TargetLowering &TLI) { 2510 TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI)); 2511 for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { 2512 TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; 2513 2514 // Compute the constraint code and ConstraintType to use. 2515 TLI.ComputeConstraintToUse(OpInfo, SDValue()); 2516 2517 // If this asm operand is our Value*, and if it isn't an indirect memory 2518 // operand, we can't fold it! 2519 if (OpInfo.CallOperandVal == OpVal && 2520 (OpInfo.ConstraintType != TargetLowering::C_Memory || 2521 !OpInfo.isIndirect)) 2522 return false; 2523 } 2524 2525 return true; 2526 } 2527 2528 /// FindAllMemoryUses - Recursively walk all the uses of I until we find a 2529 /// memory use. If we find an obviously non-foldable instruction, return true. 2530 /// Add the ultimately found memory instructions to MemoryUses. 2531 static bool FindAllMemoryUses(Instruction *I, 2532 SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses, 2533 SmallPtrSet<Instruction*, 16> &ConsideredInsts, 2534 const TargetLowering &TLI) { 2535 // If we already considered this instruction, we're done. 2536 if (!ConsideredInsts.insert(I)) 2537 return false; 2538 2539 // If this is an obviously unfoldable instruction, bail out. 2540 if (!MightBeFoldableInst(I)) 2541 return true; 2542 2543 // Loop over all the uses, recursively processing them. 2544 for (Use &U : I->uses()) { 2545 Instruction *UserI = cast<Instruction>(U.getUser()); 2546 2547 if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) { 2548 MemoryUses.push_back(std::make_pair(LI, U.getOperandNo())); 2549 continue; 2550 } 2551 2552 if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) { 2553 unsigned opNo = U.getOperandNo(); 2554 if (opNo == 0) return true; // Storing addr, not into addr. 2555 MemoryUses.push_back(std::make_pair(SI, opNo)); 2556 continue; 2557 } 2558 2559 if (CallInst *CI = dyn_cast<CallInst>(UserI)) { 2560 InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue()); 2561 if (!IA) return true; 2562 2563 // If this is a memory operand, we're cool, otherwise bail out. 2564 if (!IsOperandAMemoryOperand(CI, IA, I, TLI)) 2565 return true; 2566 continue; 2567 } 2568 2569 if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI)) 2570 return true; 2571 } 2572 2573 return false; 2574 } 2575 2576 /// ValueAlreadyLiveAtInst - Retrn true if Val is already known to be live at 2577 /// the use site that we're folding it into. If so, there is no cost to 2578 /// include it in the addressing mode. KnownLive1 and KnownLive2 are two values 2579 /// that we know are live at the instruction already. 2580 bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1, 2581 Value *KnownLive2) { 2582 // If Val is either of the known-live values, we know it is live! 2583 if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2) 2584 return true; 2585 2586 // All values other than instructions and arguments (e.g. constants) are live. 2587 if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true; 2588 2589 // If Val is a constant sized alloca in the entry block, it is live, this is 2590 // true because it is just a reference to the stack/frame pointer, which is 2591 // live for the whole function. 2592 if (AllocaInst *AI = dyn_cast<AllocaInst>(Val)) 2593 if (AI->isStaticAlloca()) 2594 return true; 2595 2596 // Check to see if this value is already used in the memory instruction's 2597 // block. If so, it's already live into the block at the very least, so we 2598 // can reasonably fold it. 2599 return Val->isUsedInBasicBlock(MemoryInst->getParent()); 2600 } 2601 2602 /// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing 2603 /// mode of the machine to fold the specified instruction into a load or store 2604 /// that ultimately uses it. However, the specified instruction has multiple 2605 /// uses. Given this, it may actually increase register pressure to fold it 2606 /// into the load. For example, consider this code: 2607 /// 2608 /// X = ... 2609 /// Y = X+1 2610 /// use(Y) -> nonload/store 2611 /// Z = Y+1 2612 /// load Z 2613 /// 2614 /// In this case, Y has multiple uses, and can be folded into the load of Z 2615 /// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to 2616 /// be live at the use(Y) line. If we don't fold Y into load Z, we use one 2617 /// fewer register. Since Y can't be folded into "use(Y)" we don't increase the 2618 /// number of computations either. 2619 /// 2620 /// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If 2621 /// X was live across 'load Z' for other reasons, we actually *would* want to 2622 /// fold the addressing mode in the Z case. This would make Y die earlier. 2623 bool AddressingModeMatcher:: 2624 IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, 2625 ExtAddrMode &AMAfter) { 2626 if (IgnoreProfitability) return true; 2627 2628 // AMBefore is the addressing mode before this instruction was folded into it, 2629 // and AMAfter is the addressing mode after the instruction was folded. Get 2630 // the set of registers referenced by AMAfter and subtract out those 2631 // referenced by AMBefore: this is the set of values which folding in this 2632 // address extends the lifetime of. 2633 // 2634 // Note that there are only two potential values being referenced here, 2635 // BaseReg and ScaleReg (global addresses are always available, as are any 2636 // folded immediates). 2637 Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg; 2638 2639 // If the BaseReg or ScaledReg was referenced by the previous addrmode, their 2640 // lifetime wasn't extended by adding this instruction. 2641 if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg)) 2642 BaseReg = nullptr; 2643 if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg)) 2644 ScaledReg = nullptr; 2645 2646 // If folding this instruction (and it's subexprs) didn't extend any live 2647 // ranges, we're ok with it. 2648 if (!BaseReg && !ScaledReg) 2649 return true; 2650 2651 // If all uses of this instruction are ultimately load/store/inlineasm's, 2652 // check to see if their addressing modes will include this instruction. If 2653 // so, we can fold it into all uses, so it doesn't matter if it has multiple 2654 // uses. 2655 SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses; 2656 SmallPtrSet<Instruction*, 16> ConsideredInsts; 2657 if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI)) 2658 return false; // Has a non-memory, non-foldable use! 2659 2660 // Now that we know that all uses of this instruction are part of a chain of 2661 // computation involving only operations that could theoretically be folded 2662 // into a memory use, loop over each of these uses and see if they could 2663 // *actually* fold the instruction. 2664 SmallVector<Instruction*, 32> MatchedAddrModeInsts; 2665 for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) { 2666 Instruction *User = MemoryUses[i].first; 2667 unsigned OpNo = MemoryUses[i].second; 2668 2669 // Get the access type of this use. If the use isn't a pointer, we don't 2670 // know what it accesses. 2671 Value *Address = User->getOperand(OpNo); 2672 if (!Address->getType()->isPointerTy()) 2673 return false; 2674 Type *AddressAccessTy = Address->getType()->getPointerElementType(); 2675 2676 // Do a match against the root of this address, ignoring profitability. This 2677 // will tell us if the addressing mode for the memory operation will 2678 // *actually* cover the shared instruction. 2679 ExtAddrMode Result; 2680 TypePromotionTransaction::ConstRestorationPt LastKnownGood = 2681 TPT.getRestorationPoint(); 2682 AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy, 2683 MemoryInst, Result, InsertedTruncs, 2684 PromotedInsts, TPT); 2685 Matcher.IgnoreProfitability = true; 2686 bool Success = Matcher.MatchAddr(Address, 0); 2687 (void)Success; assert(Success && "Couldn't select *anything*?"); 2688 2689 // The match was to check the profitability, the changes made are not 2690 // part of the original matcher. Therefore, they should be dropped 2691 // otherwise the original matcher will not present the right state. 2692 TPT.rollback(LastKnownGood); 2693 2694 // If the match didn't cover I, then it won't be shared by it. 2695 if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(), 2696 I) == MatchedAddrModeInsts.end()) 2697 return false; 2698 2699 MatchedAddrModeInsts.clear(); 2700 } 2701 2702 return true; 2703 } 2704 2705 } // end anonymous namespace 2706 2707 /// IsNonLocalValue - Return true if the specified values are defined in a 2708 /// different basic block than BB. 2709 static bool IsNonLocalValue(Value *V, BasicBlock *BB) { 2710 if (Instruction *I = dyn_cast<Instruction>(V)) 2711 return I->getParent() != BB; 2712 return false; 2713 } 2714 2715 /// OptimizeMemoryInst - Load and Store Instructions often have 2716 /// addressing modes that can do significant amounts of computation. As such, 2717 /// instruction selection will try to get the load or store to do as much 2718 /// computation as possible for the program. The problem is that isel can only 2719 /// see within a single block. As such, we sink as much legal addressing mode 2720 /// stuff into the block as possible. 2721 /// 2722 /// This method is used to optimize both load/store and inline asms with memory 2723 /// operands. 2724 bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, 2725 Type *AccessTy) { 2726 Value *Repl = Addr; 2727 2728 // Try to collapse single-value PHI nodes. This is necessary to undo 2729 // unprofitable PRE transformations. 2730 SmallVector<Value*, 8> worklist; 2731 SmallPtrSet<Value*, 16> Visited; 2732 worklist.push_back(Addr); 2733 2734 // Use a worklist to iteratively look through PHI nodes, and ensure that 2735 // the addressing mode obtained from the non-PHI roots of the graph 2736 // are equivalent. 2737 Value *Consensus = nullptr; 2738 unsigned NumUsesConsensus = 0; 2739 bool IsNumUsesConsensusValid = false; 2740 SmallVector<Instruction*, 16> AddrModeInsts; 2741 ExtAddrMode AddrMode; 2742 TypePromotionTransaction TPT; 2743 TypePromotionTransaction::ConstRestorationPt LastKnownGood = 2744 TPT.getRestorationPoint(); 2745 while (!worklist.empty()) { 2746 Value *V = worklist.back(); 2747 worklist.pop_back(); 2748 2749 // Break use-def graph loops. 2750 if (!Visited.insert(V)) { 2751 Consensus = nullptr; 2752 break; 2753 } 2754 2755 // For a PHI node, push all of its incoming values. 2756 if (PHINode *P = dyn_cast<PHINode>(V)) { 2757 for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) 2758 worklist.push_back(P->getIncomingValue(i)); 2759 continue; 2760 } 2761 2762 // For non-PHIs, determine the addressing mode being computed. 2763 SmallVector<Instruction*, 16> NewAddrModeInsts; 2764 ExtAddrMode NewAddrMode = AddressingModeMatcher::Match( 2765 V, AccessTy, MemoryInst, NewAddrModeInsts, *TLI, InsertedTruncsSet, 2766 PromotedInsts, TPT); 2767 2768 // This check is broken into two cases with very similar code to avoid using 2769 // getNumUses() as much as possible. Some values have a lot of uses, so 2770 // calling getNumUses() unconditionally caused a significant compile-time 2771 // regression. 2772 if (!Consensus) { 2773 Consensus = V; 2774 AddrMode = NewAddrMode; 2775 AddrModeInsts = NewAddrModeInsts; 2776 continue; 2777 } else if (NewAddrMode == AddrMode) { 2778 if (!IsNumUsesConsensusValid) { 2779 NumUsesConsensus = Consensus->getNumUses(); 2780 IsNumUsesConsensusValid = true; 2781 } 2782 2783 // Ensure that the obtained addressing mode is equivalent to that obtained 2784 // for all other roots of the PHI traversal. Also, when choosing one 2785 // such root as representative, select the one with the most uses in order 2786 // to keep the cost modeling heuristics in AddressingModeMatcher 2787 // applicable. 2788 unsigned NumUses = V->getNumUses(); 2789 if (NumUses > NumUsesConsensus) { 2790 Consensus = V; 2791 NumUsesConsensus = NumUses; 2792 AddrModeInsts = NewAddrModeInsts; 2793 } 2794 continue; 2795 } 2796 2797 Consensus = nullptr; 2798 break; 2799 } 2800 2801 // If the addressing mode couldn't be determined, or if multiple different 2802 // ones were determined, bail out now. 2803 if (!Consensus) { 2804 TPT.rollback(LastKnownGood); 2805 return false; 2806 } 2807 TPT.commit(); 2808 2809 // Check to see if any of the instructions supersumed by this addr mode are 2810 // non-local to I's BB. 2811 bool AnyNonLocal = false; 2812 for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) { 2813 if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) { 2814 AnyNonLocal = true; 2815 break; 2816 } 2817 } 2818 2819 // If all the instructions matched are already in this BB, don't do anything. 2820 if (!AnyNonLocal) { 2821 DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n"); 2822 return false; 2823 } 2824 2825 // Insert this computation right after this user. Since our caller is 2826 // scanning from the top of the BB to the bottom, reuse of the expr are 2827 // guaranteed to happen later. 2828 IRBuilder<> Builder(MemoryInst); 2829 2830 // Now that we determined the addressing expression we want to use and know 2831 // that we have to sink it into this block. Check to see if we have already 2832 // done this for some other load/store instr in this block. If so, reuse the 2833 // computation. 2834 Value *&SunkAddr = SunkAddrs[Addr]; 2835 if (SunkAddr) { 2836 DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " 2837 << *MemoryInst); 2838 if (SunkAddr->getType() != Addr->getType()) 2839 SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType()); 2840 } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() && 2841 TM && TM->getSubtarget<TargetSubtargetInfo>().useAA())) { 2842 // By default, we use the GEP-based method when AA is used later. This 2843 // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. 2844 DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " 2845 << *MemoryInst); 2846 Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType()); 2847 Value *ResultPtr = nullptr, *ResultIndex = nullptr; 2848 2849 // First, find the pointer. 2850 if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) { 2851 ResultPtr = AddrMode.BaseReg; 2852 AddrMode.BaseReg = nullptr; 2853 } 2854 2855 if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) { 2856 // We can't add more than one pointer together, nor can we scale a 2857 // pointer (both of which seem meaningless). 2858 if (ResultPtr || AddrMode.Scale != 1) 2859 return false; 2860 2861 ResultPtr = AddrMode.ScaledReg; 2862 AddrMode.Scale = 0; 2863 } 2864 2865 if (AddrMode.BaseGV) { 2866 if (ResultPtr) 2867 return false; 2868 2869 ResultPtr = AddrMode.BaseGV; 2870 } 2871 2872 // If the real base value actually came from an inttoptr, then the matcher 2873 // will look through it and provide only the integer value. In that case, 2874 // use it here. 2875 if (!ResultPtr && AddrMode.BaseReg) { 2876 ResultPtr = 2877 Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), "sunkaddr"); 2878 AddrMode.BaseReg = nullptr; 2879 } else if (!ResultPtr && AddrMode.Scale == 1) { 2880 ResultPtr = 2881 Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), "sunkaddr"); 2882 AddrMode.Scale = 0; 2883 } 2884 2885 if (!ResultPtr && 2886 !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) { 2887 SunkAddr = Constant::getNullValue(Addr->getType()); 2888 } else if (!ResultPtr) { 2889 return false; 2890 } else { 2891 Type *I8PtrTy = 2892 Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace()); 2893 2894 // Start with the base register. Do this first so that subsequent address 2895 // matching finds it last, which will prevent it from trying to match it 2896 // as the scaled value in case it happens to be a mul. That would be 2897 // problematic if we've sunk a different mul for the scale, because then 2898 // we'd end up sinking both muls. 2899 if (AddrMode.BaseReg) { 2900 Value *V = AddrMode.BaseReg; 2901 if (V->getType() != IntPtrTy) 2902 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr"); 2903 2904 ResultIndex = V; 2905 } 2906 2907 // Add the scale value. 2908 if (AddrMode.Scale) { 2909 Value *V = AddrMode.ScaledReg; 2910 if (V->getType() == IntPtrTy) { 2911 // done. 2912 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() < 2913 cast<IntegerType>(V->getType())->getBitWidth()) { 2914 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); 2915 } else { 2916 // It is only safe to sign extend the BaseReg if we know that the math 2917 // required to create it did not overflow before we extend it. Since 2918 // the original IR value was tossed in favor of a constant back when 2919 // the AddrMode was created we need to bail out gracefully if widths 2920 // do not match instead of extending it. 2921 Instruction *I = dyn_cast_or_null<Instruction>(ResultIndex); 2922 if (I && (ResultIndex != AddrMode.BaseReg)) 2923 I->eraseFromParent(); 2924 return false; 2925 } 2926 2927 if (AddrMode.Scale != 1) 2928 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), 2929 "sunkaddr"); 2930 if (ResultIndex) 2931 ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr"); 2932 else 2933 ResultIndex = V; 2934 } 2935 2936 // Add in the Base Offset if present. 2937 if (AddrMode.BaseOffs) { 2938 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); 2939 if (ResultIndex) { 2940 // We need to add this separately from the scale above to help with 2941 // SDAG consecutive load/store merging. 2942 if (ResultPtr->getType() != I8PtrTy) 2943 ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); 2944 ResultPtr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr"); 2945 } 2946 2947 ResultIndex = V; 2948 } 2949 2950 if (!ResultIndex) { 2951 SunkAddr = ResultPtr; 2952 } else { 2953 if (ResultPtr->getType() != I8PtrTy) 2954 ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); 2955 SunkAddr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr"); 2956 } 2957 2958 if (SunkAddr->getType() != Addr->getType()) 2959 SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType()); 2960 } 2961 } else { 2962 DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " 2963 << *MemoryInst); 2964 Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType()); 2965 Value *Result = nullptr; 2966 2967 // Start with the base register. Do this first so that subsequent address 2968 // matching finds it last, which will prevent it from trying to match it 2969 // as the scaled value in case it happens to be a mul. That would be 2970 // problematic if we've sunk a different mul for the scale, because then 2971 // we'd end up sinking both muls. 2972 if (AddrMode.BaseReg) { 2973 Value *V = AddrMode.BaseReg; 2974 if (V->getType()->isPointerTy()) 2975 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); 2976 if (V->getType() != IntPtrTy) 2977 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr"); 2978 Result = V; 2979 } 2980 2981 // Add the scale value. 2982 if (AddrMode.Scale) { 2983 Value *V = AddrMode.ScaledReg; 2984 if (V->getType() == IntPtrTy) { 2985 // done. 2986 } else if (V->getType()->isPointerTy()) { 2987 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); 2988 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() < 2989 cast<IntegerType>(V->getType())->getBitWidth()) { 2990 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); 2991 } else { 2992 // It is only safe to sign extend the BaseReg if we know that the math 2993 // required to create it did not overflow before we extend it. Since 2994 // the original IR value was tossed in favor of a constant back when 2995 // the AddrMode was created we need to bail out gracefully if widths 2996 // do not match instead of extending it. 2997 Instruction *I = dyn_cast<Instruction>(Result); 2998 if (I && (Result != AddrMode.BaseReg)) 2999 I->eraseFromParent(); 3000 return false; 3001 } 3002 if (AddrMode.Scale != 1) 3003 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), 3004 "sunkaddr"); 3005 if (Result) 3006 Result = Builder.CreateAdd(Result, V, "sunkaddr"); 3007 else 3008 Result = V; 3009 } 3010 3011 // Add in the BaseGV if present. 3012 if (AddrMode.BaseGV) { 3013 Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr"); 3014 if (Result) 3015 Result = Builder.CreateAdd(Result, V, "sunkaddr"); 3016 else 3017 Result = V; 3018 } 3019 3020 // Add in the Base Offset if present. 3021 if (AddrMode.BaseOffs) { 3022 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); 3023 if (Result) 3024 Result = Builder.CreateAdd(Result, V, "sunkaddr"); 3025 else 3026 Result = V; 3027 } 3028 3029 if (!Result) 3030 SunkAddr = Constant::getNullValue(Addr->getType()); 3031 else 3032 SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr"); 3033 } 3034 3035 MemoryInst->replaceUsesOfWith(Repl, SunkAddr); 3036 3037 // If we have no uses, recursively delete the value and all dead instructions 3038 // using it. 3039 if (Repl->use_empty()) { 3040 // This can cause recursive deletion, which can invalidate our iterator. 3041 // Use a WeakVH to hold onto it in case this happens. 3042 WeakVH IterHandle(CurInstIterator); 3043 BasicBlock *BB = CurInstIterator->getParent(); 3044 3045 RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo); 3046 3047 if (IterHandle != CurInstIterator) { 3048 // If the iterator instruction was recursively deleted, start over at the 3049 // start of the block. 3050 CurInstIterator = BB->begin(); 3051 SunkAddrs.clear(); 3052 } 3053 } 3054 ++NumMemoryInsts; 3055 return true; 3056 } 3057 3058 /// OptimizeInlineAsmInst - If there are any memory operands, use 3059 /// OptimizeMemoryInst to sink their address computing into the block when 3060 /// possible / profitable. 3061 bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) { 3062 bool MadeChange = false; 3063 3064 TargetLowering::AsmOperandInfoVector 3065 TargetConstraints = TLI->ParseConstraints(CS); 3066 unsigned ArgNo = 0; 3067 for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { 3068 TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; 3069 3070 // Compute the constraint code and ConstraintType to use. 3071 TLI->ComputeConstraintToUse(OpInfo, SDValue()); 3072 3073 if (OpInfo.ConstraintType == TargetLowering::C_Memory && 3074 OpInfo.isIndirect) { 3075 Value *OpVal = CS->getArgOperand(ArgNo++); 3076 MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType()); 3077 } else if (OpInfo.Type == InlineAsm::isInput) 3078 ArgNo++; 3079 } 3080 3081 return MadeChange; 3082 } 3083 3084 /// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same 3085 /// basic block as the load, unless conditions are unfavorable. This allows 3086 /// SelectionDAG to fold the extend into the load. 3087 /// 3088 bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) { 3089 // Look for a load being extended. 3090 LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0)); 3091 if (!LI) return false; 3092 3093 // If they're already in the same block, there's nothing to do. 3094 if (LI->getParent() == I->getParent()) 3095 return false; 3096 3097 // If the load has other users and the truncate is not free, this probably 3098 // isn't worthwhile. 3099 if (!LI->hasOneUse() && 3100 TLI && (TLI->isTypeLegal(TLI->getValueType(LI->getType())) || 3101 !TLI->isTypeLegal(TLI->getValueType(I->getType()))) && 3102 !TLI->isTruncateFree(I->getType(), LI->getType())) 3103 return false; 3104 3105 // Check whether the target supports casts folded into loads. 3106 unsigned LType; 3107 if (isa<ZExtInst>(I)) 3108 LType = ISD::ZEXTLOAD; 3109 else { 3110 assert(isa<SExtInst>(I) && "Unexpected ext type!"); 3111 LType = ISD::SEXTLOAD; 3112 } 3113 if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType()))) 3114 return false; 3115 3116 // Move the extend into the same block as the load, so that SelectionDAG 3117 // can fold it. 3118 I->removeFromParent(); 3119 I->insertAfter(LI); 3120 ++NumExtsMoved; 3121 return true; 3122 } 3123 3124 bool CodeGenPrepare::OptimizeExtUses(Instruction *I) { 3125 BasicBlock *DefBB = I->getParent(); 3126 3127 // If the result of a {s|z}ext and its source are both live out, rewrite all 3128 // other uses of the source with result of extension. 3129 Value *Src = I->getOperand(0); 3130 if (Src->hasOneUse()) 3131 return false; 3132 3133 // Only do this xform if truncating is free. 3134 if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType())) 3135 return false; 3136 3137 // Only safe to perform the optimization if the source is also defined in 3138 // this block. 3139 if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent()) 3140 return false; 3141 3142 bool DefIsLiveOut = false; 3143 for (User *U : I->users()) { 3144 Instruction *UI = cast<Instruction>(U); 3145 3146 // Figure out which BB this ext is used in. 3147 BasicBlock *UserBB = UI->getParent(); 3148 if (UserBB == DefBB) continue; 3149 DefIsLiveOut = true; 3150 break; 3151 } 3152 if (!DefIsLiveOut) 3153 return false; 3154 3155 // Make sure none of the uses are PHI nodes. 3156 for (User *U : Src->users()) { 3157 Instruction *UI = cast<Instruction>(U); 3158 BasicBlock *UserBB = UI->getParent(); 3159 if (UserBB == DefBB) continue; 3160 // Be conservative. We don't want this xform to end up introducing 3161 // reloads just before load / store instructions. 3162 if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI)) 3163 return false; 3164 } 3165 3166 // InsertedTruncs - Only insert one trunc in each block once. 3167 DenseMap<BasicBlock*, Instruction*> InsertedTruncs; 3168 3169 bool MadeChange = false; 3170 for (Use &U : Src->uses()) { 3171 Instruction *User = cast<Instruction>(U.getUser()); 3172 3173 // Figure out which BB this ext is used in. 3174 BasicBlock *UserBB = User->getParent(); 3175 if (UserBB == DefBB) continue; 3176 3177 // Both src and def are live in this block. Rewrite the use. 3178 Instruction *&InsertedTrunc = InsertedTruncs[UserBB]; 3179 3180 if (!InsertedTrunc) { 3181 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); 3182 InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt); 3183 InsertedTruncsSet.insert(InsertedTrunc); 3184 } 3185 3186 // Replace a use of the {s|z}ext source with a use of the result. 3187 U = InsertedTrunc; 3188 ++NumExtUses; 3189 MadeChange = true; 3190 } 3191 3192 return MadeChange; 3193 } 3194 3195 /// isFormingBranchFromSelectProfitable - Returns true if a SelectInst should be 3196 /// turned into an explicit branch. 3197 static bool isFormingBranchFromSelectProfitable(SelectInst *SI) { 3198 // FIXME: This should use the same heuristics as IfConversion to determine 3199 // whether a select is better represented as a branch. This requires that 3200 // branch probability metadata is preserved for the select, which is not the 3201 // case currently. 3202 3203 CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition()); 3204 3205 // If the branch is predicted right, an out of order CPU can avoid blocking on 3206 // the compare. Emit cmovs on compares with a memory operand as branches to 3207 // avoid stalls on the load from memory. If the compare has more than one use 3208 // there's probably another cmov or setcc around so it's not worth emitting a 3209 // branch. 3210 if (!Cmp) 3211 return false; 3212 3213 Value *CmpOp0 = Cmp->getOperand(0); 3214 Value *CmpOp1 = Cmp->getOperand(1); 3215 3216 // We check that the memory operand has one use to avoid uses of the loaded 3217 // value directly after the compare, making branches unprofitable. 3218 return Cmp->hasOneUse() && 3219 ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) || 3220 (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse())); 3221 } 3222 3223 3224 /// If we have a SelectInst that will likely profit from branch prediction, 3225 /// turn it into a branch. 3226 bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) { 3227 bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); 3228 3229 // Can we convert the 'select' to CF ? 3230 if (DisableSelectToBranch || OptSize || !TLI || VectorCond) 3231 return false; 3232 3233 TargetLowering::SelectSupportKind SelectKind; 3234 if (VectorCond) 3235 SelectKind = TargetLowering::VectorMaskSelect; 3236 else if (SI->getType()->isVectorTy()) 3237 SelectKind = TargetLowering::ScalarCondVectorVal; 3238 else 3239 SelectKind = TargetLowering::ScalarValSelect; 3240 3241 // Do we have efficient codegen support for this kind of 'selects' ? 3242 if (TLI->isSelectSupported(SelectKind)) { 3243 // We have efficient codegen support for the select instruction. 3244 // Check if it is profitable to keep this 'select'. 3245 if (!TLI->isPredictableSelectExpensive() || 3246 !isFormingBranchFromSelectProfitable(SI)) 3247 return false; 3248 } 3249 3250 ModifiedDT = true; 3251 3252 // First, we split the block containing the select into 2 blocks. 3253 BasicBlock *StartBlock = SI->getParent(); 3254 BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI)); 3255 BasicBlock *NextBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); 3256 3257 // Create a new block serving as the landing pad for the branch. 3258 BasicBlock *SmallBlock = BasicBlock::Create(SI->getContext(), "select.mid", 3259 NextBlock->getParent(), NextBlock); 3260 3261 // Move the unconditional branch from the block with the select in it into our 3262 // landing pad block. 3263 StartBlock->getTerminator()->eraseFromParent(); 3264 BranchInst::Create(NextBlock, SmallBlock); 3265 3266 // Insert the real conditional branch based on the original condition. 3267 BranchInst::Create(NextBlock, SmallBlock, SI->getCondition(), SI); 3268 3269 // The select itself is replaced with a PHI Node. 3270 PHINode *PN = PHINode::Create(SI->getType(), 2, "", NextBlock->begin()); 3271 PN->takeName(SI); 3272 PN->addIncoming(SI->getTrueValue(), StartBlock); 3273 PN->addIncoming(SI->getFalseValue(), SmallBlock); 3274 SI->replaceAllUsesWith(PN); 3275 SI->eraseFromParent(); 3276 3277 // Instruct OptimizeBlock to skip to the next block. 3278 CurInstIterator = StartBlock->end(); 3279 ++NumSelectsExpanded; 3280 return true; 3281 } 3282 3283 static bool isBroadcastShuffle(ShuffleVectorInst *SVI) { 3284 SmallVector<int, 16> Mask(SVI->getShuffleMask()); 3285 int SplatElem = -1; 3286 for (unsigned i = 0; i < Mask.size(); ++i) { 3287 if (SplatElem != -1 && Mask[i] != -1 && Mask[i] != SplatElem) 3288 return false; 3289 SplatElem = Mask[i]; 3290 } 3291 3292 return true; 3293 } 3294 3295 /// Some targets have expensive vector shifts if the lanes aren't all the same 3296 /// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases 3297 /// it's often worth sinking a shufflevector splat down to its use so that 3298 /// codegen can spot all lanes are identical. 3299 bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) { 3300 BasicBlock *DefBB = SVI->getParent(); 3301 3302 // Only do this xform if variable vector shifts are particularly expensive. 3303 if (!TLI || !TLI->isVectorShiftByScalarCheap(SVI->getType())) 3304 return false; 3305 3306 // We only expect better codegen by sinking a shuffle if we can recognise a 3307 // constant splat. 3308 if (!isBroadcastShuffle(SVI)) 3309 return false; 3310 3311 // InsertedShuffles - Only insert a shuffle in each block once. 3312 DenseMap<BasicBlock*, Instruction*> InsertedShuffles; 3313 3314 bool MadeChange = false; 3315 for (User *U : SVI->users()) { 3316 Instruction *UI = cast<Instruction>(U); 3317 3318 // Figure out which BB this ext is used in. 3319 BasicBlock *UserBB = UI->getParent(); 3320 if (UserBB == DefBB) continue; 3321 3322 // For now only apply this when the splat is used by a shift instruction. 3323 if (!UI->isShift()) continue; 3324 3325 // Everything checks out, sink the shuffle if the user's block doesn't 3326 // already have a copy. 3327 Instruction *&InsertedShuffle = InsertedShuffles[UserBB]; 3328 3329 if (!InsertedShuffle) { 3330 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); 3331 InsertedShuffle = new ShuffleVectorInst(SVI->getOperand(0), 3332 SVI->getOperand(1), 3333 SVI->getOperand(2), "", InsertPt); 3334 } 3335 3336 UI->replaceUsesOfWith(SVI, InsertedShuffle); 3337 MadeChange = true; 3338 } 3339 3340 // If we removed all uses, nuke the shuffle. 3341 if (SVI->use_empty()) { 3342 SVI->eraseFromParent(); 3343 MadeChange = true; 3344 } 3345 3346 return MadeChange; 3347 } 3348 3349 bool CodeGenPrepare::OptimizeInst(Instruction *I) { 3350 if (PHINode *P = dyn_cast<PHINode>(I)) { 3351 // It is possible for very late stage optimizations (such as SimplifyCFG) 3352 // to introduce PHI nodes too late to be cleaned up. If we detect such a 3353 // trivial PHI, go ahead and zap it here. 3354 if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : nullptr, 3355 TLInfo, DT)) { 3356 P->replaceAllUsesWith(V); 3357 P->eraseFromParent(); 3358 ++NumPHIsElim; 3359 return true; 3360 } 3361 return false; 3362 } 3363 3364 if (CastInst *CI = dyn_cast<CastInst>(I)) { 3365 // If the source of the cast is a constant, then this should have 3366 // already been constant folded. The only reason NOT to constant fold 3367 // it is if something (e.g. LSR) was careful to place the constant 3368 // evaluation in a block other than then one that uses it (e.g. to hoist 3369 // the address of globals out of a loop). If this is the case, we don't 3370 // want to forward-subst the cast. 3371 if (isa<Constant>(CI->getOperand(0))) 3372 return false; 3373 3374 if (TLI && OptimizeNoopCopyExpression(CI, *TLI)) 3375 return true; 3376 3377 if (isa<ZExtInst>(I) || isa<SExtInst>(I)) { 3378 /// Sink a zext or sext into its user blocks if the target type doesn't 3379 /// fit in one register 3380 if (TLI && TLI->getTypeAction(CI->getContext(), 3381 TLI->getValueType(CI->getType())) == 3382 TargetLowering::TypeExpandInteger) { 3383 return SinkCast(CI); 3384 } else { 3385 bool MadeChange = MoveExtToFormExtLoad(I); 3386 return MadeChange | OptimizeExtUses(I); 3387 } 3388 } 3389 return false; 3390 } 3391 3392 if (CmpInst *CI = dyn_cast<CmpInst>(I)) 3393 if (!TLI || !TLI->hasMultipleConditionRegisters()) 3394 return OptimizeCmpExpression(CI); 3395 3396 if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 3397 if (TLI) 3398 return OptimizeMemoryInst(I, I->getOperand(0), LI->getType()); 3399 return false; 3400 } 3401 3402 if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 3403 if (TLI) 3404 return OptimizeMemoryInst(I, SI->getOperand(1), 3405 SI->getOperand(0)->getType()); 3406 return false; 3407 } 3408 3409 BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I); 3410 3411 if (BinOp && (BinOp->getOpcode() == Instruction::AShr || 3412 BinOp->getOpcode() == Instruction::LShr)) { 3413 ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1)); 3414 if (TLI && CI && TLI->hasExtractBitsInsn()) 3415 return OptimizeExtractBits(BinOp, CI, *TLI); 3416 3417 return false; 3418 } 3419 3420 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) { 3421 if (GEPI->hasAllZeroIndices()) { 3422 /// The GEP operand must be a pointer, so must its result -> BitCast 3423 Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(), 3424 GEPI->getName(), GEPI); 3425 GEPI->replaceAllUsesWith(NC); 3426 GEPI->eraseFromParent(); 3427 ++NumGEPsElim; 3428 OptimizeInst(NC); 3429 return true; 3430 } 3431 return false; 3432 } 3433 3434 if (CallInst *CI = dyn_cast<CallInst>(I)) 3435 return OptimizeCallInst(CI); 3436 3437 if (SelectInst *SI = dyn_cast<SelectInst>(I)) 3438 return OptimizeSelectInst(SI); 3439 3440 if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) 3441 return OptimizeShuffleVectorInst(SVI); 3442 3443 return false; 3444 } 3445 3446 // In this pass we look for GEP and cast instructions that are used 3447 // across basic blocks and rewrite them to improve basic-block-at-a-time 3448 // selection. 3449 bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) { 3450 SunkAddrs.clear(); 3451 bool MadeChange = false; 3452 3453 CurInstIterator = BB.begin(); 3454 while (CurInstIterator != BB.end()) 3455 MadeChange |= OptimizeInst(CurInstIterator++); 3456 3457 MadeChange |= DupRetToEnableTailCallOpts(&BB); 3458 3459 return MadeChange; 3460 } 3461 3462 // llvm.dbg.value is far away from the value then iSel may not be able 3463 // handle it properly. iSel will drop llvm.dbg.value if it can not 3464 // find a node corresponding to the value. 3465 bool CodeGenPrepare::PlaceDbgValues(Function &F) { 3466 bool MadeChange = false; 3467 for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { 3468 Instruction *PrevNonDbgInst = nullptr; 3469 for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) { 3470 Instruction *Insn = BI; ++BI; 3471 DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn); 3472 if (!DVI) { 3473 PrevNonDbgInst = Insn; 3474 continue; 3475 } 3476 3477 Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue()); 3478 if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) { 3479 DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI); 3480 DVI->removeFromParent(); 3481 if (isa<PHINode>(VI)) 3482 DVI->insertBefore(VI->getParent()->getFirstInsertionPt()); 3483 else 3484 DVI->insertAfter(VI); 3485 MadeChange = true; 3486 ++NumDbgValueMoved; 3487 } 3488 } 3489 } 3490 return MadeChange; 3491 } 3492 3493 // If there is a sequence that branches based on comparing a single bit 3494 // against zero that can be combined into a single instruction, and the 3495 // target supports folding these into a single instruction, sink the 3496 // mask and compare into the branch uses. Do this before OptimizeBlock -> 3497 // OptimizeInst -> OptimizeCmpExpression, which perturbs the pattern being 3498 // searched for. 3499 bool CodeGenPrepare::sinkAndCmp(Function &F) { 3500 if (!EnableAndCmpSinking) 3501 return false; 3502 if (!TLI || !TLI->isMaskAndBranchFoldingLegal()) 3503 return false; 3504 bool MadeChange = false; 3505 for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { 3506 BasicBlock *BB = I++; 3507 3508 // Does this BB end with the following? 3509 // %andVal = and %val, #single-bit-set 3510 // %icmpVal = icmp %andResult, 0 3511 // br i1 %cmpVal label %dest1, label %dest2" 3512 BranchInst *Brcc = dyn_cast<BranchInst>(BB->getTerminator()); 3513 if (!Brcc || !Brcc->isConditional()) 3514 continue; 3515 ICmpInst *Cmp = dyn_cast<ICmpInst>(Brcc->getOperand(0)); 3516 if (!Cmp || Cmp->getParent() != BB) 3517 continue; 3518 ConstantInt *Zero = dyn_cast<ConstantInt>(Cmp->getOperand(1)); 3519 if (!Zero || !Zero->isZero()) 3520 continue; 3521 Instruction *And = dyn_cast<Instruction>(Cmp->getOperand(0)); 3522 if (!And || And->getOpcode() != Instruction::And || And->getParent() != BB) 3523 continue; 3524 ConstantInt* Mask = dyn_cast<ConstantInt>(And->getOperand(1)); 3525 if (!Mask || !Mask->getUniqueInteger().isPowerOf2()) 3526 continue; 3527 DEBUG(dbgs() << "found and; icmp ?,0; brcc\n"); DEBUG(BB->dump()); 3528 3529 // Push the "and; icmp" for any users that are conditional branches. 3530 // Since there can only be one branch use per BB, we don't need to keep 3531 // track of which BBs we insert into. 3532 for (Value::use_iterator UI = Cmp->use_begin(), E = Cmp->use_end(); 3533 UI != E; ) { 3534 Use &TheUse = *UI; 3535 // Find brcc use. 3536 BranchInst *BrccUser = dyn_cast<BranchInst>(*UI); 3537 ++UI; 3538 if (!BrccUser || !BrccUser->isConditional()) 3539 continue; 3540 BasicBlock *UserBB = BrccUser->getParent(); 3541 if (UserBB == BB) continue; 3542 DEBUG(dbgs() << "found Brcc use\n"); 3543 3544 // Sink the "and; icmp" to use. 3545 MadeChange = true; 3546 BinaryOperator *NewAnd = 3547 BinaryOperator::CreateAnd(And->getOperand(0), And->getOperand(1), "", 3548 BrccUser); 3549 CmpInst *NewCmp = 3550 CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(), NewAnd, Zero, 3551 "", BrccUser); 3552 TheUse = NewCmp; 3553 ++NumAndCmpsMoved; 3554 DEBUG(BrccUser->getParent()->dump()); 3555 } 3556 } 3557 return MadeChange; 3558 } 3559