//===----- X86CallFrameOptimization.cpp - Optimize x86 call sequences -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines a pass that optimizes call sequences on x86.
// Currently, it converts movs of function parameters onto the stack into
// pushes. This is beneficial for two main reasons:
// 1) The push instruction encoding is much smaller than a stack-ptr-based mov.
// 2) It is possible to push memory arguments directly. So, if the
//    transformation is performed pre-reg-alloc, it can help relieve
//    register pressure.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "X86FrameLowering.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>

using namespace llvm;

#define DEBUG_TYPE "x86-cf-opt"

static cl::opt<bool>
    NoX86CFOpt("no-x86-call-frame-opt",
               cl::desc("Avoid optimizing x86 call frames for size"),
               cl::init(false), cl::Hidden);

namespace {

class X86CallFrameOptimization : public MachineFunctionPass {
public:
  X86CallFrameOptimization() : MachineFunctionPass(ID) {
    initializeX86CallFrameOptimizationPass(
        *PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  static char ID;

private:
  // Information we know about a particular call site
  struct CallContext {
    CallContext() : FrameSetup(nullptr), ArgStoreVector(4, nullptr) {}

    // Iterator referring to the frame setup instruction
    MachineBasicBlock::iterator FrameSetup;

    // Actual call instruction
    MachineInstr *Call = nullptr;

    // A copy of the stack pointer
    MachineInstr *SPCopy = nullptr;

    // The total displacement of all passed parameters
    int64_t ExpectedDist = 0;

    // The sequence of storing instructions used to pass the parameters
    SmallVector<MachineInstr *, 4> ArgStoreVector;

    // True if this call site has no stack parameters
    bool NoStackParams = false;

    // True if this call site can use push instructions
    bool UsePush = false;
  };

  typedef SmallVector<CallContext, 8> ContextVector;

  bool isLegal(MachineFunction &MF);

  bool isProfitable(MachineFunction &MF, ContextVector &CallSeqMap);

  void collectCallInfo(MachineFunction &MF, MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator I, CallContext &Context);

  void adjustCallSequence(MachineFunction &MF, const CallContext &Context);

  MachineInstr *canFoldIntoRegPush(MachineBasicBlock::iterator FrameSetup,
                                   unsigned Reg);

  enum InstClassification { Convert, Skip, Exit };

  InstClassification classifyInstruction(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MI,
                                         const X86RegisterInfo &RegInfo,
                                         DenseSet<unsigned int> &UsedRegs);

  StringRef getPassName() const override { return "X86 Optimize Call Frame"; }

  const X86InstrInfo *TII;
  const X86FrameLowering *TFL;
  const X86Subtarget *STI;
  MachineRegisterInfo *MRI;
  unsigned SlotSize;
  unsigned Log2SlotSize;
};

} // end anonymous namespace
char X86CallFrameOptimization::ID = 0;
INITIALIZE_PASS(X86CallFrameOptimization, DEBUG_TYPE,
                "X86 Call Frame Optimization", false, false)

// This checks whether the transformation is legal.
// Also returns false in cases where it's potentially legal, but
// we don't even want to try.
bool X86CallFrameOptimization::isLegal(MachineFunction &MF) {
  if (NoX86CFOpt.getValue())
    return false;

  // We can't encode multiple DW_CFA_GNU_args_size or DW_CFA_def_cfa_offset
  // in the compact unwind encoding that Darwin uses. So, bail if there
  // is a danger of that being generated.
  if (STI->isTargetDarwin() &&
      (!MF.getLandingPads().empty() ||
       (MF.getFunction().needsUnwindTableEntry() && !TFL->hasFP(MF))))
    return false;

  // It is not valid to change the stack pointer outside the prolog/epilog
  // on 64-bit Windows.
  if (STI->isTargetWin64())
    return false;

  // You would expect straight-line code between call-frame setup and
  // call-frame destroy. You would be wrong. There are circumstances (e.g.
  // CMOV_GR8 expansion of a select that feeds a function call!) where we can
  // end up with the setup and the destroy in different basic blocks.
  // This is bad, and breaks SP adjustment.
  // So, check that all of the frames in the function are closed inside
  // the same block, and, for good measure, that there are no nested frames.
  unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();
  unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
  for (MachineBasicBlock &BB : MF) {
    bool InsideFrameSequence = false;
    for (MachineInstr &MI : BB) {
      if (MI.getOpcode() == FrameSetupOpcode) {
        if (InsideFrameSequence)
          return false;
        InsideFrameSequence = true;
      } else if (MI.getOpcode() == FrameDestroyOpcode) {
        if (!InsideFrameSequence)
          return false;
        InsideFrameSequence = false;
      }
    }

    if (InsideFrameSequence)
      return false;
  }

  return true;
}

// Check whether this transformation is profitable for a particular
// function - in terms of code size.
bool X86CallFrameOptimization::isProfitable(MachineFunction &MF,
                                            ContextVector &CallSeqVector) {
  // This transformation is always a win when we do not expect to have
  // a reserved call frame. Under other circumstances, it may be either
  // a win or a loss, and requires a heuristic.
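  // A rough worked example of the cost model below (assuming 4-byte slots and
  // a 16-byte stack alignment): a call that passes three arguments on the
  // stack as pushes saves about 3 * 3 = 9 bytes of movs, but pays ~3 bytes for
  // the add after the call and ~3 bytes for the realigning sub (12 % 16 != 0),
  // for a net advantage of roughly +3 bytes.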
  bool CannotReserveFrame = MF.getFrameInfo().hasVarSizedObjects();
  if (CannotReserveFrame)
    return true;

  unsigned StackAlign = TFL->getStackAlignment();

  int64_t Advantage = 0;
  for (auto CC : CallSeqVector) {
    // Call sites where no parameters are passed on the stack
    // do not affect the cost, since there needs to be no
    // stack adjustment.
    if (CC.NoStackParams)
      continue;

    if (!CC.UsePush) {
      // If we don't use pushes for a particular call site,
      // we pay for not having a reserved call frame with an
      // additional sub/add esp pair. The cost is ~3 bytes per instruction,
      // depending on the size of the constant.
      // TODO: Callee-pop functions should have a smaller penalty, because
      // an add is needed even with a reserved call frame.
      Advantage -= 6;
    } else {
      // We can use pushes. First, account for the fixed costs.
      // We'll need an add after the call.
      Advantage -= 3;
      // If we have to realign the stack, we'll also need a sub before.
      if (CC.ExpectedDist % StackAlign)
        Advantage -= 3;
      // Now, for each push, we save ~3 bytes. For small constants, we actually
      // save more (up to 5 bytes), but 3 should be a good approximation.
      Advantage += (CC.ExpectedDist >> Log2SlotSize) * 3;
    }
  }

  return Advantage >= 0;
}

bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) {
  STI = &MF.getSubtarget<X86Subtarget>();
  TII = STI->getInstrInfo();
  TFL = STI->getFrameLowering();
  MRI = &MF.getRegInfo();

  const X86RegisterInfo &RegInfo =
      *static_cast<const X86RegisterInfo *>(STI->getRegisterInfo());
  SlotSize = RegInfo.getSlotSize();
  assert(isPowerOf2_32(SlotSize) && "Expect power of 2 stack slot size");
  Log2SlotSize = Log2_32(SlotSize);

  if (skipFunction(MF.getFunction()) || !isLegal(MF))
    return false;

  unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();

  bool Changed = false;

  ContextVector CallSeqVector;

  for (auto &MBB : MF)
    for (auto &MI : MBB)
      if (MI.getOpcode() == FrameSetupOpcode) {
        CallContext Context;
        collectCallInfo(MF, MBB, MI, Context);
        CallSeqVector.push_back(Context);
      }

  if (!isProfitable(MF, CallSeqVector))
    return false;

  for (auto CC : CallSeqVector) {
    if (CC.UsePush) {
      adjustCallSequence(MF, CC);
      Changed = true;
    }
  }

  return Changed;
}

X86CallFrameOptimization::InstClassification
X86CallFrameOptimization::classifyInstruction(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const X86RegisterInfo &RegInfo, DenseSet<unsigned int> &UsedRegs) {
  if (MI == MBB.end())
    return Exit;

  // The instructions we actually care about are movs onto the stack or special
  // cases of constant-stores to stack
  switch (MI->getOpcode()) {
  case X86::AND16mi8:
  case X86::AND32mi8:
  case X86::AND64mi8: {
    MachineOperand ImmOp = MI->getOperand(X86::AddrNumOperands);
    return ImmOp.getImm() == 0 ? Convert : Exit;
  }
  case X86::OR16mi8:
  case X86::OR32mi8:
  case X86::OR64mi8: {
    MachineOperand ImmOp = MI->getOperand(X86::AddrNumOperands);
    return ImmOp.getImm() == -1 ? Convert : Exit;
  }
  case X86::MOV32mi:
  case X86::MOV32mr:
  case X86::MOV64mi32:
  case X86::MOV64mr:
    return Convert;
  }

  // Not all calling conventions have only stack MOVs between the stack
  // adjust and the call.

  // We want to tolerate other instructions, to cover more cases.
  // In particular:
  // a) PCrel calls, where we expect an additional COPY of the basereg.
  // b) Passing frame-index addresses.
  // c) Calling conventions that have inreg parameters. These generate
  //    both copies and movs into registers.
  // To avoid creating lots of special cases, allow any instruction
  // that does not write into memory, does not def or use the stack
  // pointer, and does not def any register that was used by a preceding
  // push.
  // (Reading from memory is allowed, even if referenced through a
  // frame index, since these will get adjusted properly in PEI)

  // The reason for the last condition is that the pushes can't replace
  // the movs in place, because the order must be reversed.
  // So if we have a MOV32mr that uses EDX, then an instruction that defs
  // EDX, and then the call, after the transformation the push will use
  // the modified version of EDX, and not the original one.
  // Since we are still in SSA form at this point, we only need to
  // make sure we don't clobber any *physical* registers that were
  // used by an earlier mov that will become a push.

  if (MI->isCall() || MI->mayStore())
    return Exit;

  for (const MachineOperand &MO : MI->operands()) {
    if (!MO.isReg())
      continue;
    unsigned int Reg = MO.getReg();
    if (!RegInfo.isPhysicalRegister(Reg))
      continue;
    if (RegInfo.regsOverlap(Reg, RegInfo.getStackRegister()))
      return Exit;
    if (MO.isDef()) {
      for (unsigned int U : UsedRegs)
        if (RegInfo.regsOverlap(Reg, U))
          return Exit;
    }
  }

  return Skip;
}

void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
                                               MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator I,
                                               CallContext &Context) {
  // Check that this particular call sequence is amenable to the
  // transformation.
  const X86RegisterInfo &RegInfo =
      *static_cast<const X86RegisterInfo *>(STI->getRegisterInfo());

  // We expect to enter this at the beginning of a call sequence
  assert(I->getOpcode() == TII->getCallFrameSetupOpcode());
  MachineBasicBlock::iterator FrameSetup = I++;
  Context.FrameSetup = FrameSetup;

  // How much do we adjust the stack? This puts an upper bound on
  // the number of parameters actually passed on it.
  unsigned int MaxAdjust = TII->getFrameSize(*FrameSetup) >> Log2SlotSize;

  // A zero adjustment means no stack parameters
  if (!MaxAdjust) {
    Context.NoStackParams = true;
    return;
  }

  // Skip over DEBUG_VALUE.
  // For globals in PIC mode, we can have some LEAs here. Skip them as well.
  // TODO: Extend this to something that covers more cases.
  while (I->getOpcode() == X86::LEA32r || I->isDebugInstr())
    ++I;

  unsigned StackPtr = RegInfo.getStackRegister();
  auto StackPtrCopyInst = MBB.end();
  // SelectionDAG (but not FastISel) inserts a copy of ESP into a virtual
  // register. If it's there, use that virtual register as stack pointer
  // instead. Also, we need to locate this instruction so that we can later
  // safely ignore it while doing the conservative processing of the call
  // chain. The COPY can be located anywhere between the call-frame setup
  // instruction and its first use. We use the call instruction as a boundary
  // because it is usually cheaper to check if an instruction is a call than
  // checking if an instruction uses a register.
  for (auto J = I; !J->isCall(); ++J)
    if (J->isCopy() && J->getOperand(0).isReg() && J->getOperand(1).isReg() &&
        J->getOperand(1).getReg() == StackPtr) {
      StackPtrCopyInst = J;
      Context.SPCopy = &*J++;
      StackPtr = Context.SPCopy->getOperand(0).getReg();
      break;
    }

  // Scan the call setup sequence for the pattern we're looking for.
  // We only handle a simple case - a sequence of store instructions that
  // push a sequence of stack-slot-aligned values onto the stack, with
  // no gaps between them.
  if (MaxAdjust > 4)
    Context.ArgStoreVector.resize(MaxAdjust, nullptr);

  DenseSet<unsigned int> UsedRegs;

  for (InstClassification Classification = Skip; Classification != Exit; ++I) {
    // If this is the COPY of the stack pointer, it's ok to ignore.
    if (I == StackPtrCopyInst)
      continue;
    Classification = classifyInstruction(MBB, I, RegInfo, UsedRegs);
    if (Classification != Convert)
      continue;
    // We know the instruction has a supported store opcode.
    // We only want movs of the form:
    // mov imm/reg, k(%StackPtr)
    // If we run into something else, bail.
    // Note that AddrBaseReg may, counter to its name, not be a register,
    // but rather a frame index.
    // TODO: Support the fi case. This should probably work now that we
    // have the infrastructure to track the stack pointer within a call
    // sequence.
    if (!I->getOperand(X86::AddrBaseReg).isReg() ||
        (I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) ||
        !I->getOperand(X86::AddrScaleAmt).isImm() ||
        (I->getOperand(X86::AddrScaleAmt).getImm() != 1) ||
        (I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) ||
        (I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) ||
        !I->getOperand(X86::AddrDisp).isImm())
      return;

    int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm();
    assert(StackDisp >= 0 &&
           "Negative stack displacement when passing parameters");

    // We really don't want to consider the unaligned case.
    if (StackDisp & (SlotSize - 1))
      return;
    StackDisp >>= Log2SlotSize;

    assert((size_t)StackDisp < Context.ArgStoreVector.size() &&
           "Function call has more parameters than the stack is adjusted for.");

    // If the same stack slot is being filled twice, something's fishy.
    if (Context.ArgStoreVector[StackDisp] != nullptr)
      return;
    Context.ArgStoreVector[StackDisp] = &*I;

    for (const MachineOperand &MO : I->uses()) {
      if (!MO.isReg())
        continue;
      unsigned int Reg = MO.getReg();
      if (RegInfo.isPhysicalRegister(Reg))
        UsedRegs.insert(Reg);
    }
  }

  --I;

  // We now expect the end of the sequence. If we stopped early,
  // or reached the end of the block without finding a call, bail.
  if (I == MBB.end() || !I->isCall())
    return;

  Context.Call = &*I;
  if ((++I)->getOpcode() != TII->getCallFrameDestroyOpcode())
    return;

  // Now, go through the vector, and see that we don't have any gaps,
  // but only a series of storing instructions.
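  // As a hypothetical example with 4-byte slots: a call that stores to
  // displacements 0, 4 and 8 leaves ArgStoreVector as {S0, S4, S8, nullptr},
  // so the first loop below sets ExpectedDist to 12; a vector like
  // {S0, nullptr, S8, nullptr} has a gap, and the second loop rejects it by
  // returning with UsePush still false.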
  auto MMI = Context.ArgStoreVector.begin(), MME = Context.ArgStoreVector.end();
  for (; MMI != MME; ++MMI, Context.ExpectedDist += SlotSize)
    if (*MMI == nullptr)
      break;

  // If the call had no parameters, do nothing
  if (MMI == Context.ArgStoreVector.begin())
    return;

  // We are either at the last parameter, or a gap.
  // Make sure it's not a gap
  for (; MMI != MME; ++MMI)
    if (*MMI != nullptr)
      return;

  Context.UsePush = true;
}

void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
                                                  const CallContext &Context) {
  // Ok, we can in fact do the transformation for this call.
  // Do not remove the FrameSetup instruction, but adjust the parameters.
  // PEI will end up finalizing the handling of this.
  MachineBasicBlock::iterator FrameSetup = Context.FrameSetup;
  MachineBasicBlock &MBB = *(FrameSetup->getParent());
  TII->setFrameAdjustment(*FrameSetup, Context.ExpectedDist);

  DebugLoc DL = FrameSetup->getDebugLoc();
  bool Is64Bit = STI->is64Bit();
  // Now, iterate through the vector in reverse order, and replace the stores
  // to the stack with pushes. MOVmi/MOVmr don't have any defs, so no need to
  // replace uses.
  for (int Idx = (Context.ExpectedDist >> Log2SlotSize) - 1; Idx >= 0; --Idx) {
    MachineBasicBlock::iterator Store = *Context.ArgStoreVector[Idx];
    MachineOperand PushOp = Store->getOperand(X86::AddrNumOperands);
    MachineBasicBlock::iterator Push = nullptr;
    unsigned PushOpcode;
    switch (Store->getOpcode()) {
    default:
      llvm_unreachable("Unexpected Opcode!");
    case X86::AND16mi8:
    case X86::AND32mi8:
    case X86::AND64mi8:
    case X86::OR16mi8:
    case X86::OR32mi8:
    case X86::OR64mi8:
    case X86::MOV32mi:
    case X86::MOV64mi32:
      PushOpcode = Is64Bit ? X86::PUSH64i32 : X86::PUSHi32;
      // If the operand is a small (8-bit) immediate, we can use a
      // PUSH instruction with a shorter encoding.
      // Note that isImm() may fail even though this is a MOVmi, because
      // the operand can also be a symbol.
      if (PushOp.isImm()) {
        int64_t Val = PushOp.getImm();
        if (isInt<8>(Val))
          PushOpcode = Is64Bit ? X86::PUSH64i8 : X86::PUSH32i8;
      }
      Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode)).add(PushOp);
      break;
    case X86::MOV32mr:
    case X86::MOV64mr: {
      unsigned int Reg = PushOp.getReg();

      // If storing a 32-bit vreg on 64-bit targets, extend to a 64-bit vreg
      // in preparation for the PUSH64. The upper 32 bits can be undef.
      if (Is64Bit && Store->getOpcode() == X86::MOV32mr) {
        unsigned UndefReg = MRI->createVirtualRegister(&X86::GR64RegClass);
        Reg = MRI->createVirtualRegister(&X86::GR64RegClass);
        BuildMI(MBB, Context.Call, DL, TII->get(X86::IMPLICIT_DEF), UndefReg);
        BuildMI(MBB, Context.Call, DL, TII->get(X86::INSERT_SUBREG), Reg)
            .addReg(UndefReg)
            .add(PushOp)
            .addImm(X86::sub_32bit);
      }

      // If PUSHrmm is not slow on this target, try to fold the source of the
      // push into the instruction.
      bool SlowPUSHrmm = STI->isAtom() || STI->isSLM();

      // Check that this is legal to fold. Right now, we're extremely
      // conservative about that.
      MachineInstr *DefMov = nullptr;
      if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) {
        PushOpcode = Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm;
        Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode));

        unsigned NumOps = DefMov->getDesc().getNumOperands();
        for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
          Push->addOperand(DefMov->getOperand(i));

        DefMov->eraseFromParent();
      } else {
        PushOpcode = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
        Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode))
                   .addReg(Reg)
                   .getInstr();
      }
      break;
    }
    }

    // For debugging, when using SP-based CFA, we need to adjust the CFA
    // offset after each push.
    // TODO: This is needed only if we require precise CFA.
    if (!TFL->hasFP(MF))
      TFL->BuildCFI(
          MBB, std::next(Push), DL,
          MCCFIInstruction::createAdjustCfaOffset(nullptr, SlotSize));

    MBB.erase(Store);
  }

  // The stack-pointer copy is no longer used in the call sequences.
  // There should not be any other users, but we can't commit to that, so:
  if (Context.SPCopy && MRI->use_empty(Context.SPCopy->getOperand(0).getReg()))
    Context.SPCopy->eraseFromParent();

  // Once we've done this, we need to make sure PEI doesn't assume a reserved
  // frame.
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setHasPushSequences(true);
}

MachineInstr *X86CallFrameOptimization::canFoldIntoRegPush(
    MachineBasicBlock::iterator FrameSetup, unsigned Reg) {
  // Do an extremely restricted form of load folding.
  // ISel will often create patterns like:
  // movl 4(%edi), %eax
  // movl 8(%edi), %ecx
  // movl 12(%edi), %edx
  // movl %edx, 8(%esp)
  // movl %ecx, 4(%esp)
  // movl %eax, (%esp)
  // call
  // Get rid of those with prejudice.
  if (!TargetRegisterInfo::isVirtualRegister(Reg))
    return nullptr;

  // Make sure this is the only use of Reg.
  if (!MRI->hasOneNonDBGUse(Reg))
    return nullptr;

  MachineInstr &DefMI = *MRI->getVRegDef(Reg);

  // Make sure the def is a MOV from memory.
  // If the def is in another block, give up.
  if ((DefMI.getOpcode() != X86::MOV32rm &&
       DefMI.getOpcode() != X86::MOV64rm) ||
      DefMI.getParent() != FrameSetup->getParent())
    return nullptr;

  // Make sure we don't have any instructions between DefMI and the
  // push that make folding the load illegal.
  for (MachineBasicBlock::iterator I = DefMI; I != FrameSetup; ++I)
    if (I->isLoadFoldBarrier())
      return nullptr;

  return &DefMI;
}

FunctionPass *llvm::createX86CallFrameOptimization() {
  return new X86CallFrameOptimization();
}