1 //===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains the X86 implementation of TargetFrameLowering class. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "X86FrameLowering.h" 15 #include "X86InstrBuilder.h" 16 #include "X86InstrInfo.h" 17 #include "X86MachineFunctionInfo.h" 18 #include "X86Subtarget.h" 19 #include "X86TargetMachine.h" 20 #include "llvm/ADT/SmallSet.h" 21 #include "llvm/CodeGen/MachineFrameInfo.h" 22 #include "llvm/CodeGen/MachineFunction.h" 23 #include "llvm/CodeGen/MachineInstrBuilder.h" 24 #include "llvm/CodeGen/MachineModuleInfo.h" 25 #include "llvm/CodeGen/MachineRegisterInfo.h" 26 #include "llvm/IR/DataLayout.h" 27 #include "llvm/IR/Function.h" 28 #include "llvm/MC/MCAsmInfo.h" 29 #include "llvm/MC/MCSymbol.h" 30 #include "llvm/Support/CommandLine.h" 31 #include "llvm/Target/TargetOptions.h" 32 #include "llvm/Support/Debug.h" 33 #include <cstdlib> 34 35 using namespace llvm; 36 37 // FIXME: completely move here. 38 extern cl::opt<bool> ForceStackAlign; 39 40 bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { 41 return !MF.getFrameInfo()->hasVarSizedObjects() && 42 !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences(); 43 } 44 45 /// canSimplifyCallFramePseudos - If there is a reserved call frame, the 46 /// call frame pseudos can be simplified. Having a FP, as in the default 47 /// implementation, is not sufficient here since we can't always use it. 48 /// Use a more nuanced condition. 
49 bool 50 X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const { 51 const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *> 52 (MF.getSubtarget().getRegisterInfo()); 53 return hasReservedCallFrame(MF) || 54 (hasFP(MF) && !TRI->needsStackRealignment(MF)) 55 || TRI->hasBasePointer(MF); 56 } 57 58 // needsFrameIndexResolution - Do we need to perform FI resolution for 59 // this function. Normally, this is required only when the function 60 // has any stack objects. However, FI resolution actually has another job, 61 // not apparent from the title - it resolves callframesetup/destroy 62 // that were not simplified earlier. 63 // So, this is required for x86 functions that have push sequences even 64 // when there are no stack objects. 65 bool 66 X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const { 67 return MF.getFrameInfo()->hasStackObjects() || 68 MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences(); 69 } 70 71 /// hasFP - Return true if the specified function should have a dedicated frame 72 /// pointer register. This is true if the function has variable sized allocas 73 /// or if frame pointer elimination is disabled. 
74 bool X86FrameLowering::hasFP(const MachineFunction &MF) const { 75 const MachineFrameInfo *MFI = MF.getFrameInfo(); 76 const MachineModuleInfo &MMI = MF.getMMI(); 77 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); 78 79 return (MF.getTarget().Options.DisableFramePointerElim(MF) || 80 RegInfo->needsStackRealignment(MF) || 81 MFI->hasVarSizedObjects() || 82 MFI->isFrameAddressTaken() || MFI->hasInlineAsmWithSPAdjust() || 83 MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() || 84 MMI.callsUnwindInit() || MMI.callsEHReturn() || 85 MFI->hasStackMap() || MFI->hasPatchPoint()); 86 } 87 88 static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) { 89 if (IsLP64) { 90 if (isInt<8>(Imm)) 91 return X86::SUB64ri8; 92 return X86::SUB64ri32; 93 } else { 94 if (isInt<8>(Imm)) 95 return X86::SUB32ri8; 96 return X86::SUB32ri; 97 } 98 } 99 100 static unsigned getADDriOpcode(unsigned IsLP64, int64_t Imm) { 101 if (IsLP64) { 102 if (isInt<8>(Imm)) 103 return X86::ADD64ri8; 104 return X86::ADD64ri32; 105 } else { 106 if (isInt<8>(Imm)) 107 return X86::ADD32ri8; 108 return X86::ADD32ri; 109 } 110 } 111 112 static unsigned getSUBrrOpcode(unsigned isLP64) { 113 return isLP64 ? X86::SUB64rr : X86::SUB32rr; 114 } 115 116 static unsigned getADDrrOpcode(unsigned isLP64) { 117 return isLP64 ? X86::ADD64rr : X86::ADD32rr; 118 } 119 120 static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) { 121 if (IsLP64) { 122 if (isInt<8>(Imm)) 123 return X86::AND64ri8; 124 return X86::AND64ri32; 125 } 126 if (isInt<8>(Imm)) 127 return X86::AND32ri8; 128 return X86::AND32ri; 129 } 130 131 static unsigned getLEArOpcode(unsigned IsLP64) { 132 return IsLP64 ? X86::LEA64r : X86::LEA32r; 133 } 134 135 /// findDeadCallerSavedReg - Return a caller-saved register that isn't live 136 /// when it reaches the "return" instruction. We can then pop a stack object 137 /// to this register without worry about clobbering it. 
138 static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, 139 MachineBasicBlock::iterator &MBBI, 140 const TargetRegisterInfo &TRI, 141 bool Is64Bit) { 142 const MachineFunction *MF = MBB.getParent(); 143 const Function *F = MF->getFunction(); 144 if (!F || MF->getMMI().callsEHReturn()) 145 return 0; 146 147 static const uint16_t CallerSavedRegs32Bit[] = { 148 X86::EAX, X86::EDX, X86::ECX, 0 149 }; 150 151 static const uint16_t CallerSavedRegs64Bit[] = { 152 X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI, 153 X86::R8, X86::R9, X86::R10, X86::R11, 0 154 }; 155 156 unsigned Opc = MBBI->getOpcode(); 157 switch (Opc) { 158 default: return 0; 159 case X86::RETL: 160 case X86::RETQ: 161 case X86::RETIL: 162 case X86::RETIQ: 163 case X86::TCRETURNdi: 164 case X86::TCRETURNri: 165 case X86::TCRETURNmi: 166 case X86::TCRETURNdi64: 167 case X86::TCRETURNri64: 168 case X86::TCRETURNmi64: 169 case X86::EH_RETURN: 170 case X86::EH_RETURN64: { 171 SmallSet<uint16_t, 8> Uses; 172 for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) { 173 MachineOperand &MO = MBBI->getOperand(i); 174 if (!MO.isReg() || MO.isDef()) 175 continue; 176 unsigned Reg = MO.getReg(); 177 if (!Reg) 178 continue; 179 for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI) 180 Uses.insert(*AI); 181 } 182 183 const uint16_t *CS = Is64Bit ? 
CallerSavedRegs64Bit : CallerSavedRegs32Bit; 184 for (; *CS; ++CS) 185 if (!Uses.count(*CS)) 186 return *CS; 187 } 188 } 189 190 return 0; 191 } 192 193 static bool isEAXLiveIn(MachineFunction &MF) { 194 for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(), 195 EE = MF.getRegInfo().livein_end(); II != EE; ++II) { 196 unsigned Reg = II->first; 197 198 if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX || 199 Reg == X86::AH || Reg == X86::AL) 200 return true; 201 } 202 203 return false; 204 } 205 206 /// emitSPUpdate - Emit a series of instructions to increment / decrement the 207 /// stack pointer by a constant value. 208 static 209 void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, 210 unsigned StackPtr, int64_t NumBytes, 211 bool Is64BitTarget, bool Is64BitStackPtr, bool UseLEA, 212 const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) { 213 bool isSub = NumBytes < 0; 214 uint64_t Offset = isSub ? -NumBytes : NumBytes; 215 unsigned Opc; 216 if (UseLEA) 217 Opc = getLEArOpcode(Is64BitStackPtr); 218 else 219 Opc = isSub 220 ? getSUBriOpcode(Is64BitStackPtr, Offset) 221 : getADDriOpcode(Is64BitStackPtr, Offset); 222 223 uint64_t Chunk = (1LL << 31) - 1; 224 DebugLoc DL = MBB.findDebugLoc(MBBI); 225 226 while (Offset) { 227 if (Offset > Chunk) { 228 // Rather than emit a long series of instructions for large offsets, 229 // load the offset into a register and do one sub/add 230 unsigned Reg = 0; 231 232 if (isSub && !isEAXLiveIn(*MBB.getParent())) 233 Reg = (unsigned)(Is64BitTarget ? X86::RAX : X86::EAX); 234 else 235 Reg = findDeadCallerSavedReg(MBB, MBBI, TRI, Is64BitTarget); 236 237 if (Reg) { 238 Opc = Is64BitTarget ? X86::MOV64ri : X86::MOV32ri; 239 BuildMI(MBB, MBBI, DL, TII.get(Opc), Reg) 240 .addImm(Offset); 241 Opc = isSub 242 ? 
getSUBrrOpcode(Is64BitTarget) 243 : getADDrrOpcode(Is64BitTarget); 244 MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) 245 .addReg(StackPtr) 246 .addReg(Reg); 247 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. 248 Offset = 0; 249 continue; 250 } 251 } 252 253 uint64_t ThisVal = std::min(Offset, Chunk); 254 if (ThisVal == (Is64BitTarget ? 8 : 4)) { 255 // Use push / pop instead. 256 unsigned Reg = isSub 257 ? (unsigned)(Is64BitTarget ? X86::RAX : X86::EAX) 258 : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64BitTarget); 259 if (Reg) { 260 Opc = isSub 261 ? (Is64BitTarget ? X86::PUSH64r : X86::PUSH32r) 262 : (Is64BitTarget ? X86::POP64r : X86::POP32r); 263 MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc)) 264 .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub)); 265 if (isSub) 266 MI->setFlag(MachineInstr::FrameSetup); 267 Offset -= ThisVal; 268 continue; 269 } 270 } 271 272 MachineInstr *MI = nullptr; 273 274 if (UseLEA) { 275 MI = addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr), 276 StackPtr, false, isSub ? -ThisVal : ThisVal); 277 } else { 278 MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) 279 .addReg(StackPtr) 280 .addImm(ThisVal); 281 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. 282 } 283 284 if (isSub) 285 MI->setFlag(MachineInstr::FrameSetup); 286 287 Offset -= ThisVal; 288 } 289 } 290 291 /// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator. 
292 static 293 void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, 294 unsigned StackPtr, uint64_t *NumBytes = nullptr) { 295 if (MBBI == MBB.begin()) return; 296 297 MachineBasicBlock::iterator PI = std::prev(MBBI); 298 unsigned Opc = PI->getOpcode(); 299 if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || 300 Opc == X86::ADD32ri || Opc == X86::ADD32ri8 || 301 Opc == X86::LEA32r || Opc == X86::LEA64_32r) && 302 PI->getOperand(0).getReg() == StackPtr) { 303 if (NumBytes) 304 *NumBytes += PI->getOperand(2).getImm(); 305 MBB.erase(PI); 306 } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || 307 Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && 308 PI->getOperand(0).getReg() == StackPtr) { 309 if (NumBytes) 310 *NumBytes -= PI->getOperand(2).getImm(); 311 MBB.erase(PI); 312 } 313 } 314 315 /// mergeSPUpdates - Checks the instruction before/after the passed 316 /// instruction. If it is an ADD/SUB/LEA instruction it is deleted argument and 317 /// the stack adjustment is returned as a positive value for ADD/LEA and a 318 /// negative for SUB. 319 static int mergeSPUpdates(MachineBasicBlock &MBB, 320 MachineBasicBlock::iterator &MBBI, unsigned StackPtr, 321 bool doMergeWithPrevious) { 322 if ((doMergeWithPrevious && MBBI == MBB.begin()) || 323 (!doMergeWithPrevious && MBBI == MBB.end())) 324 return 0; 325 326 MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI; 327 MachineBasicBlock::iterator NI = doMergeWithPrevious ? 
nullptr 328 : std::next(MBBI); 329 unsigned Opc = PI->getOpcode(); 330 int Offset = 0; 331 332 if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || 333 Opc == X86::ADD32ri || Opc == X86::ADD32ri8 || 334 Opc == X86::LEA32r || Opc == X86::LEA64_32r) && 335 PI->getOperand(0).getReg() == StackPtr){ 336 Offset += PI->getOperand(2).getImm(); 337 MBB.erase(PI); 338 if (!doMergeWithPrevious) MBBI = NI; 339 } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || 340 Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && 341 PI->getOperand(0).getReg() == StackPtr) { 342 Offset -= PI->getOperand(2).getImm(); 343 MBB.erase(PI); 344 if (!doMergeWithPrevious) MBBI = NI; 345 } 346 347 return Offset; 348 } 349 350 void 351 X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, 352 MachineBasicBlock::iterator MBBI, 353 DebugLoc DL) const { 354 MachineFunction &MF = *MBB.getParent(); 355 MachineFrameInfo *MFI = MF.getFrameInfo(); 356 MachineModuleInfo &MMI = MF.getMMI(); 357 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); 358 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 359 360 // Add callee saved registers to move list. 361 const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); 362 if (CSI.empty()) return; 363 364 // Calculate offsets. 365 for (std::vector<CalleeSavedInfo>::const_iterator 366 I = CSI.begin(), E = CSI.end(); I != E; ++I) { 367 int64_t Offset = MFI->getObjectOffset(I->getFrameIdx()); 368 unsigned Reg = I->getReg(); 369 370 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); 371 unsigned CFIIndex = 372 MMI.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, 373 Offset)); 374 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) 375 .addCFIIndex(CFIIndex); 376 } 377 } 378 379 /// usesTheStack - This function checks if any of the users of EFLAGS 380 /// copies the EFLAGS. 
We know that the code that lowers COPY of EFLAGS has 381 /// to use the stack, and if we don't adjust the stack we clobber the first 382 /// frame index. 383 /// See X86InstrInfo::copyPhysReg. 384 static bool usesTheStack(const MachineFunction &MF) { 385 const MachineRegisterInfo &MRI = MF.getRegInfo(); 386 387 for (MachineRegisterInfo::reg_instr_iterator 388 ri = MRI.reg_instr_begin(X86::EFLAGS), re = MRI.reg_instr_end(); 389 ri != re; ++ri) 390 if (ri->isCopy()) 391 return true; 392 393 return false; 394 } 395 396 void X86FrameLowering::emitStackProbeCall(MachineFunction &MF, 397 MachineBasicBlock &MBB, 398 MachineBasicBlock::iterator MBBI, 399 DebugLoc DL) { 400 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); 401 const TargetInstrInfo &TII = *STI.getInstrInfo(); 402 bool Is64Bit = STI.is64Bit(); 403 bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large; 404 405 unsigned CallOp; 406 if (Is64Bit) 407 CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32; 408 else 409 CallOp = X86::CALLpcrel32; 410 411 const char *Symbol; 412 if (Is64Bit) { 413 if (STI.isTargetCygMing()) { 414 Symbol = "___chkstk_ms"; 415 } else { 416 Symbol = "__chkstk"; 417 } 418 } else if (STI.isTargetCygMing()) 419 Symbol = "_alloca"; 420 else 421 Symbol = "_chkstk"; 422 423 MachineInstrBuilder CI; 424 425 // All current stack probes take AX and SP as input, clobber flags, and 426 // preserve all registers. x86_64 probes leave RSP unmodified. 427 if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) { 428 // For the large code model, we have to call through a register. Use R11, 429 // as it is scratch in all supported calling conventions. 430 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11) 431 .addExternalSymbol(Symbol); 432 CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11); 433 } else { 434 CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addExternalSymbol(Symbol); 435 } 436 437 unsigned AX = Is64Bit ? 
X86::RAX : X86::EAX; 438 unsigned SP = Is64Bit ? X86::RSP : X86::ESP; 439 CI.addReg(AX, RegState::Implicit) 440 .addReg(SP, RegState::Implicit) 441 .addReg(AX, RegState::Define | RegState::Implicit) 442 .addReg(SP, RegState::Define | RegState::Implicit) 443 .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); 444 445 if (Is64Bit) { 446 // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp 447 // themselves. It also does not clobber %rax so we can reuse it when 448 // adjusting %rsp. 449 BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), X86::RSP) 450 .addReg(X86::RSP) 451 .addReg(X86::RAX); 452 } 453 } 454 455 static unsigned calculateSetFPREG(uint64_t SPAdjust) { 456 // Win64 ABI has a less restrictive limitation of 240; 128 works equally well 457 // and might require smaller successive adjustments. 458 const uint64_t Win64MaxSEHOffset = 128; 459 uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset); 460 // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode. 461 return SEHFrameOffset & -16; 462 } 463 464 // If we're forcing a stack realignment we can't rely on just the frame 465 // info, we need to know the ABI stack alignment as well in case we 466 // have a call out. Otherwise just make sure we have some alignment - we'll 467 // go with the minimum SlotSize. 468 static uint64_t calculateMaxStackAlign(const MachineFunction &MF) { 469 const MachineFrameInfo *MFI = MF.getFrameInfo(); 470 uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment. 471 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); 472 const X86RegisterInfo *RegInfo = STI.getRegisterInfo(); 473 unsigned SlotSize = RegInfo->getSlotSize(); 474 unsigned StackAlign = STI.getFrameLowering()->getStackAlignment(); 475 if (ForceStackAlign) { 476 if (MFI->hasCalls()) 477 MaxAlign = (StackAlign > MaxAlign) ? 
StackAlign : MaxAlign; 478 else if (MaxAlign < SlotSize) 479 MaxAlign = SlotSize; 480 } 481 return MaxAlign; 482 } 483 484 /// emitPrologue - Push callee-saved registers onto the stack, which 485 /// automatically adjust the stack pointer. Adjust the stack pointer to allocate 486 /// space for local variables. Also emit labels used by the exception handler to 487 /// generate the exception handling frames. 488 489 /* 490 Here's a gist of what gets emitted: 491 492 ; Establish frame pointer, if needed 493 [if needs FP] 494 push %rbp 495 .cfi_def_cfa_offset 16 496 .cfi_offset %rbp, -16 497 .seh_pushreg %rpb 498 mov %rsp, %rbp 499 .cfi_def_cfa_register %rbp 500 501 ; Spill general-purpose registers 502 [for all callee-saved GPRs] 503 pushq %<reg> 504 [if not needs FP] 505 .cfi_def_cfa_offset (offset from RETADDR) 506 .seh_pushreg %<reg> 507 508 ; If the required stack alignment > default stack alignment 509 ; rsp needs to be re-aligned. This creates a "re-alignment gap" 510 ; of unknown size in the stack frame. 511 [if stack needs re-alignment] 512 and $MASK, %rsp 513 514 ; Allocate space for locals 515 [if target is Windows and allocated space > 4096 bytes] 516 ; Windows needs special care for allocations larger 517 ; than one page. 518 mov $NNN, %rax 519 call ___chkstk_ms/___chkstk 520 sub %rax, %rsp 521 [else] 522 sub $NNN, %rsp 523 524 [if needs FP] 525 .seh_stackalloc (size of XMM spill slots) 526 .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots 527 [else] 528 .seh_stackalloc NNN 529 530 ; Spill XMMs 531 ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved, 532 ; they may get spilled on any platform, if the current function 533 ; calls @llvm.eh.unwind.init 534 [if needs FP] 535 [for all callee-saved XMM registers] 536 movaps %<xmm reg>, -MMM(%rbp) 537 [for all callee-saved XMM registers] 538 .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset) 539 ; i.e. 
the offset relative to (%rbp - SEHFrameOffset) 540 [else] 541 [for all callee-saved XMM registers] 542 movaps %<xmm reg>, KKK(%rsp) 543 [for all callee-saved XMM registers] 544 .seh_savexmm %<xmm reg>, KKK 545 546 .seh_endprologue 547 548 [if needs base pointer] 549 mov %rsp, %rbx 550 [if needs to restore base pointer] 551 mov %rsp, -MMM(%rbp) 552 553 ; Emit CFI info 554 [if needs FP] 555 [for all callee-saved registers] 556 .cfi_offset %<reg>, (offset from %rbp) 557 [else] 558 .cfi_def_cfa_offset (offset from RETADDR) 559 [for all callee-saved registers] 560 .cfi_offset %<reg>, (offset from %rsp) 561 562 Notes: 563 - .seh directives are emitted only for Windows 64 ABI 564 - .cfi directives are emitted for all other ABIs 565 - for 32-bit code, substitute %e?? registers for %r?? 566 */ 567 568 void X86FrameLowering::emitPrologue(MachineFunction &MF) const { 569 MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB. 570 MachineBasicBlock::iterator MBBI = MBB.begin(); 571 MachineFrameInfo *MFI = MF.getFrameInfo(); 572 const Function *Fn = MF.getFunction(); 573 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); 574 const X86RegisterInfo *RegInfo = STI.getRegisterInfo(); 575 const TargetInstrInfo &TII = *STI.getInstrInfo(); 576 MachineModuleInfo &MMI = MF.getMMI(); 577 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 578 uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment. 579 uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate. 580 bool HasFP = hasFP(MF); 581 bool Is64Bit = STI.is64Bit(); 582 // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit. 583 const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64(); 584 bool IsWin64 = STI.isTargetWin64(); 585 // Not necessarily synonymous with IsWin64. 
586 bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); 587 bool NeedsWinEH = IsWinEH && Fn->needsUnwindTableEntry(); 588 bool NeedsDwarfCFI = 589 !IsWinEH && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry()); 590 bool UseLEA = STI.useLeaForSP(); 591 unsigned SlotSize = RegInfo->getSlotSize(); 592 unsigned FramePtr = RegInfo->getFrameRegister(MF); 593 const unsigned MachineFramePtr = 594 STI.isTarget64BitILP32() 595 ? getX86SubSuperRegister(FramePtr, MVT::i64, false) 596 : FramePtr; 597 unsigned StackPtr = RegInfo->getStackRegister(); 598 unsigned BasePtr = RegInfo->getBaseRegister(); 599 DebugLoc DL; 600 601 // Add RETADDR move area to callee saved frame size. 602 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); 603 if (TailCallReturnAddrDelta && IsWinEH) 604 report_fatal_error("Can't handle guaranteed tail call under win64 yet"); 605 606 if (TailCallReturnAddrDelta < 0) 607 X86FI->setCalleeSavedFrameSize( 608 X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta); 609 610 bool UseStackProbe = (STI.isOSWindows() && !STI.isTargetMachO()); 611 612 // The default stack probe size is 4096 if the function has no stackprobesize 613 // attribute. 614 unsigned StackProbeSize = 4096; 615 if (Fn->hasFnAttribute("stack-probe-size")) 616 Fn->getFnAttribute("stack-probe-size") 617 .getValueAsString() 618 .getAsInteger(0, StackProbeSize); 619 620 // If this is x86-64 and the Red Zone is not disabled, if we are a leaf 621 // function, and use up to 128 bytes of stack space, don't have a frame 622 // pointer, calls, or dynamic alloca then we do not need to adjust the 623 // stack pointer (we fit in the Red Zone). We also check that we don't 624 // push and pop from the stack. 625 if (Is64Bit && !Fn->hasFnAttribute(Attribute::NoRedZone) && 626 !RegInfo->needsStackRealignment(MF) && 627 !MFI->hasVarSizedObjects() && // No dynamic alloca. 628 !MFI->adjustsStack() && // No calls. 
629 !IsWin64 && // Win64 has no Red Zone 630 !usesTheStack(MF) && // Don't push and pop. 631 !MF.shouldSplitStack()) { // Regular stack 632 uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); 633 if (HasFP) MinSize += SlotSize; 634 StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); 635 MFI->setStackSize(StackSize); 636 } 637 638 // Insert stack pointer adjustment for later moving of return addr. Only 639 // applies to tail call optimized functions where the callee argument stack 640 // size is bigger than the callers. 641 if (TailCallReturnAddrDelta < 0) { 642 MachineInstr *MI = 643 BuildMI(MBB, MBBI, DL, 644 TII.get(getSUBriOpcode(Uses64BitFramePtr, -TailCallReturnAddrDelta)), 645 StackPtr) 646 .addReg(StackPtr) 647 .addImm(-TailCallReturnAddrDelta) 648 .setMIFlag(MachineInstr::FrameSetup); 649 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. 650 } 651 652 // Mapping for machine moves: 653 // 654 // DST: VirtualFP AND 655 // SRC: VirtualFP => DW_CFA_def_cfa_offset 656 // ELSE => DW_CFA_def_cfa 657 // 658 // SRC: VirtualFP AND 659 // DST: Register => DW_CFA_def_cfa_register 660 // 661 // ELSE 662 // OFFSET < 0 => DW_CFA_offset_extended_sf 663 // REG < 64 => DW_CFA_offset + Reg 664 // ELSE => DW_CFA_offset_extended 665 666 uint64_t NumBytes = 0; 667 int stackGrowth = -SlotSize; 668 669 if (HasFP) { 670 // Calculate required stack adjustment. 671 uint64_t FrameSize = StackSize - SlotSize; 672 // If required, include space for extra hidden slot for stashing base pointer. 673 if (X86FI->getRestoreBasePointer()) 674 FrameSize += SlotSize; 675 676 NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize(); 677 678 // Callee-saved registers are pushed on stack before the stack is realigned. 
679 if (RegInfo->needsStackRealignment(MF) && !IsWinEH) 680 NumBytes = RoundUpToAlignment(NumBytes, MaxAlign); 681 682 // Get the offset of the stack slot for the EBP register, which is 683 // guaranteed to be the last slot by processFunctionBeforeFrameFinalized. 684 // Update the frame offset adjustment. 685 MFI->setOffsetAdjustment(-NumBytes); 686 687 // Save EBP/RBP into the appropriate stack slot. 688 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r)) 689 .addReg(MachineFramePtr, RegState::Kill) 690 .setMIFlag(MachineInstr::FrameSetup); 691 692 if (NeedsDwarfCFI) { 693 // Mark the place where EBP/RBP was saved. 694 // Define the current CFA rule to use the provided offset. 695 assert(StackSize); 696 unsigned CFIIndex = MMI.addFrameInst( 697 MCCFIInstruction::createDefCfaOffset(nullptr, 2 * stackGrowth)); 698 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) 699 .addCFIIndex(CFIIndex); 700 701 // Change the rule for the FramePtr to be an "offset" rule. 702 unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(MachineFramePtr, true); 703 CFIIndex = MMI.addFrameInst( 704 MCCFIInstruction::createOffset(nullptr, 705 DwarfFramePtr, 2 * stackGrowth)); 706 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) 707 .addCFIIndex(CFIIndex); 708 } 709 710 if (NeedsWinEH) { 711 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)) 712 .addImm(FramePtr) 713 .setMIFlag(MachineInstr::FrameSetup); 714 } 715 716 if (!IsWinEH) { 717 // Update EBP with the new base value. 718 BuildMI(MBB, MBBI, DL, 719 TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), 720 FramePtr) 721 .addReg(StackPtr) 722 .setMIFlag(MachineInstr::FrameSetup); 723 } 724 725 if (NeedsDwarfCFI) { 726 // Mark effective beginning of when frame pointer becomes valid. 727 // Define the current CFA to use the EBP/RBP register. 
728 unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(MachineFramePtr, true); 729 unsigned CFIIndex = MMI.addFrameInst( 730 MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr)); 731 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) 732 .addCFIIndex(CFIIndex); 733 } 734 735 // Mark the FramePtr as live-in in every block. 736 for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) 737 I->addLiveIn(MachineFramePtr); 738 } else { 739 NumBytes = StackSize - X86FI->getCalleeSavedFrameSize(); 740 } 741 742 // Skip the callee-saved push instructions. 743 bool PushedRegs = false; 744 int StackOffset = 2 * stackGrowth; 745 746 while (MBBI != MBB.end() && 747 (MBBI->getOpcode() == X86::PUSH32r || 748 MBBI->getOpcode() == X86::PUSH64r)) { 749 PushedRegs = true; 750 unsigned Reg = MBBI->getOperand(0).getReg(); 751 ++MBBI; 752 753 if (!HasFP && NeedsDwarfCFI) { 754 // Mark callee-saved push instruction. 755 // Define the current CFA rule to use the provided offset. 756 assert(StackSize); 757 unsigned CFIIndex = MMI.addFrameInst( 758 MCCFIInstruction::createDefCfaOffset(nullptr, StackOffset)); 759 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) 760 .addCFIIndex(CFIIndex); 761 StackOffset += stackGrowth; 762 } 763 764 if (NeedsWinEH) { 765 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)).addImm(Reg).setMIFlag( 766 MachineInstr::FrameSetup); 767 } 768 } 769 770 // Realign stack after we pushed callee-saved registers (so that we'll be 771 // able to calculate their offsets from the frame pointer). 772 // Don't do this for Win64, it needs to realign the stack after the prologue. 
773 if (!IsWinEH && RegInfo->needsStackRealignment(MF)) { 774 assert(HasFP && "There should be a frame pointer if stack is realigned."); 775 uint64_t Val = -MaxAlign; 776 MachineInstr *MI = 777 BuildMI(MBB, MBBI, DL, TII.get(getANDriOpcode(Uses64BitFramePtr, Val)), 778 StackPtr) 779 .addReg(StackPtr) 780 .addImm(Val) 781 .setMIFlag(MachineInstr::FrameSetup); 782 783 // The EFLAGS implicit def is dead. 784 MI->getOperand(3).setIsDead(); 785 } 786 787 // If there is an SUB32ri of ESP immediately before this instruction, merge 788 // the two. This can be the case when tail call elimination is enabled and 789 // the callee has more arguments then the caller. 790 NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true); 791 792 // Adjust stack pointer: ESP -= numbytes. 793 794 // Windows and cygwin/mingw require a prologue helper routine when allocating 795 // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw 796 // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the 797 // stack and adjust the stack pointer in one go. The 64-bit version of 798 // __chkstk is only responsible for probing the stack. The 64-bit prologue is 799 // responsible for adjusting the stack pointer. Touching the stack at 4K 800 // increments is necessary to ensure that the guard pages used by the OS 801 // virtual memory manager are allocated in correct sequence. 802 uint64_t AlignedNumBytes = NumBytes; 803 if (IsWinEH && RegInfo->needsStackRealignment(MF)) 804 AlignedNumBytes = RoundUpToAlignment(AlignedNumBytes, MaxAlign); 805 if (AlignedNumBytes >= StackProbeSize && UseStackProbe) { 806 // Check whether EAX is livein for this function. 807 bool isEAXAlive = isEAXLiveIn(MF); 808 809 if (isEAXAlive) { 810 // Sanity check that EAX is not livein for this function. 811 // It should not be, so throw an assert. 
    assert(!Is64Bit && "EAX is livein in x64 case!");

    // Save EAX
    BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
      .addReg(X86::EAX, RegState::Kill)
      .setMIFlag(MachineInstr::FrameSetup);
  }

  if (Is64Bit) {
    // Handle the 64-bit Windows ABI case where we need to call __chkstk.
    // Function prologue is responsible for adjusting the stack pointer.
    // Pick the smallest mov encoding that can materialize NumBytes; the
    // probe routine reads the allocation size from (R|E)AX.
    if (isUInt<32>(NumBytes)) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
          .addImm(NumBytes)
          .setMIFlag(MachineInstr::FrameSetup);
    } else if (isInt<32>(NumBytes)) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri32), X86::RAX)
          .addImm(NumBytes)
          .setMIFlag(MachineInstr::FrameSetup);
    } else {
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX)
          .addImm(NumBytes)
          .setMIFlag(MachineInstr::FrameSetup);
    }
  } else {
    // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
    // We'll also use 4 already allocated bytes for EAX.
    BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
      .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
      .setMIFlag(MachineInstr::FrameSetup);
  }

  // Save a pointer to the MI where we set AX.
  MachineBasicBlock::iterator SetRAX = MBBI;
  --SetRAX;

  // Call __chkstk, __chkstk_ms, or __alloca.
  emitStackProbeCall(MF, MBB, MBBI, DL);

  // Apply the frame setup flag to all inserted instrs (the AX setup, the
  // probe call, and anything the probe helper emitted in between).
  for (; SetRAX != MBBI; ++SetRAX)
    SetRAX->setFlag(MachineInstr::FrameSetup);

  if (isEAXAlive) {
    // Restore EAX from the slot deliberately left for it above.
    MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
                                            X86::EAX),
                                    StackPtr, false, NumBytes - 4);
    MI->setFlag(MachineInstr::FrameSetup);
    MBB.insert(MBBI, MI);
  }
  } else if (NumBytes) {
    // Small allocation: adjust SP directly, no probe needed.
    emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, Uses64BitFramePtr,
                 UseLEA, TII, *RegInfo);
  }

  if (NeedsWinEH && NumBytes)
    BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
        .addImm(NumBytes)
        .setMIFlag(MachineInstr::FrameSetup);

  int SEHFrameOffset = 0;
  if (IsWinEH && HasFP) {
    // Establish the frame pointer for Win64 unwind: either an LEA off SP
    // (when the computed offset is nonzero) or a plain register copy.
    SEHFrameOffset = calculateSetFPREG(NumBytes);
    if (SEHFrameOffset)
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
                   StackPtr, false, SEHFrameOffset);
    else
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr).addReg(StackPtr);

    if (NeedsWinEH)
      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
          .addImm(FramePtr)
          .addImm(SEHFrameOffset)
          .setMIFlag(MachineInstr::FrameSetup);
  }

  // Walk the remaining FrameSetup instructions; for Win64 unwind, emit a
  // SEH_SaveXMM pseudo for every XMM callee-save spill we find.
  while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
    const MachineInstr *FrameInstr = &*MBBI;
    ++MBBI;

    if (NeedsWinEH) {
      int FI;
      if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
        if (X86::FR64RegClass.contains(Reg)) {
          int Offset = getFrameIndexOffset(MF, FI);
          Offset += SEHFrameOffset;

          BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
              .addImm(Reg)
              .addImm(Offset)
              .setMIFlag(MachineInstr::FrameSetup);
        }
      }
    }
  }

  if (NeedsWinEH)
    BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
        .setMIFlag(MachineInstr::FrameSetup);

  // Realign stack after we spilled callee-saved registers (so that we'll be
  // able to calculate their offsets from the frame pointer).
  // Win64 requires aligning the stack after the prologue.
  if (IsWinEH && RegInfo->needsStackRealignment(MF)) {
    assert(HasFP && "There should be a frame pointer if stack is realigned.");
    uint64_t Val = -MaxAlign;
    MachineInstr *MI =
        BuildMI(MBB, MBBI, DL, TII.get(getANDriOpcode(Uses64BitFramePtr, Val)),
                StackPtr)
            .addReg(StackPtr)
            .addImm(Val)
            .setMIFlag(MachineInstr::FrameSetup);

    // The EFLAGS implicit def is dead.
    MI->getOperand(3).setIsDead();
  }

  // If we need a base pointer, set it up here. It's whatever the value
  // of the stack pointer is at this point. Any variable size objects
  // will be allocated after this, so we can still use the base pointer
  // to reference locals.
  if (RegInfo->hasBasePointer(MF)) {
    // Update the base pointer with the current stack pointer.
    unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
    BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
      .addReg(StackPtr)
      .setMIFlag(MachineInstr::FrameSetup);
    if (X86FI->getRestoreBasePointer()) {
      // Stash value of base pointer.  Saving RSP instead of EBP shortens
      // dependence chain.
      unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)),
                   FramePtr, true, X86FI->getRestoreBasePointerOffset())
        .addReg(StackPtr)
        .setMIFlag(MachineInstr::FrameSetup);
    }
  }

  if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
    // Mark end of stack pointer adjustment.
    if (!HasFP && NumBytes) {
      // Define the current CFA rule to use the provided offset.
      assert(StackSize);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr,
                                               -StackSize + stackGrowth));

      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    // Emit DWARF info specifying the offsets of the callee-saved registers.
    if (PushedRegs)
      emitCalleeSavedFrameMoves(MBB, MBBI, DL);
  }
}

/// emitEpilogue - Undo the prologue: pop the frame pointer (if any),
/// deallocate the local frame, and lower EH_RETURN / TCRETURN pseudos
/// into their final instruction sequences.
void X86FrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86RegisterInfo *RegInfo = STI.getRegisterInfo();
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  assert(MBBI != MBB.end() && "Returning block has no instructions");
  unsigned RetOpcode = MBBI->getOpcode();
  DebugLoc DL = MBBI->getDebugLoc();
  bool Is64Bit = STI.is64Bit();
  // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
  const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
  bool HasFP = hasFP(MF);
  const bool Is64BitILP32 = STI.isTarget64BitILP32();
  unsigned SlotSize = RegInfo->getSlotSize();
  unsigned FramePtr = RegInfo->getFrameRegister(MF);
  // Under x32 (64-bit ILP32) the frame register reported above is 32-bit;
  // widen it to the 64-bit super-register for push/pop.
  unsigned MachineFramePtr =
      Is64BitILP32 ? getX86SubSuperRegister(FramePtr, MVT::i64, false)
                   : FramePtr;
  unsigned StackPtr = RegInfo->getStackRegister();

  bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
  bool NeedsWinEH = IsWinEH && MF.getFunction()->needsUnwindTableEntry();
  bool UseLEAForSP = false;

  // We can't use LEA instructions for adjusting the stack pointer if this is a
  // leaf function in the Win64 ABI.  Only ADD instructions may be used to
  // deallocate the stack.
  if (STI.useLeaForSP()) {
    if (!IsWinEH) {
      // We *aren't* using the Win64 ABI which means we are free to use LEA.
      UseLEAForSP = true;
    } else if (HasFP) {
      // We *have* a frame pointer which means we are permitted to use LEA.
      UseLEAForSP = true;
    }
  }

  switch (RetOpcode) {
  default:
    llvm_unreachable("Can only insert epilogue into returning blocks");
  case X86::RETQ:
  case X86::RETL:
  case X86::RETIL:
  case X86::RETIQ:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64:
    break;  // These are ok
  }

  // Get the number of bytes to allocate from the FrameInfo.
  uint64_t StackSize = MFI->getStackSize();
  uint64_t MaxAlign = calculateMaxStackAlign(MF);
  unsigned CSSize = X86FI->getCalleeSavedFrameSize();
  uint64_t NumBytes = 0;

  if (hasFP(MF)) {
    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    NumBytes = FrameSize - CSSize;

    // Callee-saved registers were pushed on stack before the stack was
    // realigned.
    if (RegInfo->needsStackRealignment(MF) && !IsWinEH)
      NumBytes = RoundUpToAlignment(FrameSize, MaxAlign);

    // Pop EBP.
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::POP64r : X86::POP32r), MachineFramePtr);
  } else {
    NumBytes = StackSize - CSSize;
  }
  uint64_t SEHStackAllocAmt = NumBytes;

  // Skip the callee-saved pop instructions.
  while (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PI = std::prev(MBBI);
    unsigned Opc = PI->getOpcode();

    if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE &&
        !PI->isTerminator())
      break;

    --MBBI;
  }
  // First callee-save pop; SP-restoring code may need to be inserted here.
  MachineBasicBlock::iterator FirstCSPop = MBBI;

  DL = MBBI->getDebugLoc();

  // If there is an ADD32ri or SUB32ri of ESP immediately before this
  // instruction, merge the two instructions.
  if (NumBytes || MFI->hasVarSizedObjects())
    mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);

  // If dynamic alloca is used, then reset esp to point to the last callee-saved
  // slot before popping them off! Same applies for the case, when stack was
  // realigned.
  if (RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) {
    if (RegInfo->needsStackRealignment(MF))
      MBBI = FirstCSPop;
    if (IsWinEH) {
      // There are only two legal forms of epilogue:
      // - add SEHAllocationSize, %rsp
      // - lea SEHAllocationSize(%FramePtr), %rsp
      //
      // We are *not* permitted to use 'mov %FramePtr, %rsp' because the Win64
      // unwinder will not recognize 'mov' as an epilogue instruction.
      unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), StackPtr),
                   FramePtr, false, SEHStackAllocAmt - SEHFrameOffset);
      --MBBI;
    } else if (CSSize != 0) {
      unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
                   FramePtr, false, -CSSize);
      --MBBI;
    } else {
      unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
      BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
        .addReg(FramePtr);
      --MBBI;
    }
  } else if (NumBytes) {
    // Adjust stack pointer back: ESP += numbytes.
    emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, Uses64BitFramePtr,
                 UseLEAForSP, TII, *RegInfo);
    --MBBI;
  }

  // Windows unwinder will not invoke function's exception handler if IP is
  // either in prologue or in epilogue.  This behavior causes a problem when a
  // call immediately precedes an epilogue, because the return address points
  // into the epilogue.  To cope with that, we insert an epilogue marker here,
  // then replace it with a 'nop' if it ends up immediately after a CALL in the
  // final emitted code.
  if (NeedsWinEH)
    BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));

  // We're returning from function via eh_return.
  if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &DestAddr = MBBI->getOperand(0);
    assert(DestAddr.isReg() && "Offset should be in register!");
    BuildMI(MBB, MBBI, DL,
            TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
            StackPtr).addReg(DestAddr.getReg());
  } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
             RetOpcode == X86::TCRETURNmi ||
             RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 ||
             RetOpcode == X86::TCRETURNmi64) {
    bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64;
    // Tail call return: adjust the stack pointer and jump to callee.
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    // For the memory forms the stack-adjustment immediate follows the five
    // memory operands.
    MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
    assert(StackAdjust.isImm() && "Expecting immediate value.");

    // Adjust stack pointer.
    int StackAdj = StackAdjust.getImm();
    int MaxTCDelta = X86FI->getTCReturnAddrDelta();
    int Offset = 0;
    assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");

    // Incorporate the retaddr area.
    Offset = StackAdj-MaxTCDelta;
    assert(Offset >= 0 && "Offset should never be negative");

    if (Offset) {
      // Check for possible merge with preceding ADD instruction.
      Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
      emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, Uses64BitFramePtr,
                   UseLEAForSP, TII, *RegInfo);
    }

    // Jump to label or value in register.
    bool IsWin64 = STI.isTargetWin64();
    if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
      unsigned Op = (RetOpcode == X86::TCRETURNdi)
                        ? X86::TAILJMPd
                        : (IsWin64 ? X86::TAILJMPd64_REX : X86::TAILJMPd64);
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII.get(Op));
      if (JumpTarget.isGlobal())
        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                             JumpTarget.getTargetFlags());
      else {
        assert(JumpTarget.isSymbol());
        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                              JumpTarget.getTargetFlags());
      }
    } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
      unsigned Op = (RetOpcode == X86::TCRETURNmi)
                        ? X86::TAILJMPm
                        : (IsWin64 ? X86::TAILJMPm64_REX : X86::TAILJMPm64);
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII.get(Op));
      // Copy the five memory operands from the TCRETURN pseudo.
      for (unsigned i = 0; i != 5; ++i)
        MIB.addOperand(MBBI->getOperand(i));
    } else if (RetOpcode == X86::TCRETURNri64) {
      BuildMI(MBB, MBBI, DL,
              TII.get(IsWin64 ? X86::TAILJMPr64_REX : X86::TAILJMPr64))
          .addReg(JumpTarget.getReg(), RegState::Kill);
    } else {
      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)).
        addReg(JumpTarget.getReg(), RegState::Kill);
    }

    MachineInstr *NewMI = std::prev(MBBI);
    NewMI->copyImplicitOps(MF, MBBI);

    // Delete the pseudo instruction TCRETURN.
    MBB.erase(MBBI);
  } else if ((RetOpcode == X86::RETQ || RetOpcode == X86::RETL ||
              RetOpcode == X86::RETIQ || RetOpcode == X86::RETIL) &&
             (X86FI->getTCReturnAddrDelta() < 0)) {
    // Add the return addr area delta back since we are not tail calling.
    int delta = -1*X86FI->getTCReturnAddrDelta();
    MBBI = MBB.getLastNonDebugInstr();

    // Check for possible merge with preceding ADD instruction.
    delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
    emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, Uses64BitFramePtr,
                 UseLEAForSP, TII, *RegInfo);
  }
}

/// getFrameIndexOffset - Return the displacement of frame index FI from the
/// frame-reference register chosen by getFrameIndexReference (FP, BP, or SP
/// depending on realignment / base-pointer usage).
int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
                                          int FI) const {
  const X86RegisterInfo *RegInfo =
      MF.getSubtarget<X86Subtarget>().getRegisterInfo();
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  // Offset will hold the offset from the stack pointer at function entry to the
  // object.
  // We need to factor in additional offsets applied during the prologue to the
  // frame, base, and stack pointer depending on which is used.
  int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  unsigned CSSize = X86FI->getCalleeSavedFrameSize();
  uint64_t StackSize = MFI->getStackSize();
  unsigned SlotSize = RegInfo->getSlotSize();
  bool HasFP = hasFP(MF);
  bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
  int64_t FPDelta = 0;

  if (IsWinEH) {
    assert(!MFI->hasCalls() || (StackSize % 16) == 8);

    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    // If required, include space for extra hidden slot for stashing base
    // pointer.
    if (X86FI->getRestoreBasePointer())
      FrameSize += SlotSize;
    uint64_t NumBytes = FrameSize - CSSize;

    uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes);
    if (FI && FI == X86FI->getFAIndex())
      return -SEHFrameOffset;

    // FPDelta is the offset from the "traditional" FP location of the old base
    // pointer followed by return address and the location required by the
    // restricted Win64 prologue.
    // Add FPDelta to all offsets below that go through the frame pointer.
    FPDelta = FrameSize - SEHFrameOffset;
    assert((!MFI->hasCalls() || (FPDelta % 16) == 0) &&
           "FPDelta isn't aligned per the Win64 ABI!");
  }


  if (RegInfo->hasBasePointer(MF)) {
    assert(HasFP && "VLAs and dynamic stack realign, but no FP?!");
    if (FI < 0) {
      // Skip the saved EBP.
      return Offset + SlotSize + FPDelta;
    } else {
      assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
      return Offset + StackSize;
    }
  } else if (RegInfo->needsStackRealignment(MF)) {
    if (FI < 0) {
      // Skip the saved EBP.
      return Offset + SlotSize + FPDelta;
    } else {
      assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
      return Offset + StackSize;
    }
    // FIXME: Support tail calls
  } else {
    if (!HasFP)
      return Offset + StackSize;

    // Skip the saved EBP.
    Offset += SlotSize;

    // Skip the RETADDR move area
    int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
    if (TailCallReturnAddrDelta < 0)
      Offset -= TailCallReturnAddrDelta;
  }

  return Offset + FPDelta;
}

int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
                                             unsigned &FrameReg) const {
  const X86RegisterInfo *RegInfo =
      MF.getSubtarget<X86Subtarget>().getRegisterInfo();
  // We can't calculate offset from frame pointer if the stack is realigned,
  // so enforce usage of stack/base pointer.  The base pointer is used when we
  // have dynamic allocas in addition to dynamic realignment.
  if (RegInfo->hasBasePointer(MF))
    FrameReg = RegInfo->getBaseRegister();
  else if (RegInfo->needsStackRealignment(MF))
    FrameReg = RegInfo->getStackRegister();
  else
    FrameReg = RegInfo->getFrameRegister(MF);
  return getFrameIndexOffset(MF, FI);
}

// Simplified from getFrameIndexOffset keeping only StackPointer cases
int X86FrameLowering::getFrameIndexOffsetFromSP(const MachineFunction &MF, int FI) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  // Does not include any dynamic realign.
  const uint64_t StackSize = MFI->getStackSize();
  {
#ifndef NDEBUG
    const X86RegisterInfo *RegInfo =
        MF.getSubtarget<X86Subtarget>().getRegisterInfo();
    // Note: LLVM arranges the stack as:
    // Args > Saved RetPC (<--FP) > CSRs > dynamic alignment (<--BP)
    //      > "Stack Slots" (<--SP)
    // We can always address StackSlots from RSP.  We can usually (unless
    // needsStackRealignment) address CSRs from RSP, but sometimes need to
    // address them from RBP.  FixedObjects can be placed anywhere in the stack
    // frame depending on their specific requirements (i.e. we can actually
    // refer to arguments to the function which are stored in the *callers*
    // frame).  As a result, THE RESULT OF THIS CALL IS MEANINGLESS FOR CSRs
    // AND FixedObjects IFF needsStackRealignment or hasVarSizedObject.

    assert(!RegInfo->hasBasePointer(MF) && "we don't handle this case");

    // We don't handle tail calls, and shouldn't be seeing them
    // either.
    int TailCallReturnAddrDelta =
        MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta();
    assert(!(TailCallReturnAddrDelta < 0) && "we don't handle this case!");
#endif
  }

  // This is how the math works out:
  //
  //  %rsp grows (i.e. gets lower) left to right. Each box below is
  //  one word (eight bytes). Obj0 is the stack slot we're trying to
  //  get to.
  //
  //    ----------------------------------
  //    | BP | Obj0 | Obj1 | ... | ObjN |
  //    ----------------------------------
  //    ^    ^      ^                   ^
  //    A    B      C                   E
  //
  // A is the incoming stack pointer.
  // (B - A) is the local area offset (-8 for x86-64) [1]
  // (C - A) is the Offset returned by MFI->getObjectOffset for Obj0 [2]
  //
  // |(E - B)| is the StackSize (absolute value, positive).  For a
  // stack that grown down, this works out to be (B - E). [3]
  //
  // E is also the value of %rsp after stack has been set up, and we
  // want (C - E) -- the value we can add to %rsp to get to Obj0.  Now
  // (C - E) == (C - A) - (B - A) + (B - E)
  //           { Using [1], [2] and [3] above }
  //        == getObjectOffset - LocalAreaOffset + StackSize
  //

  // Get the Offset from the StackPointer
  int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();

  return Offset + StackSize;
}
// Simplified from getFrameIndexReference keeping only StackPointer cases
int X86FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
                                                   int FI,
                                                   unsigned &FrameReg) const {
  const X86RegisterInfo *RegInfo =
      MF.getSubtarget<X86Subtarget>().getRegisterInfo();
  assert(!RegInfo->hasBasePointer(MF) && "we don't handle this case");

  FrameReg = RegInfo->getStackRegister();
  return getFrameIndexOffsetFromSP(MF, FI);
}

/// assignCalleeSavedSpillSlots - Lay out fixed spill slots for the
/// callee-saved registers in CSI, working downward from the local area.
bool X86FrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const X86RegisterInfo *RegInfo =
      MF.getSubtarget<X86Subtarget>().getRegisterInfo();
  unsigned SlotSize = RegInfo->getSlotSize();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();

  unsigned CalleeSavedFrameSize = 0;
  int SpillSlotOffset = getOffsetOfLocalArea() +
                        X86FI->getTCReturnAddrDelta();

  if (hasFP(MF)) {
    // emitPrologue always spills frame register the first thing.
    SpillSlotOffset -= SlotSize;
    MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);

    // Since emitPrologue and emitEpilogue will handle spilling and restoring of
    // the frame register, we can delete it from CSI list and not have to worry
    // about avoiding it later.
    unsigned FPReg = RegInfo->getFrameRegister(MF);
    for (unsigned i = 0; i < CSI.size(); ++i) {
      if (TRI->regsOverlap(CSI[i].getReg(),FPReg)) {
        CSI.erase(CSI.begin() + i);
        break;
      }
    }
  }

  // Assign slots for GPRs. It increases frame size.
  // Iterate in reverse so slot order matches the push order used by
  // spillCalleeSavedRegisters.
  for (unsigned i = CSI.size(); i != 0; --i) {
    unsigned Reg = CSI[i - 1].getReg();

    if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
      continue;

    SpillSlotOffset -= SlotSize;
    CalleeSavedFrameSize += SlotSize;

    int SlotIndex = MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
    CSI[i - 1].setFrameIdx(SlotIndex);
  }

  X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);

  // Assign slots for XMMs.
  for (unsigned i = CSI.size(); i != 0; --i) {
    unsigned Reg = CSI[i - 1].getReg();
    if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
      continue;

    const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
    // ensure alignment
    SpillSlotOffset -= std::abs(SpillSlotOffset) % RC->getAlignment();
    // spill into slot
    SpillSlotOffset -= RC->getSize();
    int SlotIndex =
        MFI->CreateFixedSpillStackObject(RC->getSize(), SpillSlotOffset);
    CSI[i - 1].setFrameIdx(SlotIndex);
    MFI->ensureMaxAlignment(RC->getAlignment());
  }

  return true;
}

/// spillCalleeSavedRegisters - Insert spill code for the callee-saved
/// registers: GPRs are pushed, everything else is stored to its assigned
/// spill slot.  Returns true to indicate the target handled the spills.
bool X86FrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI) const {
  DebugLoc DL = MBB.findDebugLoc(MI);

  MachineFunction &MF = *MBB.getParent();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const TargetInstrInfo &TII = *STI.getInstrInfo();

  // Push GPRs. It increases frame size.
  unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
  for (unsigned i = CSI.size(); i != 0; --i) {
    unsigned Reg = CSI[i - 1].getReg();

    if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
      continue;
    // Add the callee-saved register as live-in. It's killed at the spill.
    MBB.addLiveIn(Reg);

    BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill)
      .setMIFlag(MachineInstr::FrameSetup);
  }

  // Make XMM regs spilled. X86 does not have ability of push/pop XMM.
  // It can be done by spilling XMMs to stack frame.
  for (unsigned i = CSI.size(); i != 0; --i) {
    unsigned Reg = CSI[i-1].getReg();
    if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
      continue;
    // Add the callee-saved register as live-in. It's killed at the spill.
    MBB.addLiveIn(Reg);
    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);

    TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i - 1].getFrameIdx(), RC,
                            TRI);
    // storeRegToStackSlot inserted the store just before MI; step back to
    // tag it as frame setup, then restore the iterator.
    --MI;
    MI->setFlag(MachineInstr::FrameSetup);
    ++MI;
  }

  return true;
}

/// restoreCalleeSavedRegisters - Insert epilogue code restoring the
/// callee-saved registers: XMM/other classes are reloaded from their spill
/// slots first, then GPRs are popped (mirroring the push order of the spill
/// code).  Returns false when CSI is empty so the generic path runs instead.
bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MI,
                                        const std::vector<CalleeSavedInfo> &CSI,
                                          const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  DebugLoc DL = MBB.findDebugLoc(MI);

  MachineFunction &MF = *MBB.getParent();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const TargetInstrInfo &TII = *STI.getInstrInfo();

  // Reload XMMs from stack frame.
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    if (X86::GR64RegClass.contains(Reg) ||
        X86::GR32RegClass.contains(Reg))
      continue;

    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
    TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI);
  }

  // POP GPRs.
  unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    if (!X86::GR64RegClass.contains(Reg) &&
        !X86::GR32RegClass.contains(Reg))
      continue;

    BuildMI(MBB, MI, DL, TII.get(Opc), Reg);
  }
  return true;
}

/// processFunctionBeforeCalleeSavedScan - Reserve the tail-call return
/// address area (if this function lowers tail calls with a negative delta)
/// and mark the base pointer register used so it gets saved.
void
X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                                       RegScavenger *RS) const {
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const X86RegisterInfo *RegInfo =
      MF.getSubtarget<X86Subtarget>().getRegisterInfo();
  unsigned SlotSize = RegInfo->getSlotSize();

  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();

  if (TailCallReturnAddrDelta < 0) {
    // create RETURNADDR area
    //   arg
    //   arg
    //   RETADDR
    //   { ...
    //     RETADDR area
    //     ...
    //   }
    //   [EBP]
    MFI->CreateFixedObject(-TailCallReturnAddrDelta,
                           TailCallReturnAddrDelta - SlotSize, true);
  }

  // Spill the BasePtr if it's used.
  if (RegInfo->hasBasePointer(MF))
    MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
}

/// HasNestArgument - Return true if any formal argument of MF's IR function
/// carries the 'nest' attribute.
static bool
HasNestArgument(const MachineFunction *MF) {
  const Function *F = MF->getFunction();
  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
       I != E; I++) {
    if (I->hasNestAttr())
      return true;
  }
  return false;
}

/// GetScratchRegister - Get a temp register for performing work in the
/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
/// and the properties of the function either one or two registers will be
/// needed. Set primary to true for the first register, false for the second.
1557 static unsigned 1558 GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary) { 1559 CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv(); 1560 1561 // Erlang stuff. 1562 if (CallingConvention == CallingConv::HiPE) { 1563 if (Is64Bit) 1564 return Primary ? X86::R14 : X86::R13; 1565 else 1566 return Primary ? X86::EBX : X86::EDI; 1567 } 1568 1569 if (Is64Bit) { 1570 if (IsLP64) 1571 return Primary ? X86::R11 : X86::R12; 1572 else 1573 return Primary ? X86::R11D : X86::R12D; 1574 } 1575 1576 bool IsNested = HasNestArgument(&MF); 1577 1578 if (CallingConvention == CallingConv::X86_FastCall || 1579 CallingConvention == CallingConv::Fast) { 1580 if (IsNested) 1581 report_fatal_error("Segmented stacks does not support fastcall with " 1582 "nested function."); 1583 return Primary ? X86::EAX : X86::ECX; 1584 } 1585 if (IsNested) 1586 return Primary ? X86::EDX : X86::EAX; 1587 return Primary ? X86::ECX : X86::EAX; 1588 } 1589 1590 // The stack limit in the TCB is set to this many bytes above the actual stack 1591 // limit. 
// Number of bytes of headroom assumed above the recorded stack limit; frames
// smaller than this compare SP against the limit directly (see below).
static const uint64_t kSplitStackAvailable = 256;

/// adjustForSegmentedStacks - Prepend a stack-limit check block (checkMBB)
/// and a __morestack call block (allocMBB) to the function so the stacklet
/// grows on demand.
void
X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
  MachineBasicBlock &prologueMBB = MF.front();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  uint64_t StackSize;
  bool Is64Bit = STI.is64Bit();
  const bool IsLP64 = STI.isTarget64BitLP64();
  unsigned TlsReg, TlsOffset;
  DebugLoc DL;

  unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
  assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
         "Scratch register is live-in");

  if (MF.getFunction()->isVarArg())
    report_fatal_error("Segmented stacks do not support vararg functions.");
  if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
      !STI.isTargetWin64() && !STI.isTargetFreeBSD() &&
      !STI.isTargetDragonFly())
    report_fatal_error("Segmented stacks not supported on this platform.");

  // Eventually StackSize will be calculated by a link-time pass; which will
  // also decide whether checking code needs to be injected into this particular
  // prologue.
  StackSize = MFI->getStackSize();

  // Do not generate a prologue for functions with a stack of size zero
  if (StackSize == 0)
    return;

  MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  bool IsNested = false;

  // We need to know if the function has a nest argument only in 64 bit mode.
  if (Is64Bit)
    IsNested = HasNestArgument(&MF);

  // The MOV R10, RAX needs to be in a different block, since the RET we emit in
  // allocMBB needs to be last (terminating) instruction.

  // Both new blocks must see the same live-ins as the original entry block.
  for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(),
         e = prologueMBB.livein_end(); i != e; i++) {
    allocMBB->addLiveIn(*i);
    checkMBB->addLiveIn(*i);
  }

  if (IsNested)
    allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);

  MF.push_front(allocMBB);
  MF.push_front(checkMBB);

  // When the frame size is less than 256 we just compare the stack
  // boundary directly to the value of the stack pointer, per gcc.
  bool CompareStackPointer = StackSize < kSplitStackAvailable;

  // Read the limit off the current stacklet off the stack_guard location.
  if (Is64Bit) {
    if (STI.isTargetLinux()) {
      TlsReg = X86::FS;
      TlsOffset = IsLP64 ? 0x70 : 0x40;
    } else if (STI.isTargetDarwin()) {
      TlsReg = X86::GS;
      TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
    } else if (STI.isTargetWin64()) {
      TlsReg = X86::GS;
      TlsOffset = 0x28; // pvArbitrary, reserved for application use
    } else if (STI.isTargetFreeBSD()) {
      TlsReg = X86::FS;
      TlsOffset = 0x18;
    } else if (STI.isTargetDragonFly()) {
      TlsReg = X86::FS;
      TlsOffset = 0x20; // use tls_tcb.tcb_segstack
    } else {
      report_fatal_error("Segmented stacks not supported on this platform.");
    }

    // For small frames compare SP itself; otherwise compute SP - StackSize
    // into the scratch register first.
    if (CompareStackPointer)
      ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
    else
      BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r), ScratchReg).addReg(X86::RSP)
        .addImm(1).addReg(0).addImm(-StackSize).addReg(0);

    BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm)).addReg(ScratchReg)
      .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
  } else {
    if (STI.isTargetLinux()) {
      TlsReg = X86::GS;
      TlsOffset = 0x30;
    } else if (STI.isTargetDarwin()) {
      TlsReg = X86::GS;
      TlsOffset = 0x48 + 90*4;
    } else if (STI.isTargetWin32()) {
      TlsReg = X86::FS;
      TlsOffset = 0x14; // pvArbitrary, reserved for application use
    } else if (STI.isTargetDragonFly()) {
      TlsReg = X86::FS;
      TlsOffset = 0x10; // use tls_tcb.tcb_segstack
    } else if (STI.isTargetFreeBSD()) {
      report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
    } else {
      report_fatal_error("Segmented stacks not supported on this platform.");
    }

    if (CompareStackPointer)
      ScratchReg = X86::ESP;
    else
      BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
        .addImm(1).addReg(0).addImm(-StackSize).addReg(0);

    if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() ||
        STI.isTargetDragonFly()) {
      BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
        .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
    } else if (STI.isTargetDarwin()) {

      // TlsOffset doesn't fit into a mod r/m byte so we need an extra register.
      unsigned ScratchReg2;
      bool SaveScratch2;
      if (CompareStackPointer) {
        // The primary scratch register is available for holding the TLS offset.
        ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
        SaveScratch2 = false;
      } else {
        // Need to use a second register to hold the TLS offset
        ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);

        // Unfortunately, with fastcc the second scratch register may hold an
        // argument.
        SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
      }

      // If Scratch2 is live-in then it needs to be saved.
      assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
             "Scratch register is live-in and not saved");

      if (SaveScratch2)
        BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
          .addReg(ScratchReg2, RegState::Kill);

      BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
        .addImm(TlsOffset);
      BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
        .addReg(ScratchReg)
        .addReg(ScratchReg2).addImm(1).addReg(0)
        .addImm(0)
        .addReg(TlsReg);

      if (SaveScratch2)
        BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
    }
  }

  // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
  // It jumps to normal execution of the function body.
  BuildMI(checkMBB, DL, TII.get(X86::JA_1)).addMBB(&prologueMBB);

  // On 32 bit we first push the arguments size and then the frame size. On 64
  // bit, we pass the stack frame size in r10 and the argument size in r11.
  if (Is64Bit) {
    // Functions with nested arguments use R10, so it needs to be saved across
    // the call to _morestack

    const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
    const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
    const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
    const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
    const unsigned MOVri = IsLP64 ? X86::MOV64ri : X86::MOV32ri;

    if (IsNested)
      BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);

    BuildMI(allocMBB, DL, TII.get(MOVri), Reg10)
      .addImm(StackSize);
    BuildMI(allocMBB, DL, TII.get(MOVri), Reg11)
      .addImm(X86FI->getArgumentStackSize());
    MF.getRegInfo().setPhysRegUsed(Reg10);
    MF.getRegInfo().setPhysRegUsed(Reg11);
  } else {
    BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
      .addImm(X86FI->getArgumentStackSize());
    BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
      .addImm(StackSize);
  }

  // __morestack is in libgcc
  if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
    // Under the large code model, we cannot assume that __morestack lives
    // within 2^31 bytes of the call site, so we cannot use pc-relative
    // addressing. We cannot perform the call via a temporary register,
    // as the rax register may be used to store the static chain, and all
    // other suitable registers may be either callee-save or used for
    // parameter passing. We cannot use the stack at this point either
    // because __morestack manipulates the stack directly.
    //
    // To avoid these issues, perform an indirect call via a read-only memory
    // location containing the address.
    //
    // This solution is not perfect, as it assumes that the .rodata section
    // is laid out within 2^31 bytes of each function body, but this seems
    // to be sufficient for JIT.
    BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
        .addReg(X86::RIP)
        .addImm(0)
        .addReg(0)
        .addExternalSymbol("__morestack_addr")
        .addReg(0);
    MF.getMMI().setUsesMorestackAddr(true);
  } else {
    if (Is64Bit)
      BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
        .addExternalSymbol("__morestack");
    else
      BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
        .addExternalSymbol("__morestack");
  }

  // __morestack returns directly to the caller's body, so allocMBB ends in
  // a dedicated RET pseudo (R10-restoring variant for nested functions).
  if (IsNested)
    BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
  else
    BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));

  allocMBB->addSuccessor(&prologueMBB);

  checkMBB->addSuccessor(allocMBB);
  checkMBB->addSuccessor(&prologueMBB);

#ifdef XDEBUG
  MF.verify();
#endif
}

/// Erlang programs may need a special prologue to handle the stack size they
/// might need at runtime. That is because Erlang/OTP does not implement a C
/// stack but uses a custom implementation of hybrid stack/heap architecture.
/// (for more information see Eric Stenman's Ph.D. thesis:
/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
///
/// CheckStack:
///       temp0 = sp - MaxStack
///       if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
/// OldStart:
///       ...
/// IncStack:
///    call inc_stack   # doubles the stack space
///    temp0 = sp - MaxStack
///    if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  // SlotSize is the target's stack-word size (4 or 8 bytes); all HiPE
  // stack arithmetic below is done in units of these slots.
  const unsigned SlotSize = STI.getRegisterInfo()->getSlotSize();
  const bool Is64Bit = STI.is64Bit();
  const bool IsLP64 = STI.isTarget64BitLP64();
  DebugLoc DL;
  // HiPE-specific values
  // HipeLeafWords: stack words the HiPE runtime guarantees to every
  // function; a frame that fits within this needs no runtime check.
  const unsigned HipeLeafWords = 24;
  // Number of arguments the HiPE calling convention passes in registers
  // (presumably per the HiPE ABI for each target width — TODO confirm).
  const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
  const unsigned Guaranteed = HipeLeafWords * SlotSize;
  // Arguments beyond the registered ones spill to the caller's stack.
  unsigned CallerStkArity = MF.getFunction()->arg_size() > CCRegisteredArgs ?
                            MF.getFunction()->arg_size() - CCRegisteredArgs : 0;
  // Worst-case stack need: fixed frame + on-stack args + one extra slot.
  unsigned MaxStack = MFI->getStackSize() + CallerStkArity*SlotSize + SlotSize;

  assert(STI.isTargetLinux() &&
         "HiPE prologue is only supported on Linux operating systems.");

  // Compute the largest caller's frame that is needed to fit the callees'
  // frames. This 'MaxStack' is computed from:
  //
  // a) the fixed frame size, which is the space needed for all spilled temps,
  // b) outgoing on-stack parameter areas, and
  // c) the minimum stack space this function needs to make available for the
  //    functions it calls (a tunable ABI property).
  if (MFI->hasCalls()) {
    unsigned MoreStackForCalls = 0;

    // Scan every call in the function to find the callee with the largest
    // guaranteed-stack requirement not covered by its own stack arity.
    for (MachineFunction::iterator MBBI = MF.begin(), MBBE = MF.end();
         MBBI != MBBE; ++MBBI)
      for (MachineBasicBlock::iterator MI = MBBI->begin(), ME = MBBI->end();
           MI != ME; ++MI) {
        if (!MI->isCall())
          continue;

        // Get callee operand.
        const MachineOperand &MO = MI->getOperand(0);

        // Only take account of global function calls (no closures etc.).
        if (!MO.isGlobal())
          continue;

        // The global may be an alias or other non-Function value; skip those.
        const Function *F = dyn_cast<Function>(MO.getGlobal());
        if (!F)
          continue;

        // Do not update 'MaxStack' for primitive and built-in functions
        // (encoded with names either starting with "erlang."/"bif_" or not
        // having a ".", such as a simple <Module>.<Function>.<Arity>, or an
        // "_", such as the BIF "suspend_0") as they are executed on another
        // stack.
        if (F->getName().find("erlang.") != StringRef::npos ||
            F->getName().find("bif_") != StringRef::npos ||
            F->getName().find_first_of("._") == StringRef::npos)
          continue;

        // Callee's stack arity: its own arguments that arrive on the stack.
        unsigned CalleeStkArity =
          F->arg_size() > CCRegisteredArgs ? F->arg_size()-CCRegisteredArgs : 0;
        if (HipeLeafWords - 1 > CalleeStkArity)
          MoreStackForCalls = std::max(MoreStackForCalls,
                               (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
      }
    MaxStack += MoreStackForCalls;
  }

  // If the stack frame needed is larger than the guaranteed then runtime checks
  // and calls to "inc_stack_0" BIF should be inserted in the assembly prologue.
  if (MaxStack > Guaranteed) {
    MachineBasicBlock &prologueMBB = MF.front();
    MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
    MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();

    // The new blocks run before the original entry block, so they must
    // preserve all of its live-in registers.
    for (MachineBasicBlock::livein_iterator I = prologueMBB.livein_begin(),
           E = prologueMBB.livein_end(); I != E; I++) {
      stackCheckMBB->addLiveIn(*I);
      incStackMBB->addLiveIn(*I);
    }

    // Final layout: stackCheckMBB -> incStackMBB -> original prologue.
    MF.push_front(incStackMBB);
    MF.push_front(stackCheckMBB);

    // Select 64- vs 32-bit registers, opcodes, and the offset of the stack
    // limit word inside the HiPE process structure (pointed to by BP).
    unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
    unsigned LEAop, CMPop, CALLop;
    if (Is64Bit) {
      SPReg = X86::RSP;
      PReg  = X86::RBP;
      LEAop = X86::LEA64r;
      CMPop = X86::CMP64rm;
      CALLop = X86::CALL64pcrel32;
      SPLimitOffset = 0x90;
    } else {
      SPReg = X86::ESP;
      PReg  = X86::EBP;
      LEAop = X86::LEA32r;
      CMPop = X86::CMP32rm;
      CALLop = X86::CALLpcrel32;
      SPLimitOffset = 0x4c;
    }

    ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
    assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
           "HiPE prologue scratch register is live-in");

    // Create new MBB for StackCheck:
    // ScratchReg = SP - MaxStack (computed with LEA so flags are untouched).
    addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg),
                 SPReg, false, -MaxStack);
    // SPLimitOffset is in a fixed heap location (pointed by BP).
    addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop))
                 .addReg(ScratchReg), PReg, false, SPLimitOffset);
    // Unsigned SP-MaxStack >= limit means the frame fits: skip the grow path.
    BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_1)).addMBB(&prologueMBB);

    // Create new MBB for IncStack:
    // Call the runtime to grow the stack, then re-run the same check; the
    // backward JLE loops until enough stack has been obtained.
    BuildMI(incStackMBB, DL, TII.get(CALLop)).
      addExternalSymbol("inc_stack_0");
    addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg),
                 SPReg, false, -MaxStack);
    addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop))
                 .addReg(ScratchReg), PReg, false, SPLimitOffset);
    BuildMI(incStackMBB, DL, TII.get(X86::JLE_1)).addMBB(incStackMBB);

    // 99/1 successor weights: hint that the stack check almost always passes.
    stackCheckMBB->addSuccessor(&prologueMBB, 99);
    stackCheckMBB->addSuccessor(incStackMBB, 1);
    incStackMBB->addSuccessor(&prologueMBB, 99);
    incStackMBB->addSuccessor(incStackMBB, 1);
  }
#ifdef XDEBUG
  MF.verify();
#endif
}

/// Lower the ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudo instructions that survive
/// to this point: either expand them into explicit SP adjustments (when no
/// call frame is reserved) or simply delete them, compensating afterwards for
/// any stack bytes the callee itself pops.
void X86FrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  const X86RegisterInfo &RegInfo = *STI.getRegisterInfo();
  unsigned StackPtr = RegInfo.getStackRegister();
  bool reserveCallFrame = hasReservedCallFrame(MF);
  int Opcode = I->getOpcode();
  bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
  bool IsLP64 = STI.isTarget64BitLP64();
  DebugLoc DL = I->getDebugLoc();
  // Operand 0: byte size of the call frame being set up / torn down.
  uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
  // Operand 1: bytes already handled inside the call sequence itself
  // (argument pushes on setup, callee pops on destroy).
  uint64_t InternalAmt = (isDestroy || Amount) ? I->getOperand(1).getImm() : 0;
  // Remove the pseudo; 'I' now points at the following instruction, which is
  // where any replacement SP adjustment will be inserted.
  I = MBB.erase(I);

  if (!reserveCallFrame) {
    // If the stack pointer can be changed after prologue, turn the
    // adjcallstackup instruction into a 'sub ESP, <amt>' and the
    // adjcallstackdown instruction into 'add ESP, <amt>'
    if (Amount == 0)
      return;

    // We need to keep the stack aligned properly.  To do this, we round the
    // amount of space needed for the outgoing arguments up to the next
    // alignment boundary.
    unsigned StackAlign = getStackAlignment();
    Amount = RoundUpToAlignment(Amount, StackAlign);

    MachineInstr *New = nullptr;

    // Factor out the amount that gets handled inside the sequence
    // (Pushes of argument for frame setup, callee pops for frame destroy)
    Amount -= InternalAmt;

    if (Amount) {
      if (Opcode == TII.getCallFrameSetupOpcode()) {
        New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)), StackPtr)
          .addReg(StackPtr).addImm(Amount);
      } else {
        assert(Opcode == TII.getCallFrameDestroyOpcode());

        unsigned Opc = getADDriOpcode(IsLP64, Amount);
        New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
          .addReg(StackPtr).addImm(Amount);
      }
    }

    if (New) {
      // The EFLAGS implicit def is dead.
      New->getOperand(3).setIsDead();

      // Replace the pseudo instruction with a new instruction.
      MBB.insert(I, New);
    }

    return;
  }

  if (Opcode == TII.getCallFrameDestroyOpcode() && InternalAmt) {
    // If we are performing frame pointer elimination and if the callee pops
    // something off the stack pointer, add it back. We do this until we have
    // more advanced stack pointer tracking ability.
    // Note: the callee's pop raised SP above the reserved call frame, so the
    // compensation is a SUB that lowers SP back to the reserved level.
    unsigned Opc = getSUBriOpcode(IsLP64, InternalAmt);
    MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
      .addReg(StackPtr).addImm(InternalAmt);

    // The EFLAGS implicit def is dead.
    New->getOperand(3).setIsDead();

    // We are not tracking the stack pointer adjustment by the callee, so make
    // sure we restore the stack pointer immediately after the call, there may
    // be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
    MachineBasicBlock::iterator B = MBB.begin();
    while (I != B && !std::prev(I)->isCall())
      --I;
    MBB.insert(I, New);
  }
}