1 //===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains the X86 implementation of TargetFrameLowering class. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "X86FrameLowering.h" 15 #include "X86InstrBuilder.h" 16 #include "X86InstrInfo.h" 17 #include "X86MachineFunctionInfo.h" 18 #include "X86Subtarget.h" 19 #include "X86TargetMachine.h" 20 #include "llvm/ADT/SmallSet.h" 21 #include "llvm/CodeGen/MachineFrameInfo.h" 22 #include "llvm/CodeGen/MachineFunction.h" 23 #include "llvm/CodeGen/MachineInstrBuilder.h" 24 #include "llvm/CodeGen/MachineModuleInfo.h" 25 #include "llvm/CodeGen/MachineRegisterInfo.h" 26 #include "llvm/CodeGen/WinEHFuncInfo.h" 27 #include "llvm/IR/DataLayout.h" 28 #include "llvm/IR/Function.h" 29 #include "llvm/MC/MCAsmInfo.h" 30 #include "llvm/MC/MCSymbol.h" 31 #include "llvm/Target/TargetOptions.h" 32 #include "llvm/Support/Debug.h" 33 #include <cstdlib> 34 35 using namespace llvm; 36 37 X86FrameLowering::X86FrameLowering(const X86Subtarget &STI, 38 unsigned StackAlignOverride) 39 : TargetFrameLowering(StackGrowsDown, StackAlignOverride, 40 STI.is64Bit() ? -8 : -4), 41 STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) { 42 // Cache a bunch of frame-related predicates for this subtarget. 43 SlotSize = TRI->getSlotSize(); 44 Is64Bit = STI.is64Bit(); 45 IsLP64 = STI.isTarget64BitLP64(); 46 // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit. 47 Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64(); 48 StackPtr = TRI->getStackRegister(); 49 } 50 51 bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { 52 return !MF.getFrameInfo()->hasVarSizedObjects() && 53 !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences(); 54 } 55 56 /// canSimplifyCallFramePseudos - If there is a reserved call frame, the 57 /// call frame pseudos can be simplified. Having a FP, as in the default 58 /// implementation, is not sufficient here since we can't always use it. 59 /// Use a more nuanced condition. 60 bool 61 X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const { 62 return hasReservedCallFrame(MF) || 63 (hasFP(MF) && !TRI->needsStackRealignment(MF)) || 64 TRI->hasBasePointer(MF); 65 } 66 67 // needsFrameIndexResolution - Do we need to perform FI resolution for 68 // this function. Normally, this is required only when the function 69 // has any stack objects. However, FI resolution actually has another job, 70 // not apparent from the title - it resolves callframesetup/destroy 71 // that were not simplified earlier. 72 // So, this is required for x86 functions that have push sequences even 73 // when there are no stack objects. 74 bool 75 X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const { 76 return MF.getFrameInfo()->hasStackObjects() || 77 MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences(); 78 } 79 80 /// hasFP - Return true if the specified function should have a dedicated frame 81 /// pointer register. This is true if the function has variable sized allocas 82 /// or if frame pointer elimination is disabled. 83 bool X86FrameLowering::hasFP(const MachineFunction &MF) const { 84 const MachineFrameInfo *MFI = MF.getFrameInfo(); 85 const MachineModuleInfo &MMI = MF.getMMI(); 86 87 return (MF.getTarget().Options.DisableFramePointerElim(MF) || 88 TRI->needsStackRealignment(MF) || 89 MFI->hasVarSizedObjects() || 90 MFI->isFrameAddressTaken() || MFI->hasOpaqueSPAdjustment() || 91 MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() || 92 MMI.callsUnwindInit() || MMI.hasEHFunclets() || MMI.callsEHReturn() || 93 MFI->hasStackMap() || MFI->hasPatchPoint()); 94 } 95 96 static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) { 97 if (IsLP64) { 98 if (isInt<8>(Imm)) 99 return X86::SUB64ri8; 100 return X86::SUB64ri32; 101 } else { 102 if (isInt<8>(Imm)) 103 return X86::SUB32ri8; 104 return X86::SUB32ri; 105 } 106 } 107 108 static unsigned getADDriOpcode(unsigned IsLP64, int64_t Imm) { 109 if (IsLP64) { 110 if (isInt<8>(Imm)) 111 return X86::ADD64ri8; 112 return X86::ADD64ri32; 113 } else { 114 if (isInt<8>(Imm)) 115 return X86::ADD32ri8; 116 return X86::ADD32ri; 117 } 118 } 119 120 static unsigned getSUBrrOpcode(unsigned isLP64) { 121 return isLP64 ? X86::SUB64rr : X86::SUB32rr; 122 } 123 124 static unsigned getADDrrOpcode(unsigned isLP64) { 125 return isLP64 ? X86::ADD64rr : X86::ADD32rr; 126 } 127 128 static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) { 129 if (IsLP64) { 130 if (isInt<8>(Imm)) 131 return X86::AND64ri8; 132 return X86::AND64ri32; 133 } 134 if (isInt<8>(Imm)) 135 return X86::AND32ri8; 136 return X86::AND32ri; 137 } 138 139 static unsigned getLEArOpcode(unsigned IsLP64) { 140 return IsLP64 ? X86::LEA64r : X86::LEA32r; 141 } 142 143 /// findDeadCallerSavedReg - Return a caller-saved register that isn't live 144 /// when it reaches the "return" instruction. We can then pop a stack object 145 /// to this register without worry about clobbering it. 146 static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, 147 MachineBasicBlock::iterator &MBBI, 148 const TargetRegisterInfo *TRI, 149 bool Is64Bit) { 150 const MachineFunction *MF = MBB.getParent(); 151 const Function *F = MF->getFunction(); 152 if (!F || MF->getMMI().callsEHReturn()) 153 return 0; 154 155 static const uint16_t CallerSavedRegs32Bit[] = { 156 X86::EAX, X86::EDX, X86::ECX, 0 157 }; 158 159 static const uint16_t CallerSavedRegs64Bit[] = { 160 X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI, 161 X86::R8, X86::R9, X86::R10, X86::R11, 0 162 }; 163 164 unsigned Opc = MBBI->getOpcode(); 165 switch (Opc) { 166 default: return 0; 167 case X86::RETL: 168 case X86::RETQ: 169 case X86::RETIL: 170 case X86::RETIQ: 171 case X86::TCRETURNdi: 172 case X86::TCRETURNri: 173 case X86::TCRETURNmi: 174 case X86::TCRETURNdi64: 175 case X86::TCRETURNri64: 176 case X86::TCRETURNmi64: 177 case X86::EH_RETURN: 178 case X86::EH_RETURN64: { 179 SmallSet<uint16_t, 8> Uses; 180 for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) { 181 MachineOperand &MO = MBBI->getOperand(i); 182 if (!MO.isReg() || MO.isDef()) 183 continue; 184 unsigned Reg = MO.getReg(); 185 if (!Reg) 186 continue; 187 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) 188 Uses.insert(*AI); 189 } 190 191 const uint16_t *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit; 192 for (; *CS; ++CS) 193 if (!Uses.count(*CS)) 194 return *CS; 195 } 196 } 197 198 return 0; 199 } 200 201 static bool isEAXLiveIn(MachineFunction &MF) { 202 for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(), 203 EE = MF.getRegInfo().livein_end(); II != EE; ++II) { 204 unsigned Reg = II->first; 205 206 if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX || 207 Reg == X86::AH || Reg == X86::AL) 208 return true; 209 } 210 211 return false; 212 } 213 214 /// Check whether or not the terminators of \p MBB needs to read EFLAGS. 215 static bool terminatorsNeedFlagsAsInput(const MachineBasicBlock &MBB) { 216 for (const MachineInstr &MI : MBB.terminators()) { 217 bool BreakNext = false; 218 for (const MachineOperand &MO : MI.operands()) { 219 if (!MO.isReg()) 220 continue; 221 unsigned Reg = MO.getReg(); 222 if (Reg != X86::EFLAGS) 223 continue; 224 225 // This terminator needs an eflag that is not defined 226 // by a previous terminator. 227 if (!MO.isDef()) 228 return true; 229 BreakNext = true; 230 } 231 if (BreakNext) 232 break; 233 } 234 return false; 235 } 236 237 /// emitSPUpdate - Emit a series of instructions to increment / decrement the 238 /// stack pointer by a constant value. 239 void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB, 240 MachineBasicBlock::iterator &MBBI, 241 int64_t NumBytes, bool InEpilogue) const { 242 bool isSub = NumBytes < 0; 243 uint64_t Offset = isSub ? -NumBytes : NumBytes; 244 245 uint64_t Chunk = (1LL << 31) - 1; 246 DebugLoc DL = MBB.findDebugLoc(MBBI); 247 248 while (Offset) { 249 if (Offset > Chunk) { 250 // Rather than emit a long series of instructions for large offsets, 251 // load the offset into a register and do one sub/add 252 unsigned Reg = 0; 253 254 if (isSub && !isEAXLiveIn(*MBB.getParent())) 255 Reg = (unsigned)(Is64Bit ? X86::RAX : X86::EAX); 256 else 257 Reg = findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit); 258 259 if (Reg) { 260 unsigned Opc = Is64Bit ? X86::MOV64ri : X86::MOV32ri; 261 BuildMI(MBB, MBBI, DL, TII.get(Opc), Reg) 262 .addImm(Offset); 263 Opc = isSub 264 ? getSUBrrOpcode(Is64Bit) 265 : getADDrrOpcode(Is64Bit); 266 MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) 267 .addReg(StackPtr) 268 .addReg(Reg); 269 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. 270 Offset = 0; 271 continue; 272 } 273 } 274 275 uint64_t ThisVal = std::min(Offset, Chunk); 276 if (ThisVal == (Is64Bit ? 8 : 4)) { 277 // Use push / pop instead. 278 unsigned Reg = isSub 279 ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX) 280 : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit); 281 if (Reg) { 282 unsigned Opc = isSub 283 ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r) 284 : (Is64Bit ? X86::POP64r : X86::POP32r); 285 MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc)) 286 .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub)); 287 if (isSub) 288 MI->setFlag(MachineInstr::FrameSetup); 289 else 290 MI->setFlag(MachineInstr::FrameDestroy); 291 Offset -= ThisVal; 292 continue; 293 } 294 } 295 296 MachineInstrBuilder MI = BuildStackAdjustment( 297 MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue); 298 if (isSub) 299 MI.setMIFlag(MachineInstr::FrameSetup); 300 else 301 MI.setMIFlag(MachineInstr::FrameDestroy); 302 303 Offset -= ThisVal; 304 } 305 } 306 307 MachineInstrBuilder X86FrameLowering::BuildStackAdjustment( 308 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL, 309 int64_t Offset, bool InEpilogue) const { 310 assert(Offset != 0 && "zero offset stack adjustment requested"); 311 312 // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue 313 // is tricky. 314 bool UseLEA; 315 if (!InEpilogue) { 316 UseLEA = STI.useLeaForSP(); 317 } else { 318 // If we can use LEA for SP but we shouldn't, check that none 319 // of the terminators uses the eflags. Otherwise we will insert 320 // a ADD that will redefine the eflags and break the condition. 321 // Alternatively, we could move the ADD, but this may not be possible 322 // and is an optimization anyway. 323 UseLEA = canUseLEAForSPInEpilogue(*MBB.getParent()); 324 if (UseLEA && !STI.useLeaForSP()) 325 UseLEA = terminatorsNeedFlagsAsInput(MBB); 326 // If that assert breaks, that means we do not do the right thing 327 // in canUseAsEpilogue. 328 assert((UseLEA || !terminatorsNeedFlagsAsInput(MBB)) && 329 "We shouldn't have allowed this insertion point"); 330 } 331 332 MachineInstrBuilder MI; 333 if (UseLEA) { 334 MI = addRegOffset(BuildMI(MBB, MBBI, DL, 335 TII.get(getLEArOpcode(Uses64BitFramePtr)), 336 StackPtr), 337 StackPtr, false, Offset); 338 } else { 339 bool IsSub = Offset < 0; 340 uint64_t AbsOffset = IsSub ? -Offset : Offset; 341 unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr, AbsOffset) 342 : getADDriOpcode(Uses64BitFramePtr, AbsOffset); 343 MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) 344 .addReg(StackPtr) 345 .addImm(AbsOffset); 346 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. 347 } 348 return MI; 349 } 350 351 int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB, 352 MachineBasicBlock::iterator &MBBI, 353 bool doMergeWithPrevious) const { 354 if ((doMergeWithPrevious && MBBI == MBB.begin()) || 355 (!doMergeWithPrevious && MBBI == MBB.end())) 356 return 0; 357 358 MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI; 359 MachineBasicBlock::iterator NI = doMergeWithPrevious ? nullptr 360 : std::next(MBBI); 361 unsigned Opc = PI->getOpcode(); 362 int Offset = 0; 363 364 if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || 365 Opc == X86::ADD32ri || Opc == X86::ADD32ri8 || 366 Opc == X86::LEA32r || Opc == X86::LEA64_32r) && 367 PI->getOperand(0).getReg() == StackPtr){ 368 Offset += PI->getOperand(2).getImm(); 369 MBB.erase(PI); 370 if (!doMergeWithPrevious) MBBI = NI; 371 } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || 372 Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && 373 PI->getOperand(0).getReg() == StackPtr) { 374 Offset -= PI->getOperand(2).getImm(); 375 MBB.erase(PI); 376 if (!doMergeWithPrevious) MBBI = NI; 377 } 378 379 return Offset; 380 } 381 382 void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB, 383 MachineBasicBlock::iterator MBBI, DebugLoc DL, 384 MCCFIInstruction CFIInst) const { 385 MachineFunction &MF = *MBB.getParent(); 386 unsigned CFIIndex = MF.getMMI().addFrameInst(CFIInst); 387 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) 388 .addCFIIndex(CFIIndex); 389 } 390 391 void 392 X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, 393 MachineBasicBlock::iterator MBBI, 394 DebugLoc DL) const { 395 MachineFunction &MF = *MBB.getParent(); 396 MachineFrameInfo *MFI = MF.getFrameInfo(); 397 MachineModuleInfo &MMI = MF.getMMI(); 398 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); 399 400 // Add callee saved registers to move list. 401 const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); 402 if (CSI.empty()) return; 403 404 // Calculate offsets. 405 for (std::vector<CalleeSavedInfo>::const_iterator 406 I = CSI.begin(), E = CSI.end(); I != E; ++I) { 407 int64_t Offset = MFI->getObjectOffset(I->getFrameIdx()); 408 unsigned Reg = I->getReg(); 409 410 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); 411 BuildCFI(MBB, MBBI, DL, 412 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); 413 } 414 } 415 416 /// usesTheStack - This function checks if any of the users of EFLAGS 417 /// copies the EFLAGS. We know that the code that lowers COPY of EFLAGS has 418 /// to use the stack, and if we don't adjust the stack we clobber the first 419 /// frame index. 420 /// See X86InstrInfo::copyPhysReg. 421 static bool usesTheStack(const MachineFunction &MF) { 422 const MachineRegisterInfo &MRI = MF.getRegInfo(); 423 424 for (MachineRegisterInfo::reg_instr_iterator 425 ri = MRI.reg_instr_begin(X86::EFLAGS), re = MRI.reg_instr_end(); 426 ri != re; ++ri) 427 if (ri->isCopy()) 428 return true; 429 430 return false; 431 } 432 433 void X86FrameLowering::emitStackProbeCall(MachineFunction &MF, 434 MachineBasicBlock &MBB, 435 MachineBasicBlock::iterator MBBI, 436 DebugLoc DL) const { 437 bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large; 438 439 unsigned CallOp; 440 if (Is64Bit) 441 CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32; 442 else 443 CallOp = X86::CALLpcrel32; 444 445 const char *Symbol; 446 if (Is64Bit) { 447 if (STI.isTargetCygMing()) { 448 Symbol = "___chkstk_ms"; 449 } else { 450 Symbol = "__chkstk"; 451 } 452 } else if (STI.isTargetCygMing()) 453 Symbol = "_alloca"; 454 else 455 Symbol = "_chkstk"; 456 457 MachineInstrBuilder CI; 458 459 // All current stack probes take AX and SP as input, clobber flags, and 460 // preserve all registers. x86_64 probes leave RSP unmodified. 461 if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) { 462 // For the large code model, we have to call through a register. Use R11, 463 // as it is scratch in all supported calling conventions. 464 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11) 465 .addExternalSymbol(Symbol); 466 CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11); 467 } else { 468 CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addExternalSymbol(Symbol); 469 } 470 471 unsigned AX = Is64Bit ? X86::RAX : X86::EAX; 472 unsigned SP = Is64Bit ? X86::RSP : X86::ESP; 473 CI.addReg(AX, RegState::Implicit) 474 .addReg(SP, RegState::Implicit) 475 .addReg(AX, RegState::Define | RegState::Implicit) 476 .addReg(SP, RegState::Define | RegState::Implicit) 477 .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); 478 479 if (Is64Bit) { 480 // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp 481 // themselves. It also does not clobber %rax so we can reuse it when 482 // adjusting %rsp. 483 BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), X86::RSP) 484 .addReg(X86::RSP) 485 .addReg(X86::RAX); 486 } 487 } 488 489 static unsigned calculateSetFPREG(uint64_t SPAdjust) { 490 // Win64 ABI has a less restrictive limitation of 240; 128 works equally well 491 // and might require smaller successive adjustments. 492 const uint64_t Win64MaxSEHOffset = 128; 493 uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset); 494 // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode. 495 return SEHFrameOffset & -16; 496 } 497 498 // If we're forcing a stack realignment we can't rely on just the frame 499 // info, we need to know the ABI stack alignment as well in case we 500 // have a call out. Otherwise just make sure we have some alignment - we'll 501 // go with the minimum SlotSize. 502 uint64_t X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const { 503 const MachineFrameInfo *MFI = MF.getFrameInfo(); 504 uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment. 505 unsigned StackAlign = getStackAlignment(); 506 if (MF.getFunction()->hasFnAttribute("stackrealign")) { 507 if (MFI->hasCalls()) 508 MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; 509 else if (MaxAlign < SlotSize) 510 MaxAlign = SlotSize; 511 } 512 return MaxAlign; 513 } 514 515 void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB, 516 MachineBasicBlock::iterator MBBI, 517 DebugLoc DL, 518 uint64_t MaxAlign) const { 519 uint64_t Val = -MaxAlign; 520 MachineInstr *MI = 521 BuildMI(MBB, MBBI, DL, TII.get(getANDriOpcode(Uses64BitFramePtr, Val)), 522 StackPtr) 523 .addReg(StackPtr) 524 .addImm(Val) 525 .setMIFlag(MachineInstr::FrameSetup); 526 527 // The EFLAGS implicit def is dead. 528 MI->getOperand(3).setIsDead(); 529 } 530 531 /// emitPrologue - Push callee-saved registers onto the stack, which 532 /// automatically adjust the stack pointer. Adjust the stack pointer to allocate 533 /// space for local variables. Also emit labels used by the exception handler to 534 /// generate the exception handling frames. 535 536 /* 537 Here's a gist of what gets emitted: 538 539 ; Establish frame pointer, if needed 540 [if needs FP] 541 push %rbp 542 .cfi_def_cfa_offset 16 543 .cfi_offset %rbp, -16 544 .seh_pushreg %rpb 545 mov %rsp, %rbp 546 .cfi_def_cfa_register %rbp 547 548 ; Spill general-purpose registers 549 [for all callee-saved GPRs] 550 pushq %<reg> 551 [if not needs FP] 552 .cfi_def_cfa_offset (offset from RETADDR) 553 .seh_pushreg %<reg> 554 555 ; If the required stack alignment > default stack alignment 556 ; rsp needs to be re-aligned. This creates a "re-alignment gap" 557 ; of unknown size in the stack frame. 558 [if stack needs re-alignment] 559 and $MASK, %rsp 560 561 ; Allocate space for locals 562 [if target is Windows and allocated space > 4096 bytes] 563 ; Windows needs special care for allocations larger 564 ; than one page. 565 mov $NNN, %rax 566 call ___chkstk_ms/___chkstk 567 sub %rax, %rsp 568 [else] 569 sub $NNN, %rsp 570 571 [if needs FP] 572 .seh_stackalloc (size of XMM spill slots) 573 .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots 574 [else] 575 .seh_stackalloc NNN 576 577 ; Spill XMMs 578 ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved, 579 ; they may get spilled on any platform, if the current function 580 ; calls @llvm.eh.unwind.init 581 [if needs FP] 582 [for all callee-saved XMM registers] 583 movaps %<xmm reg>, -MMM(%rbp) 584 [for all callee-saved XMM registers] 585 .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset) 586 ; i.e. the offset relative to (%rbp - SEHFrameOffset) 587 [else] 588 [for all callee-saved XMM registers] 589 movaps %<xmm reg>, KKK(%rsp) 590 [for all callee-saved XMM registers] 591 .seh_savexmm %<xmm reg>, KKK 592 593 .seh_endprologue 594 595 [if needs base pointer] 596 mov %rsp, %rbx 597 [if needs to restore base pointer] 598 mov %rsp, -MMM(%rbp) 599 600 ; Emit CFI info 601 [if needs FP] 602 [for all callee-saved registers] 603 .cfi_offset %<reg>, (offset from %rbp) 604 [else] 605 .cfi_def_cfa_offset (offset from RETADDR) 606 [for all callee-saved registers] 607 .cfi_offset %<reg>, (offset from %rsp) 608 609 Notes: 610 - .seh directives are emitted only for Windows 64 ABI 611 - .cfi directives are emitted for all other ABIs 612 - for 32-bit code, substitute %e?? registers for %r?? 613 */ 614 615 void X86FrameLowering::emitPrologue(MachineFunction &MF, 616 MachineBasicBlock &MBB) const { 617 assert(&STI == &MF.getSubtarget<X86Subtarget>() && 618 "MF used frame lowering for wrong subtarget"); 619 MachineBasicBlock::iterator MBBI = MBB.begin(); 620 MachineFrameInfo *MFI = MF.getFrameInfo(); 621 const Function *Fn = MF.getFunction(); 622 MachineModuleInfo &MMI = MF.getMMI(); 623 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 624 uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment. 625 uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate. 626 bool IsFunclet = MBB.isEHFuncletEntry(); 627 bool HasFP = hasFP(MF); 628 bool IsWin64CC = STI.isCallingConvWin64(Fn->getCallingConv()); 629 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); 630 bool NeedsWinCFI = IsWin64Prologue && Fn->needsUnwindTableEntry(); 631 bool NeedsDwarfCFI = 632 !IsWin64Prologue && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry()); 633 unsigned FramePtr = TRI->getFrameRegister(MF); 634 const unsigned MachineFramePtr = 635 STI.isTarget64BitILP32() 636 ? getX86SubSuperRegister(FramePtr, MVT::i64, false) 637 : FramePtr; 638 unsigned BasePtr = TRI->getBaseRegister(); 639 DebugLoc DL; 640 641 // Add RETADDR move area to callee saved frame size. 642 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); 643 if (TailCallReturnAddrDelta && IsWin64Prologue) 644 report_fatal_error("Can't handle guaranteed tail call under win64 yet"); 645 646 if (TailCallReturnAddrDelta < 0) 647 X86FI->setCalleeSavedFrameSize( 648 X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta); 649 650 bool UseStackProbe = (STI.isOSWindows() && !STI.isTargetMachO()); 651 652 // The default stack probe size is 4096 if the function has no stackprobesize 653 // attribute. 654 unsigned StackProbeSize = 4096; 655 if (Fn->hasFnAttribute("stack-probe-size")) 656 Fn->getFnAttribute("stack-probe-size") 657 .getValueAsString() 658 .getAsInteger(0, StackProbeSize); 659 660 // If this is x86-64 and the Red Zone is not disabled, if we are a leaf 661 // function, and use up to 128 bytes of stack space, don't have a frame 662 // pointer, calls, or dynamic alloca then we do not need to adjust the 663 // stack pointer (we fit in the Red Zone). We also check that we don't 664 // push and pop from the stack. 665 if (Is64Bit && !Fn->hasFnAttribute(Attribute::NoRedZone) && 666 !TRI->needsStackRealignment(MF) && 667 !MFI->hasVarSizedObjects() && // No dynamic alloca. 668 !MFI->adjustsStack() && // No calls. 669 !IsWin64CC && // Win64 has no Red Zone 670 !usesTheStack(MF) && // Don't push and pop. 671 !MF.shouldSplitStack()) { // Regular stack 672 uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); 673 if (HasFP) MinSize += SlotSize; 674 StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); 675 MFI->setStackSize(StackSize); 676 } 677 678 // Insert stack pointer adjustment for later moving of return addr. Only 679 // applies to tail call optimized functions where the callee argument stack 680 // size is bigger than the callers. 681 if (TailCallReturnAddrDelta < 0) { 682 BuildStackAdjustment(MBB, MBBI, DL, TailCallReturnAddrDelta, 683 /*InEpilogue=*/false) 684 .setMIFlag(MachineInstr::FrameSetup); 685 } 686 687 // Mapping for machine moves: 688 // 689 // DST: VirtualFP AND 690 // SRC: VirtualFP => DW_CFA_def_cfa_offset 691 // ELSE => DW_CFA_def_cfa 692 // 693 // SRC: VirtualFP AND 694 // DST: Register => DW_CFA_def_cfa_register 695 // 696 // ELSE 697 // OFFSET < 0 => DW_CFA_offset_extended_sf 698 // REG < 64 => DW_CFA_offset + Reg 699 // ELSE => DW_CFA_offset_extended 700 701 uint64_t NumBytes = 0; 702 int stackGrowth = -SlotSize; 703 704 unsigned RDX = Uses64BitFramePtr ? X86::RDX : X86::EDX; 705 if (IsWin64Prologue && IsFunclet) { 706 // Immediately spill RDX into the home slot. The runtime cares about this. 707 // MOV64mr %rdx, 16(%rsp) 708 unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; 709 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16) 710 .addReg(RDX) 711 .setMIFlag(MachineInstr::FrameSetup); 712 } 713 714 if (HasFP) { 715 // Calculate required stack adjustment. 716 uint64_t FrameSize = StackSize - SlotSize; 717 // If required, include space for extra hidden slot for stashing base pointer. 718 if (X86FI->getRestoreBasePointer()) 719 FrameSize += SlotSize; 720 721 NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize(); 722 723 // Callee-saved registers are pushed on stack before the stack is realigned. 724 if (TRI->needsStackRealignment(MF) && !IsWin64Prologue) 725 NumBytes = RoundUpToAlignment(NumBytes, MaxAlign); 726 727 // Get the offset of the stack slot for the EBP register, which is 728 // guaranteed to be the last slot by processFunctionBeforeFrameFinalized. 729 // Update the frame offset adjustment. 730 if (!IsFunclet) 731 MFI->setOffsetAdjustment(-NumBytes); 732 else 733 assert(MFI->getOffsetAdjustment() == -(int)NumBytes && 734 "should calculate same local variable offset for funclets"); 735 736 // Save EBP/RBP into the appropriate stack slot. 737 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r)) 738 .addReg(MachineFramePtr, RegState::Kill) 739 .setMIFlag(MachineInstr::FrameSetup); 740 741 if (NeedsDwarfCFI) { 742 // Mark the place where EBP/RBP was saved. 743 // Define the current CFA rule to use the provided offset. 744 assert(StackSize); 745 BuildCFI(MBB, MBBI, DL, 746 MCCFIInstruction::createDefCfaOffset(nullptr, 2 * stackGrowth)); 747 748 // Change the rule for the FramePtr to be an "offset" rule. 749 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true); 750 BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createOffset( 751 nullptr, DwarfFramePtr, 2 * stackGrowth)); 752 } 753 754 if (NeedsWinCFI) { 755 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)) 756 .addImm(FramePtr) 757 .setMIFlag(MachineInstr::FrameSetup); 758 } 759 760 if (!IsWin64Prologue && !IsFunclet) { 761 // Update EBP with the new base value. 762 BuildMI(MBB, MBBI, DL, 763 TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), 764 FramePtr) 765 .addReg(StackPtr) 766 .setMIFlag(MachineInstr::FrameSetup); 767 768 if (NeedsDwarfCFI) { 769 // Mark effective beginning of when frame pointer becomes valid. 770 // Define the current CFA to use the EBP/RBP register. 771 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true); 772 BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaRegister( 773 nullptr, DwarfFramePtr)); 774 } 775 } 776 777 // Mark the FramePtr as live-in in every block. 778 for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) 779 I->addLiveIn(MachineFramePtr); 780 } else { 781 assert(!IsFunclet && "funclets without FPs not yet implemented"); 782 NumBytes = StackSize - X86FI->getCalleeSavedFrameSize(); 783 } 784 785 // For EH funclets, only allocate enough space for outgoing calls. Save the 786 // NumBytes value that we would've used for the parent frame. 787 unsigned ParentFrameNumBytes = NumBytes; 788 if (IsFunclet) 789 NumBytes = MFI->getMaxCallFrameSize(); 790 791 // Skip the callee-saved push instructions. 792 bool PushedRegs = false; 793 int StackOffset = 2 * stackGrowth; 794 795 while (MBBI != MBB.end() && 796 MBBI->getFlag(MachineInstr::FrameSetup) && 797 (MBBI->getOpcode() == X86::PUSH32r || 798 MBBI->getOpcode() == X86::PUSH64r)) { 799 PushedRegs = true; 800 unsigned Reg = MBBI->getOperand(0).getReg(); 801 ++MBBI; 802 803 if (!HasFP && NeedsDwarfCFI) { 804 // Mark callee-saved push instruction. 805 // Define the current CFA rule to use the provided offset. 806 assert(StackSize); 807 BuildCFI(MBB, MBBI, DL, 808 MCCFIInstruction::createDefCfaOffset(nullptr, StackOffset)); 809 StackOffset += stackGrowth; 810 } 811 812 if (NeedsWinCFI) { 813 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)).addImm(Reg).setMIFlag( 814 MachineInstr::FrameSetup); 815 } 816 } 817 818 // Realign stack after we pushed callee-saved registers (so that we'll be 819 // able to calculate their offsets from the frame pointer). 820 // Don't do this for Win64, it needs to realign the stack after the prologue. 821 if (!IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF)) { 822 assert(HasFP && "There should be a frame pointer if stack is realigned."); 823 BuildStackAlignAND(MBB, MBBI, DL, MaxAlign); 824 } 825 826 // If there is an SUB32ri of ESP immediately before this instruction, merge 827 // the two. This can be the case when tail call elimination is enabled and 828 // the callee has more arguments then the caller. 829 NumBytes -= mergeSPUpdates(MBB, MBBI, true); 830 831 // Adjust stack pointer: ESP -= numbytes. 832 833 // Windows and cygwin/mingw require a prologue helper routine when allocating 834 // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw 835 // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the 836 // stack and adjust the stack pointer in one go. The 64-bit version of 837 // __chkstk is only responsible for probing the stack. The 64-bit prologue is 838 // responsible for adjusting the stack pointer. Touching the stack at 4K 839 // increments is necessary to ensure that the guard pages used by the OS 840 // virtual memory manager are allocated in correct sequence. 841 uint64_t AlignedNumBytes = NumBytes; 842 if (IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF)) 843 AlignedNumBytes = RoundUpToAlignment(AlignedNumBytes, MaxAlign); 844 if (AlignedNumBytes >= StackProbeSize && UseStackProbe) { 845 // Check whether EAX is livein for this function. 846 bool isEAXAlive = isEAXLiveIn(MF); 847 848 if (isEAXAlive) { 849 // Sanity check that EAX is not livein for this function. 850 // It should not be, so throw an assert. 851 assert(!Is64Bit && "EAX is livein in x64 case!"); 852 853 // Save EAX 854 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) 855 .addReg(X86::EAX, RegState::Kill) 856 .setMIFlag(MachineInstr::FrameSetup); 857 } 858 859 if (Is64Bit) { 860 // Handle the 64-bit Windows ABI case where we need to call __chkstk. 861 // Function prologue is responsible for adjusting the stack pointer. 862 if (isUInt<32>(NumBytes)) { 863 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) 864 .addImm(NumBytes) 865 .setMIFlag(MachineInstr::FrameSetup); 866 } else if (isInt<32>(NumBytes)) { 867 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri32), X86::RAX) 868 .addImm(NumBytes) 869 .setMIFlag(MachineInstr::FrameSetup); 870 } else { 871 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX) 872 .addImm(NumBytes) 873 .setMIFlag(MachineInstr::FrameSetup); 874 } 875 } else { 876 // Allocate NumBytes-4 bytes on stack in case of isEAXAlive. 877 // We'll also use 4 already allocated bytes for EAX. 878 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) 879 .addImm(isEAXAlive ? NumBytes - 4 : NumBytes) 880 .setMIFlag(MachineInstr::FrameSetup); 881 } 882 883 // Save a pointer to the MI where we set AX. 884 MachineBasicBlock::iterator SetRAX = MBBI; 885 --SetRAX; 886 887 // Call __chkstk, __chkstk_ms, or __alloca. 888 emitStackProbeCall(MF, MBB, MBBI, DL); 889 890 // Apply the frame setup flag to all inserted instrs. 891 for (; SetRAX != MBBI; ++SetRAX) 892 SetRAX->setFlag(MachineInstr::FrameSetup); 893 894 if (isEAXAlive) { 895 // Restore EAX 896 MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), 897 X86::EAX), 898 StackPtr, false, NumBytes - 4); 899 MI->setFlag(MachineInstr::FrameSetup); 900 MBB.insert(MBBI, MI); 901 } 902 } else if (NumBytes) { 903 emitSPUpdate(MBB, MBBI, -(int64_t)NumBytes, /*InEpilogue=*/false); 904 } 905 906 if (NeedsWinCFI && NumBytes) 907 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc)) 908 .addImm(NumBytes) 909 .setMIFlag(MachineInstr::FrameSetup); 910 911 int SEHFrameOffset = 0; 912 if (IsWin64Prologue && HasFP) { 913 // Set RBP to a small fixed offset from RSP. In the funclet case, we base 914 // this calculation on the incoming RDX, which holds the value of RSP from 915 // the parent frame at the end of the prologue. 916 unsigned SPOrRDX = !IsFunclet ? StackPtr : RDX; 917 SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes); 918 if (SEHFrameOffset) 919 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr), 920 SPOrRDX, false, SEHFrameOffset); 921 else 922 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr) 923 .addReg(SPOrRDX); 924 925 // If this is not a funclet, emit the CFI describing our frame pointer. 926 if (NeedsWinCFI && !IsFunclet) 927 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame)) 928 .addImm(FramePtr) 929 .addImm(SEHFrameOffset) 930 .setMIFlag(MachineInstr::FrameSetup); 931 } else if (IsFunclet && STI.is32Bit()) { 932 // Reset EBP / ESI to something good for funclets. 933 MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL); 934 } 935 936 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) { 937 const MachineInstr *FrameInstr = &*MBBI; 938 ++MBBI; 939 940 if (NeedsWinCFI) { 941 int FI; 942 if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) { 943 if (X86::FR64RegClass.contains(Reg)) { 944 unsigned IgnoredFrameReg; 945 int Offset = getFrameIndexReference(MF, FI, IgnoredFrameReg); 946 Offset += SEHFrameOffset; 947 948 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM)) 949 .addImm(Reg) 950 .addImm(Offset) 951 .setMIFlag(MachineInstr::FrameSetup); 952 } 953 } 954 } 955 } 956 957 if (NeedsWinCFI) 958 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue)) 959 .setMIFlag(MachineInstr::FrameSetup); 960 961 // Realign stack after we spilled callee-saved registers (so that we'll be 962 // able to calculate their offsets from the frame pointer). 963 // Win64 requires aligning the stack after the prologue. 964 if (IsWin64Prologue && TRI->needsStackRealignment(MF)) { 965 assert(HasFP && "There should be a frame pointer if stack is realigned."); 966 BuildStackAlignAND(MBB, MBBI, DL, MaxAlign); 967 } 968 969 // If we need a base pointer, set it up here. It's whatever the value 970 // of the stack pointer is at this point. Any variable size objects 971 // will be allocated after this, so we can still use the base pointer 972 // to reference locals. 973 if (TRI->hasBasePointer(MF)) { 974 // Update the base pointer with the current stack pointer. 975 unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr; 976 BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr) 977 .addReg(StackPtr) 978 .setMIFlag(MachineInstr::FrameSetup); 979 if (X86FI->getRestoreBasePointer()) { 980 // Stash value of base pointer. Saving RSP instead of EBP shortens 981 // dependence chain. Used by SjLj EH. 982 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; 983 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), 984 FramePtr, true, X86FI->getRestoreBasePointerOffset()) 985 .addReg(StackPtr) 986 .setMIFlag(MachineInstr::FrameSetup); 987 } 988 989 if (X86FI->getHasSEHFramePtrSave()) { 990 // Stash the value of the frame pointer relative to the base pointer for 991 // Win32 EH. This supports Win32 EH, which does the inverse of the above: 992 // it recovers the frame pointer from the base pointer rather than the 993 // other way around. 994 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; 995 unsigned UsedReg; 996 int Offset = 997 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg); 998 assert(UsedReg == BasePtr); 999 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset) 1000 .addReg(FramePtr) 1001 .setMIFlag(MachineInstr::FrameSetup); 1002 } 1003 } 1004 1005 if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) { 1006 // Mark end of stack pointer adjustment. 1007 if (!HasFP && NumBytes) { 1008 // Define the current CFA rule to use the provided offset. 1009 assert(StackSize); 1010 BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaOffset( 1011 nullptr, -StackSize + stackGrowth)); 1012 } 1013 1014 // Emit DWARF info specifying the offsets of the callee-saved registers. 1015 if (PushedRegs) 1016 emitCalleeSavedFrameMoves(MBB, MBBI, DL); 1017 } 1018 } 1019 1020 bool X86FrameLowering::canUseLEAForSPInEpilogue( 1021 const MachineFunction &MF) const { 1022 // We can't use LEA instructions for adjusting the stack pointer if this is a 1023 // leaf function in the Win64 ABI. Only ADD instructions may be used to 1024 // deallocate the stack. 1025 // This means that we can use LEA for SP in two situations: 1026 // 1. We *aren't* using the Win64 ABI which means we are free to use LEA. 1027 // 2. We *have* a frame pointer which means we are permitted to use LEA. 1028 return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF); 1029 } 1030 1031 static bool isFuncletReturnInstr(MachineInstr *MI) { 1032 switch (MI->getOpcode()) { 1033 case X86::CATCHRET: 1034 case X86::CLEANUPRET: 1035 return true; 1036 default: 1037 return false; 1038 } 1039 llvm_unreachable("impossible"); 1040 } 1041 1042 void X86FrameLowering::emitEpilogue(MachineFunction &MF, 1043 MachineBasicBlock &MBB) const { 1044 const MachineFrameInfo *MFI = MF.getFrameInfo(); 1045 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 1046 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1047 DebugLoc DL; 1048 if (MBBI != MBB.end()) 1049 DL = MBBI->getDebugLoc(); 1050 // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit. 1051 const bool Is64BitILP32 = STI.isTarget64BitILP32(); 1052 unsigned FramePtr = TRI->getFrameRegister(MF); 1053 unsigned MachineFramePtr = 1054 Is64BitILP32 ? getX86SubSuperRegister(FramePtr, MVT::i64, false) 1055 : FramePtr; 1056 1057 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); 1058 bool NeedsWinCFI = 1059 IsWin64Prologue && MF.getFunction()->needsUnwindTableEntry(); 1060 1061 // Get the number of bytes to allocate from the FrameInfo. 1062 uint64_t StackSize = MFI->getStackSize(); 1063 uint64_t MaxAlign = calculateMaxStackAlign(MF); 1064 unsigned CSSize = X86FI->getCalleeSavedFrameSize(); 1065 uint64_t NumBytes = 0; 1066 1067 if (MBBI->getOpcode() == X86::CATCHRET) { 1068 NumBytes = MFI->getMaxCallFrameSize(); 1069 assert(hasFP(MF) && "EH funclets without FP not yet implemented"); 1070 MachineBasicBlock *TargetMBB = MBBI->getOperand(0).getMBB(); 1071 1072 // If this is SEH, this isn't really a funclet return. 1073 bool IsSEH = isAsynchronousEHPersonality( 1074 classifyEHPersonality(MF.getFunction()->getPersonalityFn())); 1075 if (IsSEH) { 1076 if (STI.is32Bit()) 1077 restoreWin32EHStackPointers(MBB, MBBI, DL, /*RestoreSP=*/true); 1078 BuildMI(MBB, MBBI, DL, TII.get(X86::JMP_4)).addMBB(TargetMBB); 1079 MBBI->eraseFromParent(); 1080 return; 1081 } 1082 1083 // For 32-bit, create a new block for the restore code. 1084 MachineBasicBlock *RestoreMBB = TargetMBB; 1085 if (STI.is32Bit()) { 1086 RestoreMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock()); 1087 MF.insert(TargetMBB, RestoreMBB); 1088 MBB.removeSuccessor(TargetMBB); 1089 MBB.addSuccessor(RestoreMBB); 1090 RestoreMBB->addSuccessor(TargetMBB); 1091 MBBI->getOperand(0).setMBB(RestoreMBB); 1092 } 1093 1094 // Fill EAX/RAX with the address of the target block. 1095 unsigned ReturnReg = STI.is64Bit() ? X86::RAX : X86::EAX; 1096 if (STI.is64Bit()) { 1097 // LEA64r RestoreMBB(%rip), %rax 1098 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), ReturnReg) 1099 .addReg(X86::RIP) 1100 .addImm(0) 1101 .addReg(0) 1102 .addMBB(RestoreMBB) 1103 .addReg(0); 1104 } else { 1105 // MOV32ri $RestoreMBB, %eax 1106 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri)) 1107 .addReg(ReturnReg) 1108 .addMBB(RestoreMBB); 1109 } 1110 1111 // Pop EBP. 1112 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r), 1113 MachineFramePtr) 1114 .setMIFlag(MachineInstr::FrameDestroy); 1115 1116 // Insert frame restoration code in a new block. 1117 if (STI.is32Bit()) { 1118 auto RestoreMBBI = RestoreMBB->begin(); 1119 restoreWin32EHStackPointers(*RestoreMBB, RestoreMBBI, DL, 1120 /*RestoreSP=*/true); 1121 BuildMI(*RestoreMBB, RestoreMBBI, DL, TII.get(X86::JMP_4)) 1122 .addMBB(TargetMBB); 1123 } 1124 } else if (MBBI->getOpcode() == X86::CLEANUPRET) { 1125 NumBytes = MFI->getMaxCallFrameSize(); 1126 assert(hasFP(MF) && "EH funclets without FP not yet implemented"); 1127 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r), 1128 MachineFramePtr) 1129 .setMIFlag(MachineInstr::FrameDestroy); 1130 } else if (hasFP(MF)) { 1131 // Calculate required stack adjustment. 1132 uint64_t FrameSize = StackSize - SlotSize; 1133 NumBytes = FrameSize - CSSize; 1134 1135 // Callee-saved registers were pushed on stack before the stack was 1136 // realigned. 1137 if (TRI->needsStackRealignment(MF) && !IsWin64Prologue) 1138 NumBytes = RoundUpToAlignment(FrameSize, MaxAlign); 1139 1140 // Pop EBP. 1141 BuildMI(MBB, MBBI, DL, 1142 TII.get(Is64Bit ? X86::POP64r : X86::POP32r), MachineFramePtr) 1143 .setMIFlag(MachineInstr::FrameDestroy); 1144 } else { 1145 NumBytes = StackSize - CSSize; 1146 } 1147 uint64_t SEHStackAllocAmt = NumBytes; 1148 1149 // Skip the callee-saved pop instructions. 1150 while (MBBI != MBB.begin()) { 1151 MachineBasicBlock::iterator PI = std::prev(MBBI); 1152 unsigned Opc = PI->getOpcode(); 1153 1154 if ((Opc != X86::POP32r || !PI->getFlag(MachineInstr::FrameDestroy)) && 1155 (Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)) && 1156 Opc != X86::DBG_VALUE && !PI->isTerminator()) 1157 break; 1158 1159 --MBBI; 1160 } 1161 MachineBasicBlock::iterator FirstCSPop = MBBI; 1162 1163 if (MBBI != MBB.end()) 1164 DL = MBBI->getDebugLoc(); 1165 1166 // If there is an ADD32ri or SUB32ri of ESP immediately before this 1167 // instruction, merge the two instructions. 1168 if (NumBytes || MFI->hasVarSizedObjects()) 1169 NumBytes += mergeSPUpdates(MBB, MBBI, true); 1170 1171 // If dynamic alloca is used, then reset esp to point to the last callee-saved 1172 // slot before popping them off! Same applies for the case, when stack was 1173 // realigned. 1174 if (TRI->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) { 1175 if (TRI->needsStackRealignment(MF)) 1176 MBBI = FirstCSPop; 1177 unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt); 1178 uint64_t LEAAmount = 1179 IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize; 1180 1181 // There are only two legal forms of epilogue: 1182 // - add SEHAllocationSize, %rsp 1183 // - lea SEHAllocationSize(%FramePtr), %rsp 1184 // 1185 // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence. 1186 // However, we may use this sequence if we have a frame pointer because the 1187 // effects of the prologue can safely be undone. 1188 if (LEAAmount != 0) { 1189 unsigned Opc = getLEArOpcode(Uses64BitFramePtr); 1190 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr), 1191 FramePtr, false, LEAAmount); 1192 --MBBI; 1193 } else { 1194 unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr); 1195 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) 1196 .addReg(FramePtr); 1197 --MBBI; 1198 } 1199 } else if (NumBytes) { 1200 // Adjust stack pointer back: ESP += numbytes. 1201 emitSPUpdate(MBB, MBBI, NumBytes, /*InEpilogue=*/true); 1202 --MBBI; 1203 } 1204 1205 // Windows unwinder will not invoke function's exception handler if IP is 1206 // either in prologue or in epilogue. This behavior causes a problem when a 1207 // call immediately precedes an epilogue, because the return address points 1208 // into the epilogue. To cope with that, we insert an epilogue marker here, 1209 // then replace it with a 'nop' if it ends up immediately after a CALL in the 1210 // final emitted code. 1211 if (NeedsWinCFI) 1212 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue)); 1213 1214 // Add the return addr area delta back since we are not tail calling. 1215 int Offset = -1 * X86FI->getTCReturnAddrDelta(); 1216 assert(Offset >= 0 && "TCDelta should never be positive"); 1217 if (Offset) { 1218 MBBI = MBB.getFirstTerminator(); 1219 1220 // Check for possible merge with preceding ADD instruction. 1221 Offset += mergeSPUpdates(MBB, MBBI, true); 1222 emitSPUpdate(MBB, MBBI, Offset, /*InEpilogue=*/true); 1223 } 1224 } 1225 1226 // NOTE: this only has a subset of the full frame index logic. In 1227 // particular, the FI < 0 and AfterFPPop logic is handled in 1228 // X86RegisterInfo::eliminateFrameIndex, but not here. Possibly 1229 // (probably?) it should be moved into here. 1230 int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, 1231 unsigned &FrameReg) const { 1232 const MachineFrameInfo *MFI = MF.getFrameInfo(); 1233 1234 // We can't calculate offset from frame pointer if the stack is realigned, 1235 // so enforce usage of stack/base pointer. The base pointer is used when we 1236 // have dynamic allocas in addition to dynamic realignment. 1237 if (TRI->hasBasePointer(MF)) 1238 FrameReg = TRI->getBaseRegister(); 1239 else if (TRI->needsStackRealignment(MF)) 1240 FrameReg = TRI->getStackRegister(); 1241 else 1242 FrameReg = TRI->getFrameRegister(MF); 1243 1244 // Offset will hold the offset from the stack pointer at function entry to the 1245 // object. 1246 // We need to factor in additional offsets applied during the prologue to the 1247 // frame, base, and stack pointer depending on which is used. 1248 int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea(); 1249 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 1250 unsigned CSSize = X86FI->getCalleeSavedFrameSize(); 1251 uint64_t StackSize = MFI->getStackSize(); 1252 bool HasFP = hasFP(MF); 1253 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); 1254 int64_t FPDelta = 0; 1255 1256 if (IsWin64Prologue) { 1257 assert(!MFI->hasCalls() || (StackSize % 16) == 8); 1258 1259 // Calculate required stack adjustment. 1260 uint64_t FrameSize = StackSize - SlotSize; 1261 // If required, include space for extra hidden slot for stashing base pointer. 1262 if (X86FI->getRestoreBasePointer()) 1263 FrameSize += SlotSize; 1264 uint64_t NumBytes = FrameSize - CSSize; 1265 1266 uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes); 1267 if (FI && FI == X86FI->getFAIndex()) 1268 return -SEHFrameOffset; 1269 1270 // FPDelta is the offset from the "traditional" FP location of the old base 1271 // pointer followed by return address and the location required by the 1272 // restricted Win64 prologue. 1273 // Add FPDelta to all offsets below that go through the frame pointer. 1274 FPDelta = FrameSize - SEHFrameOffset; 1275 assert((!MFI->hasCalls() || (FPDelta % 16) == 0) && 1276 "FPDelta isn't aligned per the Win64 ABI!"); 1277 } 1278 1279 1280 if (TRI->hasBasePointer(MF)) { 1281 assert(HasFP && "VLAs and dynamic stack realign, but no FP?!"); 1282 if (FI < 0) { 1283 // Skip the saved EBP. 1284 return Offset + SlotSize + FPDelta; 1285 } else { 1286 assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0); 1287 return Offset + StackSize; 1288 } 1289 } else if (TRI->needsStackRealignment(MF)) { 1290 if (FI < 0) { 1291 // Skip the saved EBP. 1292 return Offset + SlotSize + FPDelta; 1293 } else { 1294 assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0); 1295 return Offset + StackSize; 1296 } 1297 // FIXME: Support tail calls 1298 } else { 1299 if (!HasFP) 1300 return Offset + StackSize; 1301 1302 // Skip the saved EBP. 1303 Offset += SlotSize; 1304 1305 // Skip the RETADDR move area 1306 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); 1307 if (TailCallReturnAddrDelta < 0) 1308 Offset -= TailCallReturnAddrDelta; 1309 } 1310 1311 return Offset + FPDelta; 1312 } 1313 1314 // Simplified from getFrameIndexReference keeping only StackPointer cases 1315 int X86FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF, 1316 int FI, 1317 unsigned &FrameReg) const { 1318 const MachineFrameInfo *MFI = MF.getFrameInfo(); 1319 // Does not include any dynamic realign. 1320 const uint64_t StackSize = MFI->getStackSize(); 1321 { 1322 #ifndef NDEBUG 1323 // Note: LLVM arranges the stack as: 1324 // Args > Saved RetPC (<--FP) > CSRs > dynamic alignment (<--BP) 1325 // > "Stack Slots" (<--SP) 1326 // We can always address StackSlots from RSP. We can usually (unless 1327 // needsStackRealignment) address CSRs from RSP, but sometimes need to 1328 // address them from RBP. FixedObjects can be placed anywhere in the stack 1329 // frame depending on their specific requirements (i.e. we can actually 1330 // refer to arguments to the function which are stored in the *callers* 1331 // frame). As a result, THE RESULT OF THIS CALL IS MEANINGLESS FOR CSRs 1332 // AND FixedObjects IFF needsStackRealignment or hasVarSizedObject. 1333 1334 assert(!TRI->hasBasePointer(MF) && "we don't handle this case"); 1335 1336 // We don't handle tail calls, and shouldn't be seeing them 1337 // either. 1338 int TailCallReturnAddrDelta = 1339 MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta(); 1340 assert(!(TailCallReturnAddrDelta < 0) && "we don't handle this case!"); 1341 #endif 1342 } 1343 1344 // Fill in FrameReg output argument. 1345 FrameReg = TRI->getStackRegister(); 1346 1347 // This is how the math works out: 1348 // 1349 // %rsp grows (i.e. gets lower) left to right. Each box below is 1350 // one word (eight bytes). Obj0 is the stack slot we're trying to 1351 // get to. 1352 // 1353 // ---------------------------------- 1354 // | BP | Obj0 | Obj1 | ... | ObjN | 1355 // ---------------------------------- 1356 // ^ ^ ^ ^ 1357 // A B C E 1358 // 1359 // A is the incoming stack pointer. 1360 // (B - A) is the local area offset (-8 for x86-64) [1] 1361 // (C - A) is the Offset returned by MFI->getObjectOffset for Obj0 [2] 1362 // 1363 // |(E - B)| is the StackSize (absolute value, positive). For a 1364 // stack that grown down, this works out to be (B - E). [3] 1365 // 1366 // E is also the value of %rsp after stack has been set up, and we 1367 // want (C - E) -- the value we can add to %rsp to get to Obj0. Now 1368 // (C - E) == (C - A) - (B - A) + (B - E) 1369 // { Using [1], [2] and [3] above } 1370 // == getObjectOffset - LocalAreaOffset + StackSize 1371 // 1372 1373 // Get the Offset from the StackPointer 1374 int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea(); 1375 1376 return Offset + StackSize; 1377 } 1378 1379 bool X86FrameLowering::assignCalleeSavedSpillSlots( 1380 MachineFunction &MF, const TargetRegisterInfo *TRI, 1381 std::vector<CalleeSavedInfo> &CSI) const { 1382 MachineFrameInfo *MFI = MF.getFrameInfo(); 1383 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 1384 1385 unsigned CalleeSavedFrameSize = 0; 1386 int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta(); 1387 1388 if (hasFP(MF)) { 1389 // emitPrologue always spills frame register the first thing. 1390 SpillSlotOffset -= SlotSize; 1391 MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset); 1392 1393 // Since emitPrologue and emitEpilogue will handle spilling and restoring of 1394 // the frame register, we can delete it from CSI list and not have to worry 1395 // about avoiding it later. 1396 unsigned FPReg = TRI->getFrameRegister(MF); 1397 for (unsigned i = 0; i < CSI.size(); ++i) { 1398 if (TRI->regsOverlap(CSI[i].getReg(),FPReg)) { 1399 CSI.erase(CSI.begin() + i); 1400 break; 1401 } 1402 } 1403 } 1404 1405 // Assign slots for GPRs. It increases frame size. 1406 for (unsigned i = CSI.size(); i != 0; --i) { 1407 unsigned Reg = CSI[i - 1].getReg(); 1408 1409 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg)) 1410 continue; 1411 1412 SpillSlotOffset -= SlotSize; 1413 CalleeSavedFrameSize += SlotSize; 1414 1415 int SlotIndex = MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset); 1416 CSI[i - 1].setFrameIdx(SlotIndex); 1417 } 1418 1419 X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize); 1420 1421 // Assign slots for XMMs. 1422 for (unsigned i = CSI.size(); i != 0; --i) { 1423 unsigned Reg = CSI[i - 1].getReg(); 1424 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) 1425 continue; 1426 1427 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 1428 // ensure alignment 1429 SpillSlotOffset -= std::abs(SpillSlotOffset) % RC->getAlignment(); 1430 // spill into slot 1431 SpillSlotOffset -= RC->getSize(); 1432 int SlotIndex = 1433 MFI->CreateFixedSpillStackObject(RC->getSize(), SpillSlotOffset); 1434 CSI[i - 1].setFrameIdx(SlotIndex); 1435 MFI->ensureMaxAlignment(RC->getAlignment()); 1436 } 1437 1438 return true; 1439 } 1440 1441 bool X86FrameLowering::spillCalleeSavedRegisters( 1442 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 1443 const std::vector<CalleeSavedInfo> &CSI, 1444 const TargetRegisterInfo *TRI) const { 1445 DebugLoc DL = MBB.findDebugLoc(MI); 1446 1447 // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI 1448 // for us, and there are no XMM CSRs on Win32. 1449 if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows()) 1450 return true; 1451 1452 // Push GPRs. It increases frame size. 1453 unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r; 1454 for (unsigned i = CSI.size(); i != 0; --i) { 1455 unsigned Reg = CSI[i - 1].getReg(); 1456 1457 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg)) 1458 continue; 1459 // Add the callee-saved register as live-in. It's killed at the spill. 1460 MBB.addLiveIn(Reg); 1461 1462 BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill) 1463 .setMIFlag(MachineInstr::FrameSetup); 1464 } 1465 1466 // Make XMM regs spilled. X86 does not have ability of push/pop XMM. 1467 // It can be done by spilling XMMs to stack frame. 1468 for (unsigned i = CSI.size(); i != 0; --i) { 1469 unsigned Reg = CSI[i-1].getReg(); 1470 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) 1471 continue; 1472 // Add the callee-saved register as live-in. It's killed at the spill. 1473 MBB.addLiveIn(Reg); 1474 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 1475 1476 TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i - 1].getFrameIdx(), RC, 1477 TRI); 1478 --MI; 1479 MI->setFlag(MachineInstr::FrameSetup); 1480 ++MI; 1481 } 1482 1483 return true; 1484 } 1485 1486 bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, 1487 MachineBasicBlock::iterator MI, 1488 const std::vector<CalleeSavedInfo> &CSI, 1489 const TargetRegisterInfo *TRI) const { 1490 if (CSI.empty()) 1491 return false; 1492 1493 if (isFuncletReturnInstr(MI) && STI.isOSWindows()) { 1494 // Don't restore CSRs in 32-bit EH funclets. Matches 1495 // spillCalleeSavedRegisters. 1496 if (STI.is32Bit()) 1497 return true; 1498 // Don't restore CSRs before an SEH catchret. SEH except blocks do not form 1499 // funclets. emitEpilogue transforms these to normal jumps. 1500 if (MI->getOpcode() == X86::CATCHRET) { 1501 const Function *Func = MBB.getParent()->getFunction(); 1502 bool IsSEH = isAsynchronousEHPersonality( 1503 classifyEHPersonality(Func->getPersonalityFn())); 1504 if (IsSEH) 1505 return true; 1506 } 1507 } 1508 1509 DebugLoc DL = MBB.findDebugLoc(MI); 1510 1511 // Reload XMMs from stack frame. 1512 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 1513 unsigned Reg = CSI[i].getReg(); 1514 if (X86::GR64RegClass.contains(Reg) || 1515 X86::GR32RegClass.contains(Reg)) 1516 continue; 1517 1518 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 1519 TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI); 1520 } 1521 1522 // POP GPRs. 1523 unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r; 1524 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 1525 unsigned Reg = CSI[i].getReg(); 1526 if (!X86::GR64RegClass.contains(Reg) && 1527 !X86::GR32RegClass.contains(Reg)) 1528 continue; 1529 1530 BuildMI(MBB, MI, DL, TII.get(Opc), Reg) 1531 .setMIFlag(MachineInstr::FrameDestroy); 1532 } 1533 return true; 1534 } 1535 1536 void X86FrameLowering::determineCalleeSaves(MachineFunction &MF, 1537 BitVector &SavedRegs, 1538 RegScavenger *RS) const { 1539 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); 1540 1541 MachineFrameInfo *MFI = MF.getFrameInfo(); 1542 1543 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 1544 int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); 1545 1546 if (TailCallReturnAddrDelta < 0) { 1547 // create RETURNADDR area 1548 // arg 1549 // arg 1550 // RETADDR 1551 // { ... 1552 // RETADDR area 1553 // ... 1554 // } 1555 // [EBP] 1556 MFI->CreateFixedObject(-TailCallReturnAddrDelta, 1557 TailCallReturnAddrDelta - SlotSize, true); 1558 } 1559 1560 // Spill the BasePtr if it's used. 1561 if (TRI->hasBasePointer(MF)) { 1562 SavedRegs.set(TRI->getBaseRegister()); 1563 1564 // Allocate a spill slot for EBP if we have a base pointer and EH funclets. 1565 if (MF.getMMI().hasEHFunclets()) { 1566 int FI = MFI->CreateSpillStackObject(SlotSize, SlotSize); 1567 X86FI->setHasSEHFramePtrSave(true); 1568 X86FI->setSEHFramePtrSaveIndex(FI); 1569 } 1570 } 1571 } 1572 1573 static bool 1574 HasNestArgument(const MachineFunction *MF) { 1575 const Function *F = MF->getFunction(); 1576 for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); 1577 I != E; I++) { 1578 if (I->hasNestAttr()) 1579 return true; 1580 } 1581 return false; 1582 } 1583 1584 /// GetScratchRegister - Get a temp register for performing work in the 1585 /// segmented stack and the Erlang/HiPE stack prologue. Depending on platform 1586 /// and the properties of the function either one or two registers will be 1587 /// needed. Set primary to true for the first register, false for the second. 1588 static unsigned 1589 GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary) { 1590 CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv(); 1591 1592 // Erlang stuff. 1593 if (CallingConvention == CallingConv::HiPE) { 1594 if (Is64Bit) 1595 return Primary ? X86::R14 : X86::R13; 1596 else 1597 return Primary ? X86::EBX : X86::EDI; 1598 } 1599 1600 if (Is64Bit) { 1601 if (IsLP64) 1602 return Primary ? X86::R11 : X86::R12; 1603 else 1604 return Primary ? X86::R11D : X86::R12D; 1605 } 1606 1607 bool IsNested = HasNestArgument(&MF); 1608 1609 if (CallingConvention == CallingConv::X86_FastCall || 1610 CallingConvention == CallingConv::Fast) { 1611 if (IsNested) 1612 report_fatal_error("Segmented stacks does not support fastcall with " 1613 "nested function."); 1614 return Primary ? X86::EAX : X86::ECX; 1615 } 1616 if (IsNested) 1617 return Primary ? X86::EDX : X86::EAX; 1618 return Primary ? X86::ECX : X86::EAX; 1619 } 1620 1621 // The stack limit in the TCB is set to this many bytes above the actual stack 1622 // limit. 1623 static const uint64_t kSplitStackAvailable = 256; 1624 1625 void X86FrameLowering::adjustForSegmentedStacks( 1626 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const { 1627 MachineFrameInfo *MFI = MF.getFrameInfo(); 1628 uint64_t StackSize; 1629 unsigned TlsReg, TlsOffset; 1630 DebugLoc DL; 1631 1632 unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true); 1633 assert(!MF.getRegInfo().isLiveIn(ScratchReg) && 1634 "Scratch register is live-in"); 1635 1636 if (MF.getFunction()->isVarArg()) 1637 report_fatal_error("Segmented stacks do not support vararg functions."); 1638 if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() && 1639 !STI.isTargetWin64() && !STI.isTargetFreeBSD() && 1640 !STI.isTargetDragonFly()) 1641 report_fatal_error("Segmented stacks not supported on this platform."); 1642 1643 // Eventually StackSize will be calculated by a link-time pass; which will 1644 // also decide whether checking code needs to be injected into this particular 1645 // prologue. 1646 StackSize = MFI->getStackSize(); 1647 1648 // Do not generate a prologue for functions with a stack of size zero 1649 if (StackSize == 0) 1650 return; 1651 1652 MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock(); 1653 MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock(); 1654 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 1655 bool IsNested = false; 1656 1657 // We need to know if the function has a nest argument only in 64 bit mode. 1658 if (Is64Bit) 1659 IsNested = HasNestArgument(&MF); 1660 1661 // The MOV R10, RAX needs to be in a different block, since the RET we emit in 1662 // allocMBB needs to be last (terminating) instruction. 1663 1664 for (const auto &LI : PrologueMBB.liveins()) { 1665 allocMBB->addLiveIn(LI); 1666 checkMBB->addLiveIn(LI); 1667 } 1668 1669 if (IsNested) 1670 allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D); 1671 1672 MF.push_front(allocMBB); 1673 MF.push_front(checkMBB); 1674 1675 // When the frame size is less than 256 we just compare the stack 1676 // boundary directly to the value of the stack pointer, per gcc. 1677 bool CompareStackPointer = StackSize < kSplitStackAvailable; 1678 1679 // Read the limit off the current stacklet off the stack_guard location. 1680 if (Is64Bit) { 1681 if (STI.isTargetLinux()) { 1682 TlsReg = X86::FS; 1683 TlsOffset = IsLP64 ? 0x70 : 0x40; 1684 } else if (STI.isTargetDarwin()) { 1685 TlsReg = X86::GS; 1686 TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90. 1687 } else if (STI.isTargetWin64()) { 1688 TlsReg = X86::GS; 1689 TlsOffset = 0x28; // pvArbitrary, reserved for application use 1690 } else if (STI.isTargetFreeBSD()) { 1691 TlsReg = X86::FS; 1692 TlsOffset = 0x18; 1693 } else if (STI.isTargetDragonFly()) { 1694 TlsReg = X86::FS; 1695 TlsOffset = 0x20; // use tls_tcb.tcb_segstack 1696 } else { 1697 report_fatal_error("Segmented stacks not supported on this platform."); 1698 } 1699 1700 if (CompareStackPointer) 1701 ScratchReg = IsLP64 ? X86::RSP : X86::ESP; 1702 else 1703 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r), ScratchReg).addReg(X86::RSP) 1704 .addImm(1).addReg(0).addImm(-StackSize).addReg(0); 1705 1706 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm)).addReg(ScratchReg) 1707 .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg); 1708 } else { 1709 if (STI.isTargetLinux()) { 1710 TlsReg = X86::GS; 1711 TlsOffset = 0x30; 1712 } else if (STI.isTargetDarwin()) { 1713 TlsReg = X86::GS; 1714 TlsOffset = 0x48 + 90*4; 1715 } else if (STI.isTargetWin32()) { 1716 TlsReg = X86::FS; 1717 TlsOffset = 0x14; // pvArbitrary, reserved for application use 1718 } else if (STI.isTargetDragonFly()) { 1719 TlsReg = X86::FS; 1720 TlsOffset = 0x10; // use tls_tcb.tcb_segstack 1721 } else if (STI.isTargetFreeBSD()) { 1722 report_fatal_error("Segmented stacks not supported on FreeBSD i386."); 1723 } else { 1724 report_fatal_error("Segmented stacks not supported on this platform."); 1725 } 1726 1727 if (CompareStackPointer) 1728 ScratchReg = X86::ESP; 1729 else 1730 BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP) 1731 .addImm(1).addReg(0).addImm(-StackSize).addReg(0); 1732 1733 if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() || 1734 STI.isTargetDragonFly()) { 1735 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg) 1736 .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg); 1737 } else if (STI.isTargetDarwin()) { 1738 1739 // TlsOffset doesn't fit into a mod r/m byte so we need an extra register. 1740 unsigned ScratchReg2; 1741 bool SaveScratch2; 1742 if (CompareStackPointer) { 1743 // The primary scratch register is available for holding the TLS offset. 1744 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true); 1745 SaveScratch2 = false; 1746 } else { 1747 // Need to use a second register to hold the TLS offset 1748 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false); 1749 1750 // Unfortunately, with fastcc the second scratch register may hold an 1751 // argument. 1752 SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2); 1753 } 1754 1755 // If Scratch2 is live-in then it needs to be saved. 1756 assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) && 1757 "Scratch register is live-in and not saved"); 1758 1759 if (SaveScratch2) 1760 BuildMI(checkMBB, DL, TII.get(X86::PUSH32r)) 1761 .addReg(ScratchReg2, RegState::Kill); 1762 1763 BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2) 1764 .addImm(TlsOffset); 1765 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)) 1766 .addReg(ScratchReg) 1767 .addReg(ScratchReg2).addImm(1).addReg(0) 1768 .addImm(0) 1769 .addReg(TlsReg); 1770 1771 if (SaveScratch2) 1772 BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2); 1773 } 1774 } 1775 1776 // This jump is taken if SP >= (Stacklet Limit + Stack Space required). 1777 // It jumps to normal execution of the function body. 1778 BuildMI(checkMBB, DL, TII.get(X86::JA_1)).addMBB(&PrologueMBB); 1779 1780 // On 32 bit we first push the arguments size and then the frame size. On 64 1781 // bit, we pass the stack frame size in r10 and the argument size in r11. 1782 if (Is64Bit) { 1783 // Functions with nested arguments use R10, so it needs to be saved across 1784 // the call to _morestack 1785 1786 const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX; 1787 const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D; 1788 const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D; 1789 const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr; 1790 const unsigned MOVri = IsLP64 ? X86::MOV64ri : X86::MOV32ri; 1791 1792 if (IsNested) 1793 BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10); 1794 1795 BuildMI(allocMBB, DL, TII.get(MOVri), Reg10) 1796 .addImm(StackSize); 1797 BuildMI(allocMBB, DL, TII.get(MOVri), Reg11) 1798 .addImm(X86FI->getArgumentStackSize()); 1799 } else { 1800 BuildMI(allocMBB, DL, TII.get(X86::PUSHi32)) 1801 .addImm(X86FI->getArgumentStackSize()); 1802 BuildMI(allocMBB, DL, TII.get(X86::PUSHi32)) 1803 .addImm(StackSize); 1804 } 1805 1806 // __morestack is in libgcc 1807 if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) { 1808 // Under the large code model, we cannot assume that __morestack lives 1809 // within 2^31 bytes of the call site, so we cannot use pc-relative 1810 // addressing. We cannot perform the call via a temporary register, 1811 // as the rax register may be used to store the static chain, and all 1812 // other suitable registers may be either callee-save or used for 1813 // parameter passing. We cannot use the stack at this point either 1814 // because __morestack manipulates the stack directly. 1815 // 1816 // To avoid these issues, perform an indirect call via a read-only memory 1817 // location containing the address. 1818 // 1819 // This solution is not perfect, as it assumes that the .rodata section 1820 // is laid out within 2^31 bytes of each function body, but this seems 1821 // to be sufficient for JIT. 1822 BuildMI(allocMBB, DL, TII.get(X86::CALL64m)) 1823 .addReg(X86::RIP) 1824 .addImm(0) 1825 .addReg(0) 1826 .addExternalSymbol("__morestack_addr") 1827 .addReg(0); 1828 MF.getMMI().setUsesMorestackAddr(true); 1829 } else { 1830 if (Is64Bit) 1831 BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32)) 1832 .addExternalSymbol("__morestack"); 1833 else 1834 BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32)) 1835 .addExternalSymbol("__morestack"); 1836 } 1837 1838 if (IsNested) 1839 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10)); 1840 else 1841 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET)); 1842 1843 allocMBB->addSuccessor(&PrologueMBB); 1844 1845 checkMBB->addSuccessor(allocMBB); 1846 checkMBB->addSuccessor(&PrologueMBB); 1847 1848 #ifdef XDEBUG 1849 MF.verify(); 1850 #endif 1851 } 1852 1853 /// Erlang programs may need a special prologue to handle the stack size they 1854 /// might need at runtime. That is because Erlang/OTP does not implement a C 1855 /// stack but uses a custom implementation of hybrid stack/heap architecture. 1856 /// (for more information see Eric Stenman's Ph.D. thesis: 1857 /// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf) 1858 /// 1859 /// CheckStack: 1860 /// temp0 = sp - MaxStack 1861 /// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart 1862 /// OldStart: 1863 /// ... 1864 /// IncStack: 1865 /// call inc_stack # doubles the stack space 1866 /// temp0 = sp - MaxStack 1867 /// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart 1868 void X86FrameLowering::adjustForHiPEPrologue( 1869 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const { 1870 MachineFrameInfo *MFI = MF.getFrameInfo(); 1871 DebugLoc DL; 1872 // HiPE-specific values 1873 const unsigned HipeLeafWords = 24; 1874 const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5; 1875 const unsigned Guaranteed = HipeLeafWords * SlotSize; 1876 unsigned CallerStkArity = MF.getFunction()->arg_size() > CCRegisteredArgs ? 1877 MF.getFunction()->arg_size() - CCRegisteredArgs : 0; 1878 unsigned MaxStack = MFI->getStackSize() + CallerStkArity*SlotSize + SlotSize; 1879 1880 assert(STI.isTargetLinux() && 1881 "HiPE prologue is only supported on Linux operating systems."); 1882 1883 // Compute the largest caller's frame that is needed to fit the callees' 1884 // frames. This 'MaxStack' is computed from: 1885 // 1886 // a) the fixed frame size, which is the space needed for all spilled temps, 1887 // b) outgoing on-stack parameter areas, and 1888 // c) the minimum stack space this function needs to make available for the 1889 // functions it calls (a tunable ABI property). 1890 if (MFI->hasCalls()) { 1891 unsigned MoreStackForCalls = 0; 1892 1893 for (MachineFunction::iterator MBBI = MF.begin(), MBBE = MF.end(); 1894 MBBI != MBBE; ++MBBI) 1895 for (MachineBasicBlock::iterator MI = MBBI->begin(), ME = MBBI->end(); 1896 MI != ME; ++MI) { 1897 if (!MI->isCall()) 1898 continue; 1899 1900 // Get callee operand. 1901 const MachineOperand &MO = MI->getOperand(0); 1902 1903 // Only take account of global function calls (no closures etc.). 1904 if (!MO.isGlobal()) 1905 continue; 1906 1907 const Function *F = dyn_cast<Function>(MO.getGlobal()); 1908 if (!F) 1909 continue; 1910 1911 // Do not update 'MaxStack' for primitive and built-in functions 1912 // (encoded with names either starting with "erlang."/"bif_" or not 1913 // having a ".", such as a simple <Module>.<Function>.<Arity>, or an 1914 // "_", such as the BIF "suspend_0") as they are executed on another 1915 // stack. 1916 if (F->getName().find("erlang.") != StringRef::npos || 1917 F->getName().find("bif_") != StringRef::npos || 1918 F->getName().find_first_of("._") == StringRef::npos) 1919 continue; 1920 1921 unsigned CalleeStkArity = 1922 F->arg_size() > CCRegisteredArgs ? F->arg_size()-CCRegisteredArgs : 0; 1923 if (HipeLeafWords - 1 > CalleeStkArity) 1924 MoreStackForCalls = std::max(MoreStackForCalls, 1925 (HipeLeafWords - 1 - CalleeStkArity) * SlotSize); 1926 } 1927 MaxStack += MoreStackForCalls; 1928 } 1929 1930 // If the stack frame needed is larger than the guaranteed then runtime checks 1931 // and calls to "inc_stack_0" BIF should be inserted in the assembly prologue. 1932 if (MaxStack > Guaranteed) { 1933 MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock(); 1934 MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock(); 1935 1936 for (const auto &LI : PrologueMBB.liveins()) { 1937 stackCheckMBB->addLiveIn(LI); 1938 incStackMBB->addLiveIn(LI); 1939 } 1940 1941 MF.push_front(incStackMBB); 1942 MF.push_front(stackCheckMBB); 1943 1944 unsigned ScratchReg, SPReg, PReg, SPLimitOffset; 1945 unsigned LEAop, CMPop, CALLop; 1946 if (Is64Bit) { 1947 SPReg = X86::RSP; 1948 PReg = X86::RBP; 1949 LEAop = X86::LEA64r; 1950 CMPop = X86::CMP64rm; 1951 CALLop = X86::CALL64pcrel32; 1952 SPLimitOffset = 0x90; 1953 } else { 1954 SPReg = X86::ESP; 1955 PReg = X86::EBP; 1956 LEAop = X86::LEA32r; 1957 CMPop = X86::CMP32rm; 1958 CALLop = X86::CALLpcrel32; 1959 SPLimitOffset = 0x4c; 1960 } 1961 1962 ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true); 1963 assert(!MF.getRegInfo().isLiveIn(ScratchReg) && 1964 "HiPE prologue scratch register is live-in"); 1965 1966 // Create new MBB for StackCheck: 1967 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg), 1968 SPReg, false, -MaxStack); 1969 // SPLimitOffset is in a fixed heap location (pointed by BP). 1970 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop)) 1971 .addReg(ScratchReg), PReg, false, SPLimitOffset); 1972 BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_1)).addMBB(&PrologueMBB); 1973 1974 // Create new MBB for IncStack: 1975 BuildMI(incStackMBB, DL, TII.get(CALLop)). 1976 addExternalSymbol("inc_stack_0"); 1977 addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg), 1978 SPReg, false, -MaxStack); 1979 addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop)) 1980 .addReg(ScratchReg), PReg, false, SPLimitOffset); 1981 BuildMI(incStackMBB, DL, TII.get(X86::JLE_1)).addMBB(incStackMBB); 1982 1983 stackCheckMBB->addSuccessor(&PrologueMBB, 99); 1984 stackCheckMBB->addSuccessor(incStackMBB, 1); 1985 incStackMBB->addSuccessor(&PrologueMBB, 99); 1986 incStackMBB->addSuccessor(incStackMBB, 1); 1987 } 1988 #ifdef XDEBUG 1989 MF.verify(); 1990 #endif 1991 } 1992 1993 bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB, 1994 MachineBasicBlock::iterator MBBI, DebugLoc DL, int Offset) const { 1995 1996 if (Offset <= 0) 1997 return false; 1998 1999 if (Offset % SlotSize) 2000 return false; 2001 2002 int NumPops = Offset / SlotSize; 2003 // This is only worth it if we have at most 2 pops. 2004 if (NumPops != 1 && NumPops != 2) 2005 return false; 2006 2007 // Handle only the trivial case where the adjustment directly follows 2008 // a call. This is the most common one, anyway. 2009 if (MBBI == MBB.begin()) 2010 return false; 2011 MachineBasicBlock::iterator Prev = std::prev(MBBI); 2012 if (!Prev->isCall() || !Prev->getOperand(1).isRegMask()) 2013 return false; 2014 2015 unsigned Regs[2]; 2016 unsigned FoundRegs = 0; 2017 2018 auto RegMask = Prev->getOperand(1); 2019 2020 auto &RegClass = 2021 Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass; 2022 // Try to find up to NumPops free registers. 2023 for (auto Candidate : RegClass) { 2024 2025 // Poor man's liveness: 2026 // Since we're immediately after a call, any register that is clobbered 2027 // by the call and not defined by it can be considered dead. 2028 if (!RegMask.clobbersPhysReg(Candidate)) 2029 continue; 2030 2031 bool IsDef = false; 2032 for (const MachineOperand &MO : Prev->implicit_operands()) { 2033 if (MO.isReg() && MO.isDef() && MO.getReg() == Candidate) { 2034 IsDef = true; 2035 break; 2036 } 2037 } 2038 2039 if (IsDef) 2040 continue; 2041 2042 Regs[FoundRegs++] = Candidate; 2043 if (FoundRegs == (unsigned)NumPops) 2044 break; 2045 } 2046 2047 if (FoundRegs == 0) 2048 return false; 2049 2050 // If we found only one free register, but need two, reuse the same one twice. 2051 while (FoundRegs < (unsigned)NumPops) 2052 Regs[FoundRegs++] = Regs[0]; 2053 2054 for (int i = 0; i < NumPops; ++i) 2055 BuildMI(MBB, MBBI, DL, 2056 TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r), Regs[i]); 2057 2058 return true; 2059 } 2060 2061 void X86FrameLowering:: 2062 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, 2063 MachineBasicBlock::iterator I) const { 2064 bool reserveCallFrame = hasReservedCallFrame(MF); 2065 unsigned Opcode = I->getOpcode(); 2066 bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode(); 2067 DebugLoc DL = I->getDebugLoc(); 2068 uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0; 2069 uint64_t InternalAmt = (isDestroy || Amount) ? I->getOperand(1).getImm() : 0; 2070 I = MBB.erase(I); 2071 2072 if (!reserveCallFrame) { 2073 // If the stack pointer can be changed after prologue, turn the 2074 // adjcallstackup instruction into a 'sub ESP, <amt>' and the 2075 // adjcallstackdown instruction into 'add ESP, <amt>' 2076 if (Amount == 0) 2077 return; 2078 2079 // We need to keep the stack aligned properly. To do this, we round the 2080 // amount of space needed for the outgoing arguments up to the next 2081 // alignment boundary. 2082 unsigned StackAlign = getStackAlignment(); 2083 Amount = RoundUpToAlignment(Amount, StackAlign); 2084 2085 // Factor out the amount that gets handled inside the sequence 2086 // (Pushes of argument for frame setup, callee pops for frame destroy) 2087 Amount -= InternalAmt; 2088 2089 if (Amount) { 2090 // Add Amount to SP to destroy a frame, and subtract to setup. 2091 int Offset = isDestroy ? Amount : -Amount; 2092 2093 if (!(MF.getFunction()->optForMinSize() && 2094 adjustStackWithPops(MBB, I, DL, Offset))) 2095 BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false); 2096 } 2097 2098 return; 2099 } 2100 2101 if (isDestroy && InternalAmt) { 2102 // If we are performing frame pointer elimination and if the callee pops 2103 // something off the stack pointer, add it back. We do this until we have 2104 // more advanced stack pointer tracking ability. 2105 // We are not tracking the stack pointer adjustment by the callee, so make 2106 // sure we restore the stack pointer immediately after the call, there may 2107 // be spill code inserted between the CALL and ADJCALLSTACKUP instructions. 2108 MachineBasicBlock::iterator B = MBB.begin(); 2109 while (I != B && !std::prev(I)->isCall()) 2110 --I; 2111 BuildStackAdjustment(MBB, I, DL, -InternalAmt, /*InEpilogue=*/false); 2112 } 2113 } 2114 2115 bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { 2116 assert(MBB.getParent() && "Block is not attached to a function!"); 2117 2118 if (canUseLEAForSPInEpilogue(*MBB.getParent())) 2119 return true; 2120 2121 // If we cannot use LEA to adjust SP, we may need to use ADD, which 2122 // clobbers the EFLAGS. Check that none of the terminators reads the 2123 // EFLAGS, and if one uses it, conservatively assume this is not 2124 // safe to insert the epilogue here. 2125 return !terminatorsNeedFlagsAsInput(MBB); 2126 } 2127 2128 MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers( 2129 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 2130 DebugLoc DL, bool RestoreSP) const { 2131 assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env"); 2132 assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32"); 2133 assert(STI.is32Bit() && !Uses64BitFramePtr && 2134 "restoring EBP/ESI on non-32-bit target"); 2135 2136 MachineFunction &MF = *MBB.getParent(); 2137 unsigned FramePtr = TRI->getFrameRegister(MF); 2138 unsigned BasePtr = TRI->getBaseRegister(); 2139 MachineModuleInfo &MMI = MF.getMMI(); 2140 const Function *Fn = MF.getFunction(); 2141 WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(Fn); 2142 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 2143 MachineFrameInfo *MFI = MF.getFrameInfo(); 2144 2145 // FIXME: Don't set FrameSetup flag in catchret case. 2146 2147 int FI = FuncInfo.EHRegNodeFrameIndex; 2148 int EHRegSize = MFI->getObjectSize(FI); 2149 2150 if (RestoreSP) { 2151 // MOV32rm -EHRegSize(%ebp), %esp 2152 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP), 2153 X86::EBP, true, -EHRegSize) 2154 .setMIFlag(MachineInstr::FrameSetup); 2155 } 2156 2157 unsigned UsedReg; 2158 int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg); 2159 int EndOffset = -EHRegOffset - EHRegSize; 2160 FuncInfo.EHRegNodeEndOffset = EndOffset; 2161 assert(EndOffset >= 0 && 2162 "end of registration object above normal EBP position!"); 2163 2164 if (UsedReg == FramePtr) { 2165 // ADD $offset, %ebp 2166 assert(UsedReg == FramePtr); 2167 unsigned ADDri = getADDriOpcode(false, EndOffset); 2168 BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr) 2169 .addReg(FramePtr) 2170 .addImm(EndOffset) 2171 .setMIFlag(MachineInstr::FrameSetup) 2172 ->getOperand(3) 2173 .setIsDead(); 2174 } else { 2175 assert(UsedReg == BasePtr); 2176 // LEA offset(%ebp), %esi 2177 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr), 2178 FramePtr, false, EndOffset) 2179 .setMIFlag(MachineInstr::FrameSetup); 2180 // MOV32rm SavedEBPOffset(%esi), %ebp 2181 assert(X86FI->getHasSEHFramePtrSave()); 2182 int Offset = 2183 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg); 2184 assert(UsedReg == BasePtr); 2185 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr), 2186 UsedReg, true, Offset) 2187 .setMIFlag(MachineInstr::FrameSetup); 2188 } 2189 return MBBI; 2190 } 2191