//===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
#include <cstdlib>

using namespace llvm;

X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
                                   unsigned StackAlignOverride)
    : TargetFrameLowering(StackGrowsDown, StackAlignOverride,
                          STI.is64Bit() ? -8 : -4),
      STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {
  // Cache a bunch of frame-related predicates for this subtarget.
  SlotSize = TRI->getSlotSize();
  Is64Bit = STI.is64Bit();
  IsLP64 = STI.isTarget64BitLP64();
  // Standard x86-64 and NaCl use 64-bit frame/stack pointers; x32 uses 32-bit.
  Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
  StackPtr = TRI->getStackRegister();
}

bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo()->hasVarSizedObjects() &&
         !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
}

/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
/// call frame pseudos can be simplified. Having a FP, as in the default
/// implementation, is not sufficient here since we can't always use it.
/// Use a more nuanced condition.
bool
X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
  return hasReservedCallFrame(MF) ||
         (hasFP(MF) && !TRI->needsStackRealignment(MF)) ||
         TRI->hasBasePointer(MF);
}

// needsFrameIndexResolution - Do we need to perform FI resolution for
// this function. Normally, this is required only when the function
// has any stack objects. However, FI resolution actually has another job,
// not apparent from the name - it resolves call frame setup/destroy
// pseudos that were not simplified earlier.
// So, this is required for x86 functions that have push sequences even
// when there are no stack objects.
bool
X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const {
  return MF.getFrameInfo()->hasStackObjects() ||
         MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
}
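// For illustration (hypothetical case, not from the original sources): a
// function whose outgoing call arguments are lowered to PUSH instructions can
// report hasStackObjects() == false and still carry unsimplified call frame
// setup/destroy pseudos, so FI resolution must run for it anyway. That is
// exactly what the second disjunct above guards against.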
/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const MachineModuleInfo &MMI = MF.getMMI();

  return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
          TRI->needsStackRealignment(MF) ||
          MFI->hasVarSizedObjects() ||
          MFI->isFrameAddressTaken() || MFI->hasOpaqueSPAdjustment() ||
          MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
          MMI.callsUnwindInit() || MMI.hasEHFunclets() || MMI.callsEHReturn() ||
          MFI->hasStackMap() || MFI->hasPatchPoint());
}

static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) {
  if (IsLP64) {
    if (isInt<8>(Imm))
      return X86::SUB64ri8;
    return X86::SUB64ri32;
  } else {
    if (isInt<8>(Imm))
      return X86::SUB32ri8;
    return X86::SUB32ri;
  }
}

static unsigned getADDriOpcode(unsigned IsLP64, int64_t Imm) {
  if (IsLP64) {
    if (isInt<8>(Imm))
      return X86::ADD64ri8;
    return X86::ADD64ri32;
  } else {
    if (isInt<8>(Imm))
      return X86::ADD32ri8;
    return X86::ADD32ri;
  }
}

static unsigned getSUBrrOpcode(unsigned isLP64) {
  return isLP64 ? X86::SUB64rr : X86::SUB32rr;
}

static unsigned getADDrrOpcode(unsigned isLP64) {
  return isLP64 ? X86::ADD64rr : X86::ADD32rr;
}

static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
  if (IsLP64) {
    if (isInt<8>(Imm))
      return X86::AND64ri8;
    return X86::AND64ri32;
  }
  if (isInt<8>(Imm))
    return X86::AND32ri8;
  return X86::AND32ri;
}

static unsigned getLEArOpcode(unsigned IsLP64) {
  return IsLP64 ? X86::LEA64r : X86::LEA32r;
}
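// For illustration (hypothetical values): the helpers above pick the shortest
// encoding that can hold the immediate. On LP64, adjusting the stack by 16
// bytes selects SUB64ri8 (sign-extended 8-bit immediate), while 512 bytes no
// longer fits in 8 bits and selects SUB64ri32.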
/// findDeadCallerSavedReg - Return a caller-saved register that isn't live
/// when it reaches the "return" instruction. We can then pop a stack object
/// to this register without worrying about clobbering it.
static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator &MBBI,
                                       const TargetRegisterInfo *TRI,
                                       bool Is64Bit) {
  const MachineFunction *MF = MBB.getParent();
  const Function *F = MF->getFunction();
  if (!F || MF->getMMI().callsEHReturn())
    return 0;

  static const uint16_t CallerSavedRegs32Bit[] = {
    X86::EAX, X86::EDX, X86::ECX, 0
  };

  static const uint16_t CallerSavedRegs64Bit[] = {
    X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI,
    X86::R8,  X86::R9,  X86::R10, X86::R11, 0
  };

  unsigned Opc = MBBI->getOpcode();
  switch (Opc) {
  default: return 0;
  case X86::RETL:
  case X86::RETQ:
  case X86::RETIL:
  case X86::RETIQ:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    SmallSet<uint16_t, 8> Uses;
    for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MBBI->getOperand(i);
      if (!MO.isReg() || MO.isDef())
        continue;
      unsigned Reg = MO.getReg();
      if (!Reg)
        continue;
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
        Uses.insert(*AI);
    }

    const uint16_t *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit;
    for (; *CS; ++CS)
      if (!Uses.count(*CS))
        return *CS;
  }
  }

  return 0;
}

static bool isEAXLiveIn(MachineFunction &MF) {
  for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(),
       EE = MF.getRegInfo().livein_end(); II != EE; ++II) {
    unsigned Reg = II->first;

    if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
        Reg == X86::AH || Reg == X86::AL)
      return true;
  }

  return false;
}

/// Check whether or not the terminators of \p MBB need to read EFLAGS.
static bool terminatorsNeedFlagsAsInput(const MachineBasicBlock &MBB) {
  for (const MachineInstr &MI : MBB.terminators()) {
    bool BreakNext = false;
    for (const MachineOperand &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (Reg != X86::EFLAGS)
        continue;

      // This terminator needs an EFLAGS value that is not defined
      // by a previous terminator.
      if (!MO.isDef())
        return true;
      BreakNext = true;
    }
    if (BreakNext)
      break;
  }
  return false;
}
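// For illustration (hypothetical block): an epilogue block whose terminators
// are a conditional jump followed by an unconditional one, e.g.
//   JE_1 %bb.2; JMP_1 %bb.3
// reads EFLAGS in its first terminator, so the predicate above returns true
// and the SP adjustment below must avoid a flag-clobbering ADD/SUB.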
/// emitSPUpdate - Emit a series of instructions to increment / decrement the
/// stack pointer by a constant value.
void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator &MBBI,
                                    int64_t NumBytes, bool InEpilogue) const {
  bool isSub = NumBytes < 0;
  uint64_t Offset = isSub ? -NumBytes : NumBytes;

  uint64_t Chunk = (1LL << 31) - 1;
  DebugLoc DL = MBB.findDebugLoc(MBBI);

  while (Offset) {
    if (Offset > Chunk) {
      // Rather than emit a long series of instructions for large offsets,
      // load the offset into a register and do one sub/add.
      unsigned Reg = 0;

      if (isSub && !isEAXLiveIn(*MBB.getParent()))
        Reg = (unsigned)(Is64Bit ? X86::RAX : X86::EAX);
      else
        Reg = findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);

      if (Reg) {
        unsigned Opc = Is64Bit ? X86::MOV64ri : X86::MOV32ri;
        BuildMI(MBB, MBBI, DL, TII.get(Opc), Reg)
          .addImm(Offset);
        Opc = isSub
          ? getSUBrrOpcode(Is64Bit)
          : getADDrrOpcode(Is64Bit);
        MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
                               .addReg(StackPtr)
                               .addReg(Reg);
        MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
        Offset = 0;
        continue;
      }
    }

    uint64_t ThisVal = std::min(Offset, Chunk);
    if (ThisVal == (Is64Bit ? 8 : 4)) {
      // Use push / pop instead.
      unsigned Reg = isSub
        ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
        : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);
      if (Reg) {
        unsigned Opc = isSub
          ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
          : (Is64Bit ? X86::POP64r : X86::POP32r);
        MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc))
          .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub));
        if (isSub)
          MI->setFlag(MachineInstr::FrameSetup);
        else
          MI->setFlag(MachineInstr::FrameDestroy);
        Offset -= ThisVal;
        continue;
      }
    }

    MachineInstrBuilder MI = BuildStackAdjustment(
        MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue);
    if (isSub)
      MI.setMIFlag(MachineInstr::FrameSetup);
    else
      MI.setMIFlag(MachineInstr::FrameDestroy);

    Offset -= ThisVal;
  }
}

MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL,
    int64_t Offset, bool InEpilogue) const {
  assert(Offset != 0 && "zero offset stack adjustment requested");

  // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue
  // is tricky.
  bool UseLEA;
  if (!InEpilogue) {
    UseLEA = STI.useLeaForSP();
  } else {
    // If we can use LEA for SP but we shouldn't, check that none
    // of the terminators uses the eflags. Otherwise we will insert
    // an ADD that will redefine the eflags and break the condition.
    // Alternatively, we could move the ADD, but this may not be possible
    // and is an optimization anyway.
    UseLEA = canUseLEAForSPInEpilogue(*MBB.getParent());
    if (UseLEA && !STI.useLeaForSP())
      UseLEA = terminatorsNeedFlagsAsInput(MBB);
    // If that assert breaks, that means we do not do the right thing
    // in canUseAsEpilogue.
    assert((UseLEA || !terminatorsNeedFlagsAsInput(MBB)) &&
           "We shouldn't have allowed this insertion point");
  }

  MachineInstrBuilder MI;
  if (UseLEA) {
    MI = addRegOffset(BuildMI(MBB, MBBI, DL,
                              TII.get(getLEArOpcode(Uses64BitFramePtr)),
                              StackPtr),
                      StackPtr, false, Offset);
  } else {
    bool IsSub = Offset < 0;
    uint64_t AbsOffset = IsSub ? -Offset : Offset;
    unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr, AbsOffset)
                         : getADDriOpcode(Uses64BitFramePtr, AbsOffset);
    MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
             .addReg(StackPtr)
             .addImm(AbsOffset);
    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
  }
  return MI;
}
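// For illustration (hypothetical sequence): with doMergeWithPrevious == true
// and MBBI pointing just past a prologue "SUB64ri8 %rsp, 16", the routine
// below erases that SUB and returns -16, letting the caller fold those 16
// bytes into its own upcoming stack adjustment instead of emitting two
// separate instructions.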
int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator &MBBI,
                                     bool doMergeWithPrevious) const {
  if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
      (!doMergeWithPrevious && MBBI == MBB.end()))
    return 0;

  MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;
  MachineBasicBlock::iterator NI = doMergeWithPrevious ? nullptr
                                                       : std::next(MBBI);
  unsigned Opc = PI->getOpcode();
  int Offset = 0;

  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8 ||
       Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
      PI->getOperand(0).getReg() == StackPtr) {
    Offset += PI->getOperand(2).getImm();
    MBB.erase(PI);
    if (!doMergeWithPrevious) MBBI = NI;
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             PI->getOperand(0).getReg() == StackPtr) {
    Offset -= PI->getOperand(2).getImm();
    MBB.erase(PI);
    if (!doMergeWithPrevious) MBBI = NI;
  }

  return Offset;
}

void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI, DebugLoc DL,
                                MCCFIInstruction CFIInst) const {
  MachineFunction &MF = *MBB.getParent();
  unsigned CFIIndex = MF.getMMI().addFrameInst(CFIInst);
  BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);
}

void
X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MBBI,
                                            DebugLoc DL) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
  if (CSI.empty()) return;

  // Calculate offsets.
  for (std::vector<CalleeSavedInfo>::const_iterator
         I = CSI.begin(), E = CSI.end(); I != E; ++I) {
    int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
    unsigned Reg = I->getReg();

    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
    BuildCFI(MBB, MBBI, DL,
             MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
  }
}

/// usesTheStack - This function checks whether any user of EFLAGS copies the
/// EFLAGS value. We know that the code that lowers a COPY of EFLAGS has
/// to use the stack, and if we don't adjust the stack we clobber the first
/// frame index.
/// See X86InstrInfo::copyPhysReg.
static bool usesTheStack(const MachineFunction &MF) {
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  for (MachineRegisterInfo::reg_instr_iterator
       ri = MRI.reg_instr_begin(X86::EFLAGS), re = MRI.reg_instr_end();
       ri != re; ++ri)
    if (ri->isCopy())
      return true;

  return false;
}
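// For illustration (hypothetical frame, x86-64 MinGW): a function needing
// 8192 bytes of locals exceeds the default 4096-byte probe threshold, so the
// prologue emits roughly:
//   movl $8192, %eax
//   callq ___chkstk_ms   ; probes each page, leaves %rsp untouched
//   subq %rax, %rsp      ; the actual allocation happens here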
void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
                                          MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI,
                                          DebugLoc DL) const {
  bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;

  unsigned CallOp;
  if (Is64Bit)
    CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
  else
    CallOp = X86::CALLpcrel32;

  const char *Symbol;
  if (Is64Bit) {
    if (STI.isTargetCygMing()) {
      Symbol = "___chkstk_ms";
    } else {
      Symbol = "__chkstk";
    }
  } else if (STI.isTargetCygMing())
    Symbol = "_alloca";
  else
    Symbol = "_chkstk";

  MachineInstrBuilder CI;

  // All current stack probes take AX and SP as input, clobber flags, and
  // preserve all registers. x86_64 probes leave RSP unmodified.
  if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
    // For the large code model, we have to call through a register. Use R11,
    // as it is scratch in all supported calling conventions.
    BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
        .addExternalSymbol(Symbol);
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
  } else {
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addExternalSymbol(Symbol);
  }

  unsigned AX = Is64Bit ? X86::RAX : X86::EAX;
  unsigned SP = Is64Bit ? X86::RSP : X86::ESP;
  CI.addReg(AX, RegState::Implicit)
      .addReg(SP, RegState::Implicit)
      .addReg(AX, RegState::Define | RegState::Implicit)
      .addReg(SP, RegState::Define | RegState::Implicit)
      .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);

  if (Is64Bit) {
    // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
    // themselves. They also do not clobber %rax, so we can reuse it when
    // adjusting %rsp.
    BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
        .addReg(X86::RSP)
        .addReg(X86::RAX);
  }
}

static unsigned calculateSetFPREG(uint64_t SPAdjust) {
  // Win64 ABI has a less restrictive limitation of 240; 128 works equally well
  // and might require smaller successive adjustments.
  const uint64_t Win64MaxSEHOffset = 128;
  uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset);
  // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode.
  return SEHFrameOffset & -16;
}
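// Worked example (hypothetical values): for SPAdjust = 72 the function above
// yields min(72, 128) & -16 = 64; for SPAdjust = 344 it yields
// min(344, 128) & -16 = 128. The result is always a 16-byte-aligned offset
// no larger than 128, as UWOP_SET_FPREG requires.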
// If we're forcing a stack realignment we can't rely on just the frame
// info; we need to know the ABI stack alignment as well in case we have a
// call out. Otherwise just make sure we have some alignment - we'll go with
// the minimum SlotSize.
uint64_t
X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
  unsigned StackAlign = getStackAlignment();
  if (MF.getFunction()->hasFnAttribute("stackrealign")) {
    if (MFI->hasCalls())
      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
    else if (MaxAlign < SlotSize)
      MaxAlign = SlotSize;
  }
  return MaxAlign;
}

void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI,
                                          DebugLoc DL,
                                          uint64_t MaxAlign) const {
  uint64_t Val = -MaxAlign;
  MachineInstr *MI =
      BuildMI(MBB, MBBI, DL, TII.get(getANDriOpcode(Uses64BitFramePtr, Val)),
              StackPtr)
          .addReg(StackPtr)
          .addImm(Val)
          .setMIFlag(MachineInstr::FrameSetup);

  // The EFLAGS implicit def is dead.
  MI->getOperand(3).setIsDead();
}
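// For illustration (hypothetical values): with MaxAlign = 32 the mask is
// Val = -32, so the emitted "andq $-32, %rsp" clears the low five bits of
// the stack pointer, rounding it down to the next 32-byte boundary. Any gap
// this creates sits between the pushed CSRs and the locals.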
/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjusts the stack pointer. Adjust the stack pointer to
/// allocate space for local variables. Also emit labels used by the exception
/// handler to generate the exception handling frames.

/*
  Here's a gist of what gets emitted:

  ; Establish frame pointer, if needed
  [if needs FP]
      push %rbp
      .cfi_def_cfa_offset 16
      .cfi_offset %rbp, -16
      .seh_pushreg %rbp
      mov %rsp, %rbp
      .cfi_def_cfa_register %rbp

  ; Spill general-purpose registers
  [for all callee-saved GPRs]
      pushq %<reg>
      [if not needs FP]
          .cfi_def_cfa_offset (offset from RETADDR)
      .seh_pushreg %<reg>

  ; If the required stack alignment > default stack alignment
  ; rsp needs to be re-aligned.  This creates a "re-alignment gap"
  ; of unknown size in the stack frame.
  [if stack needs re-alignment]
      and $MASK, %rsp

  ; Allocate space for locals
  [if target is Windows and allocated space > 4096 bytes]
      ; Windows needs special care for allocations larger
      ; than one page.
      mov $NNN, %rax
      call ___chkstk_ms/___chkstk
      sub %rax, %rsp
  [else]
      sub $NNN, %rsp

  [if needs FP]
      .seh_stackalloc (size of XMM spill slots)
      .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
  [else]
      .seh_stackalloc NNN

  ; Spill XMMs
  ; Note that while only the Windows 64 ABI specifies XMMs as callee-preserved,
  ; they may get spilled on any platform, if the current function
  ; calls @llvm.eh.unwind.init
  [if needs FP]
      [for all callee-saved XMM registers]
          movaps %<xmm reg>, -MMM(%rbp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
          ; i.e. the offset relative to (%rbp - SEHFrameOffset)
  [else]
      [for all callee-saved XMM registers]
          movaps %<xmm reg>, KKK(%rsp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, KKK

  .seh_endprologue

  [if needs base pointer]
      mov %rsp, %rbx
      [if needs to restore base pointer]
          mov %rsp, -MMM(%rbp)

  ; Emit CFI info
  [if needs FP]
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rbp)
  [else]
      .cfi_def_cfa_offset (offset from RETADDR)
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rsp)

  Notes:
  - .seh directives are emitted only for Windows 64 ABI
  - .cfi directives are emitted for all other ABIs
  - for 32-bit code, substitute %e?? registers for %r??
*/

void X86FrameLowering::emitPrologue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
         "MF used frame lowering for wrong subtarget");
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const Function *Fn = MF.getFunction();
  MachineModuleInfo &MMI = MF.getMMI();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
  uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate.
  bool IsFunclet = MBB.isEHFuncletEntry();
  bool HasFP = hasFP(MF);
  bool IsWin64CC = STI.isCallingConvWin64(Fn->getCallingConv());
  bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
  bool NeedsWinCFI = IsWin64Prologue && Fn->needsUnwindTableEntry();
  bool NeedsDwarfCFI =
      !IsWin64Prologue && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry());
  unsigned FramePtr = TRI->getFrameRegister(MF);
  const unsigned MachineFramePtr =
      STI.isTarget64BitILP32()
          ? getX86SubSuperRegister(FramePtr, MVT::i64, false)
          : FramePtr;
  unsigned BasePtr = TRI->getBaseRegister();
  DebugLoc DL;

  // Add RETADDR move area to callee saved frame size.
  int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
  if (TailCallReturnAddrDelta && IsWin64Prologue)
    report_fatal_error("Can't handle guaranteed tail call under win64 yet");

  if (TailCallReturnAddrDelta < 0)
    X86FI->setCalleeSavedFrameSize(
        X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);

  bool UseStackProbe = (STI.isOSWindows() && !STI.isTargetMachO());

  // The default stack probe size is 4096 if the function has no stackprobesize
  // attribute.
  unsigned StackProbeSize = 4096;
  if (Fn->hasFnAttribute("stack-probe-size"))
    Fn->getFnAttribute("stack-probe-size")
        .getValueAsString()
        .getAsInteger(0, StackProbeSize);

  // If this is x86-64, the Red Zone is not disabled, we are a leaf function,
  // we use up to 128 bytes of stack space, and we don't have a frame pointer,
  // calls, or dynamic allocas, then we do not need to adjust the stack
  // pointer (we fit in the Red Zone). We also check that we don't push and
  // pop from the stack.
  if (Is64Bit && !Fn->hasFnAttribute(Attribute::NoRedZone) &&
      !TRI->needsStackRealignment(MF) &&
      !MFI->hasVarSizedObjects() && // No dynamic alloca.
      !MFI->adjustsStack() &&       // No calls.
      !IsWin64CC &&                 // Win64 has no Red Zone
      !usesTheStack(MF) &&          // Don't push and pop.
      !MF.shouldSplitStack()) {     // Regular stack
    uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
    if (HasFP) MinSize += SlotSize;
    StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
    MFI->setStackSize(StackSize);
  }
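  // Worked example (hypothetical leaf function, x86-64 SysV): with 120 bytes
  // of locals, no calls, and no CSRs, StackSize <= 128 collapses to
  // max(0, 0) = 0 and no SUB is emitted at all; with 160 bytes only the part
  // that doesn't fit in the red zone remains, max(0, 160 - 128) = 32.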
  // Insert stack pointer adjustment for later moving of return addr. Only
  // applies to tail call optimized functions where the callee argument stack
  // size is bigger than the caller's.
  if (TailCallReturnAddrDelta < 0) {
    BuildStackAdjustment(MBB, MBBI, DL, TailCallReturnAddrDelta,
                         /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // Mapping for machine moves:
  //
  //   DST: VirtualFP AND
  //        SRC: VirtualFP              => DW_CFA_def_cfa_offset
  //        ELSE                        => DW_CFA_def_cfa
  //
  //   SRC: VirtualFP AND
  //        DST: Register               => DW_CFA_def_cfa_register
  //
  //   ELSE
  //        OFFSET < 0                  => DW_CFA_offset_extended_sf
  //        REG < 64                    => DW_CFA_offset + Reg
  //        ELSE                        => DW_CFA_offset_extended

  uint64_t NumBytes = 0;
  int stackGrowth = -SlotSize;

  unsigned RDX = Uses64BitFramePtr ? X86::RDX : X86::EDX;
  if (IsWin64Prologue && IsFunclet) {
    // Immediately spill RDX into the home slot. The runtime cares about this.
    // MOV64mr %rdx, 16(%rsp)
    unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16)
        .addReg(RDX)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  if (HasFP) {
    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    // If required, include space for extra hidden slot for stashing base
    // pointer.
    if (X86FI->getRestoreBasePointer())
      FrameSize += SlotSize;

    NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();

    // Callee-saved registers are pushed on stack before the stack is
    // realigned.
    if (TRI->needsStackRealignment(MF) && !IsWin64Prologue)
      NumBytes = RoundUpToAlignment(NumBytes, MaxAlign);

    // Get the offset of the stack slot for the EBP register, which is
    // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
    // Update the frame offset adjustment.
    if (!IsFunclet)
      MFI->setOffsetAdjustment(-NumBytes);
    else
      assert(MFI->getOffsetAdjustment() == -(int)NumBytes &&
             "should calculate same local variable offset for funclets");

    // Save EBP/RBP into the appropriate stack slot.
    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
        .addReg(MachineFramePtr, RegState::Kill)
        .setMIFlag(MachineInstr::FrameSetup);

    if (NeedsDwarfCFI) {
      // Mark the place where EBP/RBP was saved.
      // Define the current CFA rule to use the provided offset.
      assert(StackSize);
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createDefCfaOffset(nullptr, 2 * stackGrowth));

      // Change the rule for the FramePtr to be an "offset" rule.
      unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
      BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createOffset(
                                  nullptr, DwarfFramePtr, 2 * stackGrowth));
    }

    if (NeedsWinCFI) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
          .addImm(FramePtr)
          .setMIFlag(MachineInstr::FrameSetup);
    }

    if (!IsWin64Prologue && !IsFunclet) {
      // Update EBP with the new base value.
      BuildMI(MBB, MBBI, DL,
              TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
              FramePtr)
          .addReg(StackPtr)
          .setMIFlag(MachineInstr::FrameSetup);

      if (NeedsDwarfCFI) {
        // Mark effective beginning of when frame pointer becomes valid.
        // Define the current CFA to use the EBP/RBP register.
        unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
        BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaRegister(
                                    nullptr, DwarfFramePtr));
      }
    }

    // Mark the FramePtr as live-in in every block.
    for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
      I->addLiveIn(MachineFramePtr);
  } else {
    assert(!IsFunclet && "funclets without FPs not yet implemented");
    NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
  }

  // For EH funclets, only allocate enough space for outgoing calls. Save the
  // NumBytes value that we would've used for the parent frame.
  unsigned ParentFrameNumBytes = NumBytes;
  if (IsFunclet)
    NumBytes = getWinEHFuncletFrameSize(MF);

  // Skip the callee-saved push instructions.
  bool PushedRegs = false;
  int StackOffset = 2 * stackGrowth;

  while (MBBI != MBB.end() &&
         MBBI->getFlag(MachineInstr::FrameSetup) &&
         (MBBI->getOpcode() == X86::PUSH32r ||
          MBBI->getOpcode() == X86::PUSH64r)) {
    PushedRegs = true;
    unsigned Reg = MBBI->getOperand(0).getReg();
    ++MBBI;

    if (!HasFP && NeedsDwarfCFI) {
      // Mark callee-saved push instruction.
      // Define the current CFA rule to use the provided offset.
      assert(StackSize);
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createDefCfaOffset(nullptr, StackOffset));
      StackOffset += stackGrowth;
    }

    if (NeedsWinCFI) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)).addImm(Reg).setMIFlag(
          MachineInstr::FrameSetup);
    }
  }
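  // For illustration (hypothetical 64-bit frame): stackGrowth is -8, so after
  // the return address and the first push the CFA sits 16 bytes above the new
  // %rsp. Passing 2 * stackGrowth = -16 to createDefCfaOffset yields the
  // equivalent of ".cfi_def_cfa_offset 16" (MC stores the negated value), and
  // each subsequent push advances StackOffset by another -8.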
  // Realign stack after we pushed callee-saved registers (so that we'll be
  // able to calculate their offsets from the frame pointer).
  // Don't do this for Win64; it needs to realign the stack after the
  // prologue.
  if (!IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF)) {
    assert(HasFP && "There should be a frame pointer if stack is realigned.");
    BuildStackAlignAND(MBB, MBBI, DL, MaxAlign);
  }

  // If there is a SUB32ri of ESP immediately before this instruction, merge
  // the two. This can be the case when tail call elimination is enabled and
  // the callee has more arguments than the caller.
  NumBytes -= mergeSPUpdates(MBB, MBBI, true);

  // Adjust stack pointer: ESP -= numbytes.

  // Windows and cygwin/mingw require a prologue helper routine when allocating
  // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
  // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
  // stack and adjust the stack pointer in one go. The 64-bit version of
  // __chkstk is only responsible for probing the stack. The 64-bit prologue is
  // responsible for adjusting the stack pointer. Touching the stack at 4K
  // increments is necessary to ensure that the guard pages used by the OS
  // virtual memory manager are allocated in correct sequence.
  uint64_t AlignedNumBytes = NumBytes;
  if (IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF))
    AlignedNumBytes = RoundUpToAlignment(AlignedNumBytes, MaxAlign);
  if (AlignedNumBytes >= StackProbeSize && UseStackProbe) {
    // Check whether EAX is livein for this function.
    bool isEAXAlive = isEAXLiveIn(MF);

    if (isEAXAlive) {
      // A live-in EAX can only legitimately happen for 32-bit targets here;
      // the 64-bit path below clobbers RAX without saving it.
      assert(!Is64Bit && "EAX is livein in x64 case!");

      // Save EAX
      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
          .addReg(X86::EAX, RegState::Kill)
          .setMIFlag(MachineInstr::FrameSetup);
    }

    if (Is64Bit) {
      // Handle the 64-bit Windows ABI case where we need to call __chkstk.
      // Function prologue is responsible for adjusting the stack pointer.
      if (isUInt<32>(NumBytes)) {
        BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
            .addImm(NumBytes)
            .setMIFlag(MachineInstr::FrameSetup);
      } else if (isInt<32>(NumBytes)) {
        BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri32), X86::RAX)
            .addImm(NumBytes)
            .setMIFlag(MachineInstr::FrameSetup);
      } else {
        BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX)
            .addImm(NumBytes)
            .setMIFlag(MachineInstr::FrameSetup);
      }
    } else {
      // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
      // We'll also use 4 already allocated bytes for EAX.
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
          .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
          .setMIFlag(MachineInstr::FrameSetup);
    }

    // Save a pointer to the MI where we set AX.
    MachineBasicBlock::iterator SetRAX = MBBI;
    --SetRAX;

    // Call __chkstk, __chkstk_ms, or __alloca.
    emitStackProbeCall(MF, MBB, MBBI, DL);

    // Apply the frame setup flag to all inserted instrs.
    for (; SetRAX != MBBI; ++SetRAX)
      SetRAX->setFlag(MachineInstr::FrameSetup);

    if (isEAXAlive) {
      // Restore EAX
      MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
                                              X86::EAX),
                                      StackPtr, false, NumBytes - 4);
      MI->setFlag(MachineInstr::FrameSetup);
      MBB.insert(MBBI, MI);
    }
  } else if (NumBytes) {
    emitSPUpdate(MBB, MBBI, -(int64_t)NumBytes, /*InEpilogue=*/false);
  }

  if (NeedsWinCFI && NumBytes)
    BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
        .addImm(NumBytes)
        .setMIFlag(MachineInstr::FrameSetup);

  int SEHFrameOffset = 0;
  if (IsWin64Prologue && HasFP) {
    // Set RBP to a small fixed offset from RSP. In the funclet case, we base
    // this calculation on the incoming RDX, which holds the value of RSP from
    // the parent frame at the end of the prologue.
    unsigned SPOrRDX = !IsFunclet ? StackPtr : RDX;
    SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes);
    if (SEHFrameOffset)
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
                   SPOrRDX, false, SEHFrameOffset);
    else
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr)
          .addReg(SPOrRDX);

    // If this is not a funclet, emit the CFI describing our frame pointer.
    if (NeedsWinCFI && !IsFunclet)
      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
          .addImm(FramePtr)
          .addImm(SEHFrameOffset)
          .setMIFlag(MachineInstr::FrameSetup);
  } else if (IsFunclet && STI.is32Bit()) {
    // Reset EBP / ESI to something good for funclets.
    MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL);
  }

  while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
    const MachineInstr *FrameInstr = &*MBBI;
    ++MBBI;

    if (NeedsWinCFI) {
      int FI;
      if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
        if (X86::FR64RegClass.contains(Reg)) {
          unsigned IgnoredFrameReg;
          int Offset = getFrameIndexReference(MF, FI, IgnoredFrameReg);
          Offset += SEHFrameOffset;

          BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
              .addImm(Reg)
              .addImm(Offset)
              .setMIFlag(MachineInstr::FrameSetup);
        }
      }
    }
  }

  if (NeedsWinCFI)
    BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
        .setMIFlag(MachineInstr::FrameSetup);

  // Realign stack after we spilled callee-saved registers (so that we'll be
  // able to calculate their offsets from the frame pointer).
  // Win64 requires aligning the stack after the prologue.
  if (IsWin64Prologue && TRI->needsStackRealignment(MF)) {
    assert(HasFP && "There should be a frame pointer if stack is realigned.");
    BuildStackAlignAND(MBB, MBBI, DL, MaxAlign);
  }

  // If we need a base pointer, set it up here. It's whatever the value
  // of the stack pointer is at this point. Any variable size objects
  // will be allocated after this, so we can still use the base pointer
  // to reference locals.
  if (TRI->hasBasePointer(MF)) {
    // Update the base pointer with the current stack pointer.
    unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
    BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
        .addReg(StackPtr)
        .setMIFlag(MachineInstr::FrameSetup);
    if (X86FI->getRestoreBasePointer()) {
      // Stash value of base pointer. Saving RSP instead of EBP shortens
      // dependence chain. Used by SjLj EH.
      unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)),
                   FramePtr, true, X86FI->getRestoreBasePointerOffset())
          .addReg(StackPtr)
          .setMIFlag(MachineInstr::FrameSetup);
    }

    if (X86FI->getHasSEHFramePtrSave()) {
      // Stash the value of the frame pointer relative to the base pointer for
      // Win32 EH. This supports Win32 EH, which does the inverse of the above:
      // it recovers the frame pointer from the base pointer rather than the
      // other way around.
      unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
      unsigned UsedReg;
      int Offset =
          getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg);
      assert(UsedReg == BasePtr);
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset)
          .addReg(FramePtr)
          .setMIFlag(MachineInstr::FrameSetup);
    }
  }

  if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
    // Mark end of stack pointer adjustment.
    if (!HasFP && NumBytes) {
      // Define the current CFA rule to use the provided offset.
      assert(StackSize);
      BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaOffset(
                                  nullptr, -StackSize + stackGrowth));
    }

    // Emit DWARF info specifying the offsets of the callee-saved registers.
    if (PushedRegs)
      emitCalleeSavedFrameMoves(MBB, MBBI, DL);
  }
}

bool X86FrameLowering::canUseLEAForSPInEpilogue(
    const MachineFunction &MF) const {
  // We can't use LEA instructions for adjusting the stack pointer if this is a
  // leaf function in the Win64 ABI. Only ADD instructions may be used to
  // deallocate the stack.
  // This means that we can use LEA for SP in two situations:
  // 1. We *aren't* using the Win64 ABI which means we are free to use LEA.
  // 2. We *have* a frame pointer which means we are permitted to use LEA.
  return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF);
}

static bool isFuncletReturnInstr(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  case X86::CATCHRET:
  case X86::CLEANUPRET:
    return true;
  default:
    return false;
  }
  llvm_unreachable("impossible");
}

unsigned
X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
  // This is the size of the pushed CSRs.
  unsigned CSSize =
      MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
  // This is the amount of stack a funclet needs to allocate.
  unsigned MaxCallSize = MF.getFrameInfo()->getMaxCallFrameSize();
  // RBP is not included in the callee saved register block. After pushing RBP,
  // everything is 16 byte aligned. Everything we allocate before an outgoing
  // call must also be 16 byte aligned.
  unsigned FrameSizeMinusRBP =
      RoundUpToAlignment(CSSize + MaxCallSize, getStackAlignment());
  // Subtract out the size of the callee saved registers. This is how much
  // stack each funclet will allocate.
  return FrameSizeMinusRBP - CSSize;
}
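// Worked example (hypothetical values): with CSSize = 16 (two pushed GPRs)
// and MaxCallSize = 40, RoundUpToAlignment(16 + 40, 16) = 64, so each
// funclet allocates 64 - 16 = 48 bytes for its outgoing-call area.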
void X86FrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();
  // Standard x86-64 and NaCl use 64-bit frame/stack pointers; x32 uses 32-bit.
  const bool Is64BitILP32 = STI.isTarget64BitILP32();
  unsigned FramePtr = TRI->getFrameRegister(MF);
  unsigned MachineFramePtr =
      Is64BitILP32 ? getX86SubSuperRegister(FramePtr, MVT::i64, false)
                   : FramePtr;

  bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
  bool NeedsWinCFI =
      IsWin64Prologue && MF.getFunction()->needsUnwindTableEntry();
  bool IsFunclet = isFuncletReturnInstr(MBBI);
  MachineBasicBlock *RestoreMBB = nullptr;

  // Get the number of bytes to allocate from the FrameInfo.
  uint64_t StackSize = MFI->getStackSize();
  uint64_t MaxAlign = calculateMaxStackAlign(MF);
  unsigned CSSize = X86FI->getCalleeSavedFrameSize();
  uint64_t NumBytes = 0;

  if (MBBI->getOpcode() == X86::CATCHRET) {
    NumBytes = getWinEHFuncletFrameSize(MF);
    assert(hasFP(MF) && "EH funclets without FP not yet implemented");
    MachineBasicBlock *TargetMBB = MBBI->getOperand(0).getMBB();

    // If this is SEH, this isn't really a funclet return.
    bool IsSEH = isAsynchronousEHPersonality(
        classifyEHPersonality(MF.getFunction()->getPersonalityFn()));
    if (IsSEH) {
      if (STI.is32Bit())
        restoreWin32EHStackPointers(MBB, MBBI, DL, /*RestoreSP=*/true);
      BuildMI(MBB, MBBI, DL, TII.get(X86::JMP_4)).addMBB(TargetMBB);
      MBBI->eraseFromParent();
      return;
    }

    // For 32-bit, create a new block for the restore code.
    RestoreMBB = TargetMBB;
    if (STI.is32Bit()) {
      RestoreMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MF.insert(TargetMBB->getIterator(), RestoreMBB);
      MBB.removeSuccessor(TargetMBB);
      MBB.addSuccessor(RestoreMBB);
      RestoreMBB->addSuccessor(TargetMBB);
      MBBI->getOperand(0).setMBB(RestoreMBB);
    }

    // Pop EBP.
    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
            MachineFramePtr)
        .setMIFlag(MachineInstr::FrameDestroy);

    // Insert frame restoration code in a new block.
    if (STI.is32Bit()) {
      auto RestoreMBBI = RestoreMBB->begin();
      restoreWin32EHStackPointers(*RestoreMBB, RestoreMBBI, DL,
                                  /*RestoreSP=*/true);
      BuildMI(*RestoreMBB, RestoreMBBI, DL, TII.get(X86::JMP_4))
          .addMBB(TargetMBB);
    }
  } else if (MBBI->getOpcode() == X86::CLEANUPRET) {
    NumBytes = getWinEHFuncletFrameSize(MF);
    assert(hasFP(MF) && "EH funclets without FP not yet implemented");
    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
            MachineFramePtr)
        .setMIFlag(MachineInstr::FrameDestroy);
  } else if (hasFP(MF)) {
    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    NumBytes = FrameSize - CSSize;

    // Callee-saved registers were pushed on stack before the stack was
    // realigned.
    if (TRI->needsStackRealignment(MF) && !IsWin64Prologue)
      NumBytes = RoundUpToAlignment(FrameSize, MaxAlign);

    // Pop EBP.
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::POP64r : X86::POP32r), MachineFramePtr)
        .setMIFlag(MachineInstr::FrameDestroy);
  } else {
    NumBytes = StackSize - CSSize;
  }
  uint64_t SEHStackAllocAmt = NumBytes;

  // Skip the callee-saved pop instructions.
  while (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PI = std::prev(MBBI);
    unsigned Opc = PI->getOpcode();

    if ((Opc != X86::POP32r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
        (Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
        Opc != X86::DBG_VALUE && !PI->isTerminator())
      break;

    --MBBI;
  }
  MachineBasicBlock::iterator FirstCSPop = MBBI;

  if (RestoreMBB) {
    // Fill EAX/RAX with the address of the target block.
    unsigned ReturnReg = STI.is64Bit() ? X86::RAX : X86::EAX;
    if (STI.is64Bit()) {
      // LEA64r RestoreMBB(%rip), %rax
      BuildMI(MBB, FirstCSPop, DL, TII.get(X86::LEA64r), ReturnReg)
          .addReg(X86::RIP)
          .addImm(0)
          .addReg(0)
          .addMBB(RestoreMBB)
          .addReg(0);
    } else {
      // MOV32ri $RestoreMBB, %eax
      BuildMI(MBB, FirstCSPop, DL, TII.get(X86::MOV32ri), ReturnReg)
          .addMBB(RestoreMBB);
    }
  }

  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();

  // If there is an ADD32ri or SUB32ri of ESP immediately before this
  // instruction, merge the two instructions.
  if (NumBytes || MFI->hasVarSizedObjects())
    NumBytes += mergeSPUpdates(MBB, MBBI, true);

  // If dynamic alloca is used, then reset esp to point to the last
  // callee-saved slot before popping them off! The same applies when the
  // stack was realigned. Don't do this if this was a funclet epilogue, since
  // the funclets will not do realignment or dynamic stack allocation.
  if ((TRI->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) &&
      !IsFunclet) {
    if (TRI->needsStackRealignment(MF))
      MBBI = FirstCSPop;
    unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
    uint64_t LEAAmount =
        IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;

    // There are only two legal forms of epilogue:
    // - add SEHAllocationSize, %rsp
    // - lea SEHAllocationSize(%FramePtr), %rsp
    //
    // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence.
    // However, we may use this sequence if we have a frame pointer because the
    // effects of the prologue can safely be undone.
    if (LEAAmount != 0) {
      unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
                   FramePtr, false, LEAAmount);
      --MBBI;
    } else {
      unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
      BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
          .addReg(FramePtr);
      --MBBI;
    }
  } else if (NumBytes) {
    // Adjust stack pointer back: ESP += numbytes.
    emitSPUpdate(MBB, MBBI, NumBytes, /*InEpilogue=*/true);
    --MBBI;
  }

  // The Windows unwinder will not invoke a function's exception handler if
  // the IP is either in the prologue or in the epilogue. This behavior causes
  // a problem when a call immediately precedes an epilogue, because the
  // return address points into the epilogue. To cope with that, we insert an
  // epilogue marker here, then replace it with a 'nop' if it ends up
  // immediately after a CALL in the final emitted code.
  if (NeedsWinCFI)
    BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));

  // Add the return addr area delta back since we are not tail calling.
  int Offset = -1 * X86FI->getTCReturnAddrDelta();
  assert(Offset >= 0 && "TCDelta should never be positive");
  if (Offset) {
    MBBI = MBB.getFirstTerminator();

    // Check for possible merge with preceding ADD instruction.
    Offset += mergeSPUpdates(MBB, MBBI, true);
    emitSPUpdate(MBB, MBBI, Offset, /*InEpilogue=*/true);
  }
}

// NOTE: this only has a subset of the full frame index logic. In
// particular, the FI < 0 and AfterFPPop logic is handled in
// X86RegisterInfo::eliminateFrameIndex, but not here. Possibly
// (probably?) it should be moved into here.
int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
                                             unsigned &FrameReg) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();

  // We can't calculate offset from frame pointer if the stack is realigned,
  // so enforce usage of stack/base pointer. The base pointer is used when we
  // have dynamic allocas in addition to dynamic realignment.
  if (TRI->hasBasePointer(MF))
    FrameReg = TRI->getBaseRegister();
  else if (TRI->needsStackRealignment(MF))
    FrameReg = TRI->getStackRegister();
  else
    FrameReg = TRI->getFrameRegister(MF);

  // Offset will hold the offset from the stack pointer at function entry to
  // the object.
  // We need to factor in additional offsets applied during the prologue to
  // the frame, base, and stack pointer depending on which is used.
  int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  unsigned CSSize = X86FI->getCalleeSavedFrameSize();
  uint64_t StackSize = MFI->getStackSize();
  bool HasFP = hasFP(MF);
  bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
  int64_t FPDelta = 0;

  if (IsWin64Prologue) {
    assert(!MFI->hasCalls() || (StackSize % 16) == 8);

    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    // If required, include space for extra hidden slot for stashing base
    // pointer.
    if (X86FI->getRestoreBasePointer())
      FrameSize += SlotSize;
    uint64_t NumBytes = FrameSize - CSSize;

    uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes);
    if (FI && FI == X86FI->getFAIndex())
      return -SEHFrameOffset;

    // FPDelta is the offset between the "traditional" FP location (the old
    // base pointer followed by the return address) and the location required
    // by the restricted Win64 prologue.
    // Add FPDelta to all offsets below that go through the frame pointer.
    FPDelta = FrameSize - SEHFrameOffset;
    assert((!MFI->hasCalls() || (FPDelta % 16) == 0) &&
           "FPDelta isn't aligned per the Win64 ABI!");
  }


  if (TRI->hasBasePointer(MF)) {
    assert(HasFP && "VLAs and dynamic stack realign, but no FP?!");
    if (FI < 0) {
      // Skip the saved EBP.
      return Offset + SlotSize + FPDelta;
    } else {
      assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
      return Offset + StackSize;
    }
  } else if (TRI->needsStackRealignment(MF)) {
    if (FI < 0) {
      // Skip the saved EBP.
      return Offset + SlotSize + FPDelta;
    } else {
      assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
      return Offset + StackSize;
    }
    // FIXME: Support tail calls
  } else {
    if (!HasFP)
      return Offset + StackSize;

    // Skip the saved EBP.
    Offset += SlotSize;

    // Skip the RETADDR move area
    int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
    if (TailCallReturnAddrDelta < 0)
      Offset -= TailCallReturnAddrDelta;
  }

  return Offset + FPDelta;
}

// Simplified from getFrameIndexReference keeping only StackPointer cases
int X86FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
                                                   int FI,
                                                   unsigned &FrameReg) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  // Does not include any dynamic realign.
  const uint64_t StackSize = MFI->getStackSize();
  {
#ifndef NDEBUG
    // Note: LLVM arranges the stack as:
    // Args > Saved RetPC (<--FP) > CSRs > dynamic alignment (<--BP)
    //      > "Stack Slots" (<--SP)
    // We can always address StackSlots from RSP.  We can usually (unless
    // needsStackRealignment) address CSRs from RSP, but sometimes need to
    // address them from RBP.  FixedObjects can be placed anywhere in the stack
    // frame depending on their specific requirements (i.e. we can actually
    // refer to arguments to the function which are stored in the *callers*
    // frame).  As a result, THE RESULT OF THIS CALL IS MEANINGLESS FOR CSRs
    // AND FixedObjects IFF needsStackRealignment or hasVarSizedObject.

    assert(!TRI->hasBasePointer(MF) && "we don't handle this case");

    // We don't handle tail calls, and shouldn't be seeing them
    // either.
    int TailCallReturnAddrDelta =
        MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta();
    assert(!(TailCallReturnAddrDelta < 0) && "we don't handle this case!");
#endif
  }

  // Fill in FrameReg output argument.
  FrameReg = TRI->getStackRegister();

  // This is how the math works out:
  //
  //  %rsp grows (i.e. gets lower) left to right. Each box below is
  //  one word (eight bytes). Obj0 is the stack slot we're trying to
  //  get to.
  //
  //    ----------------------------------
  //    | BP | Obj0 | Obj1 | ... | ObjN |
  //    ----------------------------------
  //    ^    ^      ^                   ^
  //    A    B      C                   E
  //
  // A is the incoming stack pointer.
  // (B - A) is the local area offset (-8 for x86-64) [1]
  // (C - A) is the Offset returned by MFI->getObjectOffset for Obj0 [2]
  //
  // |(E - B)| is the StackSize (absolute value, positive). For a
  // stack that grows down, this works out to be (B - E). [3]
  //
  // E is also the value of %rsp after stack has been set up, and we
  // want (C - E) -- the value we can add to %rsp to get to Obj0.  Now
  //    (C - E) == (C - A) - (B - A) + (B - E)
  //            { Using [1], [2] and [3] above }
  //         == getObjectOffset - LocalAreaOffset + StackSize
  //
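  // Worked example (hypothetical values): with getObjectOffset(FI) = -24,
  // LocalAreaOffset = -8, and StackSize = 40, the result below is
  // -24 - (-8) + 40 = 24, i.e. Obj0 lives 24 bytes above the post-prologue
  // %rsp.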
  // Get the Offset from the StackPointer
  int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();

  return Offset + StackSize;
}

bool X86FrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  MachineFrameInfo *MFI = MF.getFrameInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();

  unsigned CalleeSavedFrameSize = 0;
  int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();

  if (hasFP(MF)) {
    // emitPrologue always spills the frame register first thing.
    SpillSlotOffset -= SlotSize;
    MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);

    // Since emitPrologue and emitEpilogue will handle spilling and restoring
    // of the frame register, we can delete it from the CSI list and not have
    // to worry about avoiding it later.
    unsigned FPReg = TRI->getFrameRegister(MF);
    for (unsigned i = 0; i < CSI.size(); ++i) {
      if (TRI->regsOverlap(CSI[i].getReg(), FPReg)) {
        CSI.erase(CSI.begin() + i);
        break;
      }
    }
  }

  // Assign slots for GPRs. It increases frame size.
  for (unsigned i = CSI.size(); i != 0; --i) {
    unsigned Reg = CSI[i - 1].getReg();

    if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
      continue;

    SpillSlotOffset -= SlotSize;
    CalleeSavedFrameSize += SlotSize;

    int SlotIndex = MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
    CSI[i - 1].setFrameIdx(SlotIndex);
  }

  X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);

  // Assign slots for XMMs.
  for (unsigned i = CSI.size(); i != 0; --i) {
    unsigned Reg = CSI[i - 1].getReg();
    if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
      continue;

    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
    // ensure alignment
    SpillSlotOffset -= std::abs(SpillSlotOffset) % RC->getAlignment();
    // spill into slot
    SpillSlotOffset -= RC->getSize();
    int SlotIndex =
        MFI->CreateFixedSpillStackObject(RC->getSize(), SpillSlotOffset);
    CSI[i - 1].setFrameIdx(SlotIndex);
    MFI->ensureMaxAlignment(RC->getAlignment());
  }

  return true;
}
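// For illustration (hypothetical x86-64 frame with a frame pointer and CSRs
// RBX and R12): SpillSlotOffset starts at the local area offset, -8. The
// fixed RBP slot lands at -16, then the GPR loop above places one CSR at -24
// and the other at -32, and CalleeSavedFrameSize ends up as 16.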
bool X86FrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI) const {
  DebugLoc DL = MBB.findDebugLoc(MI);

  // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI
  // for us, and there are no XMM CSRs on Win32.
  if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows())
    return true;

  // Push GPRs. It increases frame size.
  unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
  for (unsigned i = CSI.size(); i != 0; --i) {
    unsigned Reg = CSI[i - 1].getReg();

    if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
      continue;
    // Add the callee-saved register as live-in. It's killed at the spill.
    MBB.addLiveIn(Reg);

    BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // Spill XMM regs. X86 has no push/pop for XMM registers, so spill them to
  // stack slots in the frame instead.
  for (unsigned i = CSI.size(); i != 0; --i) {
    unsigned Reg = CSI[i - 1].getReg();
    if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
      continue;
    // Add the callee-saved register as live-in. It's killed at the spill.
    MBB.addLiveIn(Reg);
    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);

    TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i - 1].getFrameIdx(), RC,
                            TRI);
    --MI;
    MI->setFlag(MachineInstr::FrameSetup);
    ++MI;
  }

  return true;
}

bool X86FrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  if (isFuncletReturnInstr(MI) && STI.isOSWindows()) {
    // Don't restore CSRs in 32-bit EH funclets. Matches
    // spillCalleeSavedRegisters.
    if (STI.is32Bit())
      return true;
    // Don't restore CSRs before an SEH catchret. SEH except blocks do not form
    // funclets. emitEpilogue transforms these to normal jumps.
    if (MI->getOpcode() == X86::CATCHRET) {
      const Function *Func = MBB.getParent()->getFunction();
      bool IsSEH = isAsynchronousEHPersonality(
          classifyEHPersonality(Func->getPersonalityFn()));
      if (IsSEH)
        return true;
    }
  }

  DebugLoc DL = MBB.findDebugLoc(MI);

  // Reload XMMs from stack frame.
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    if (X86::GR64RegClass.contains(Reg) ||
        X86::GR32RegClass.contains(Reg))
      continue;

    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
    TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI);
  }

  // POP GPRs.
  unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    if (!X86::GR64RegClass.contains(Reg) &&
        !X86::GR32RegClass.contains(Reg))
      continue;

    BuildMI(MBB, MI, DL, TII.get(Opc), Reg)
        .setMIFlag(MachineInstr::FrameDestroy);
  }
  return true;
}

void X86FrameLowering::determineCalleeSaves(MachineFunction &MF,
                                            BitVector &SavedRegs,
                                            RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);

  MachineFrameInfo *MFI = MF.getFrameInfo();

  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();

  if (TailCallReturnAddrDelta < 0) {
    // create RETURNADDR area
    //   arg
    //   arg
    //   RETADDR
    //   { ...
    //     RETADDR area
    //     ...
    //   }
    //   [EBP]
    MFI->CreateFixedObject(-TailCallReturnAddrDelta,
                           TailCallReturnAddrDelta - SlotSize, true);
  }

  // Spill the BasePtr if it's used.
  if (TRI->hasBasePointer(MF)) {
    SavedRegs.set(TRI->getBaseRegister());

    // Allocate a spill slot for EBP if we have a base pointer and EH funclets.
1587     if (MF.getMMI().hasEHFunclets()) {
1588       int FI = MFI->CreateSpillStackObject(SlotSize, SlotSize);
1589       X86FI->setHasSEHFramePtrSave(true);
1590       X86FI->setSEHFramePtrSaveIndex(FI);
1591     }
1592   }
1593 }
1594 
1595 static bool
1596 HasNestArgument(const MachineFunction *MF) {
1597   const Function *F = MF->getFunction();
1598   for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
1599        I != E; ++I) {
1600     if (I->hasNestAttr())
1601       return true;
1602   }
1603   return false;
1604 }
1605 
1606 /// GetScratchRegister - Get a temp register for performing work in the
1607 /// segmented stack and the Erlang/HiPE stack prologues. Depending on the
1608 /// platform and the properties of the function, either one or two registers
1609 /// are needed. Set Primary to true for the first register, false for the second.
1610 static unsigned
1611 GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary) {
1612   CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
1613 
1614   // The HiPE (Erlang) calling convention reserves its own scratch registers.
1615   if (CallingConvention == CallingConv::HiPE) {
1616     if (Is64Bit)
1617       return Primary ? X86::R14 : X86::R13;
1618     else
1619       return Primary ? X86::EBX : X86::EDI;
1620   }
1621 
1622   if (Is64Bit) {
1623     if (IsLP64)
1624       return Primary ? X86::R11 : X86::R12;
1625     else
1626       return Primary ? X86::R11D : X86::R12D;
1627   }
1628 
1629   bool IsNested = HasNestArgument(&MF);
1630 
1631   if (CallingConvention == CallingConv::X86_FastCall ||
1632       CallingConvention == CallingConv::Fast) {
1633     if (IsNested)
1634       report_fatal_error("Segmented stacks do not support fastcall with "
1635                          "nested functions.");
1636     return Primary ? X86::EAX : X86::ECX;
1637   }
1638   if (IsNested)
1639     return Primary ? X86::EDX : X86::EAX;
1640   return Primary ? X86::ECX : X86::EAX;
1641 }
1642 
1643 // The stack limit in the TCB is set to this many bytes above the actual stack
1644 // limit.
1645 static const uint64_t kSplitStackAvailable = 256;
1646 
1647 void X86FrameLowering::adjustForSegmentedStacks(
1648     MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
1649   MachineFrameInfo *MFI = MF.getFrameInfo();
1650   uint64_t StackSize;
1651   unsigned TlsReg, TlsOffset;
1652   DebugLoc DL;
1653 
1654   unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
1655   assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
1656          "Scratch register is live-in");
1657 
1658   if (MF.getFunction()->isVarArg())
1659     report_fatal_error("Segmented stacks do not support vararg functions.");
1660   if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
1661       !STI.isTargetWin64() && !STI.isTargetFreeBSD() &&
1662       !STI.isTargetDragonFly())
1663     report_fatal_error("Segmented stacks not supported on this platform.");
1664 
1665   // Eventually StackSize will be calculated by a link-time pass, which will
1666   // also decide whether checking code needs to be injected into this
1667   // particular prologue.
1668   StackSize = MFI->getStackSize();
1669 
1670   // Do not generate a prologue for functions with a stack of size zero.
1671   if (StackSize == 0)
1672     return;
1673 
1674   MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
1675   MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
1676   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
1677   bool IsNested = false;
1678 
1679   // We need to know if the function has a nest argument only in 64-bit mode.
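  // (Background, for readers: on x86-64 the static chain of a nested function
  // is passed in R10, and the __morestack protocol below also uses R10 to
  // receive the frame size, so nested functions need the extra handling seen
  // in the MORESTACK_RET_RESTORE_R10 path at the end of this function.)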
1680   if (Is64Bit)
1681     IsNested = HasNestArgument(&MF);
1682 
1683   // The MOV R10, RAX needs to be in a different block, since the RET we emit
1684   // in allocMBB needs to be the last (terminating) instruction.
1685 
1686   for (const auto &LI : PrologueMBB.liveins()) {
1687     allocMBB->addLiveIn(LI);
1688     checkMBB->addLiveIn(LI);
1689   }
1690 
1691   if (IsNested)
1692     allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);
1693 
1694   MF.push_front(allocMBB);
1695   MF.push_front(checkMBB);
1696 
1697   // When the frame size is less than 256 we just compare the stack
1698   // boundary directly to the value of the stack pointer, per gcc.
1699   bool CompareStackPointer = StackSize < kSplitStackAvailable;
1700 
1701   // Read the limit of the current stacklet from the stack_guard location.
1702   if (Is64Bit) {
1703     if (STI.isTargetLinux()) {
1704       TlsReg = X86::FS;
1705       TlsOffset = IsLP64 ? 0x70 : 0x40;
1706     } else if (STI.isTargetDarwin()) {
1707       TlsReg = X86::GS;
1708       TlsOffset = 0x60 + 90 * 8; // See pthread_machdep.h. Steal TLS slot 90.
1709     } else if (STI.isTargetWin64()) {
1710       TlsReg = X86::GS;
1711       TlsOffset = 0x28; // pvArbitrary, reserved for application use.
1712     } else if (STI.isTargetFreeBSD()) {
1713       TlsReg = X86::FS;
1714       TlsOffset = 0x18;
1715     } else if (STI.isTargetDragonFly()) {
1716       TlsReg = X86::FS;
1717       TlsOffset = 0x20; // Use tls_tcb.tcb_segstack.
1718     } else {
1719       report_fatal_error("Segmented stacks not supported on this platform.");
1720     }
1721 
1722     if (CompareStackPointer)
1723       ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
1724     else
1725       BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r), ScratchReg).addReg(X86::RSP)
1726           .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
1727 
1728     BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm)).addReg(ScratchReg)
1729         .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
1730   } else {
1731     if (STI.isTargetLinux()) {
1732       TlsReg = X86::GS;
1733       TlsOffset = 0x30;
1734     } else if (STI.isTargetDarwin()) {
1735       TlsReg = X86::GS;
1736       TlsOffset = 0x48 + 90 * 4;
1737     } else if (STI.isTargetWin32()) {
1738       TlsReg = X86::FS;
1739       TlsOffset = 0x14; // pvArbitrary, reserved for application use.
1740     } else if (STI.isTargetDragonFly()) {
1741       TlsReg = X86::FS;
1742       TlsOffset = 0x10; // Use tls_tcb.tcb_segstack.
1743     } else if (STI.isTargetFreeBSD()) {
1744       report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
1745     } else {
1746       report_fatal_error("Segmented stacks not supported on this platform.");
1747     }
1748 
1749     if (CompareStackPointer)
1750       ScratchReg = X86::ESP;
1751     else
1752       BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
1753           .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
1754 
1755     if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() ||
1756         STI.isTargetDragonFly()) {
1757       BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
1758           .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
1759     } else if (STI.isTargetDarwin()) {
1760 
1761       // TlsOffset doesn't fit into a ModR/M byte, so we need an extra register.
1762       unsigned ScratchReg2;
1763       bool SaveScratch2;
1764       if (CompareStackPointer) {
1765         // The primary scratch register is available for holding the TLS offset.
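        // (ScratchReg was redirected to ESP above in this case, so the
        // primary scratch register reported by GetScratchRegister is free to
        // reuse here.)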
1766         ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
1767         SaveScratch2 = false;
1768       } else {
1769         // Need to use a second register to hold the TLS offset.
1770         ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);
1771 
1772         // Unfortunately, with fastcc the second scratch register may hold an
1773         // argument.
1774         SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
1775       }
1776 
1777       // If Scratch2 is live-in then it needs to be saved.
1778       assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
1779              "Scratch register is live-in and not saved");
1780 
1781       if (SaveScratch2)
1782         BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
1783             .addReg(ScratchReg2, RegState::Kill);
1784 
1785       BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
1786           .addImm(TlsOffset);
1787       BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
1788           .addReg(ScratchReg)
1789           .addReg(ScratchReg2).addImm(1).addReg(0)
1790           .addImm(0)
1791           .addReg(TlsReg);
1792 
1793       if (SaveScratch2)
1794         BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
1795     }
1796   }
1797 
1798   // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
1799   // It jumps to normal execution of the function body.
1800   BuildMI(checkMBB, DL, TII.get(X86::JA_1)).addMBB(&PrologueMBB);
1801 
1802   // On 32-bit we first push the argument size and then the frame size. On
1803   // 64-bit, we pass the stack frame size in r10 and the argument size in r11.
1804   if (Is64Bit) {
1805     // Functions with nested arguments use R10, so it needs to be saved across
1806     // the call to __morestack.
1807 
1808     const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
1809     const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
1810     const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
1811     const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
1812     const unsigned MOVri = IsLP64 ? X86::MOV64ri : X86::MOV32ri;
1813 
1814     if (IsNested)
1815       BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);
1816 
1817     BuildMI(allocMBB, DL, TII.get(MOVri), Reg10)
1818         .addImm(StackSize);
1819     BuildMI(allocMBB, DL, TII.get(MOVri), Reg11)
1820         .addImm(X86FI->getArgumentStackSize());
1821   } else {
1822     BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
1823         .addImm(X86FI->getArgumentStackSize());
1824     BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
1825         .addImm(StackSize);
1826   }
1827 
1828   // __morestack is in libgcc.
1829   if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
1830     // Under the large code model, we cannot assume that __morestack lives
1831     // within 2^31 bytes of the call site, so we cannot use pc-relative
1832     // addressing. We cannot perform the call via a temporary register,
1833     // as the rax register may be used to store the static chain, and all
1834     // other suitable registers may be either callee-saved or used for
1835     // parameter passing. We cannot use the stack at this point either
1836     // because __morestack manipulates the stack directly.
1837     //
1838     // To avoid these issues, perform an indirect call via a read-only memory
1839     // location containing the address.
1840     //
1841     // This solution is not perfect, as it assumes that the .rodata section
1842     // is laid out within 2^31 bytes of each function body, but this seems
1843     // to be sufficient for JIT.
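    //
    // The resulting call is roughly (illustrative AT&T syntax):
    //   callq *__morestack_addr(%rip)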
1844     BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
1845         .addReg(X86::RIP)
1846         .addImm(0)
1847         .addReg(0)
1848         .addExternalSymbol("__morestack_addr")
1849         .addReg(0);
1850     MF.getMMI().setUsesMorestackAddr(true);
1851   } else {
1852     if (Is64Bit)
1853       BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
1854           .addExternalSymbol("__morestack");
1855     else
1856       BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
1857           .addExternalSymbol("__morestack");
1858   }
1859 
1860   if (IsNested)
1861     BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
1862   else
1863     BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
1864 
1865   allocMBB->addSuccessor(&PrologueMBB);
1866 
1867   checkMBB->addSuccessor(allocMBB);
1868   checkMBB->addSuccessor(&PrologueMBB);
1869 
1870 #ifdef XDEBUG
1871   MF.verify();
1872 #endif
1873 }
1874 
1875 /// Erlang programs may need a special prologue to handle the stack size they
1876 /// might need at runtime. That is because Erlang/OTP does not implement a C
1877 /// stack but uses a custom implementation of a hybrid stack/heap architecture.
1878 /// (For more information see Eric Stenman's Ph.D. thesis:
1879 /// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
1880 ///
1881 /// CheckStack:
1882 ///       temp0 = sp - MaxStack
1883 ///       if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
1884 /// OldStart:
1885 ///       ...
1886 /// IncStack:
1887 ///       call inc_stack   # doubles the stack space
1888 ///       temp0 = sp - MaxStack
1889 ///       if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
1890 void X86FrameLowering::adjustForHiPEPrologue(
1891     MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
1892   MachineFrameInfo *MFI = MF.getFrameInfo();
1893   DebugLoc DL;
1894   // HiPE-specific values.
1895   const unsigned HipeLeafWords = 24;
1896   const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
1897   const unsigned Guaranteed = HipeLeafWords * SlotSize;
1898   unsigned CallerStkArity = MF.getFunction()->arg_size() > CCRegisteredArgs ?
1899       MF.getFunction()->arg_size() - CCRegisteredArgs : 0;
1900   unsigned MaxStack = MFI->getStackSize() + CallerStkArity * SlotSize + SlotSize;
1901 
1902   assert(STI.isTargetLinux() &&
1903          "HiPE prologue is only supported on Linux operating systems.");
1904 
1905   // Compute the largest caller's frame that is needed to fit the callees'
1906   // frames. This 'MaxStack' is computed from:
1907   //
1908   // a) the fixed frame size, which is the space needed for all spilled temps,
1909   // b) outgoing on-stack parameter areas, and
1910   // c) the minimum stack space this function needs to make available for the
1911   //    functions it calls (a tunable ABI property).
1912   if (MFI->hasCalls()) {
1913     unsigned MoreStackForCalls = 0;
1914 
1915     for (MachineFunction::iterator MBBI = MF.begin(), MBBE = MF.end();
1916          MBBI != MBBE; ++MBBI)
1917       for (MachineBasicBlock::iterator MI = MBBI->begin(), ME = MBBI->end();
1918            MI != ME; ++MI) {
1919         if (!MI->isCall())
1920           continue;
1921 
1922         // Get the callee operand.
1923         const MachineOperand &MO = MI->getOperand(0);
1924 
1925         // Only take global function calls into account (no closures, etc.).
1926         if (!MO.isGlobal())
1927           continue;
1928 
1929         const Function *F = dyn_cast<Function>(MO.getGlobal());
1930         if (!F)
1931           continue;
1932 
1933         // Do not update 'MaxStack' for primitive and built-in functions,
1934         // which are executed on another stack. These are encoded with names
1935         // containing "erlang." or "bif_", or with names containing neither
1936         // a "." nor an "_" (matching the name checks performed directly
1937         // below).
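        // (Hypothetical example: a callee named "lists.map_2" would be
        // counted, while "erlang.exit.1" would be skipped.)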
1938         if (F->getName().find("erlang.") != StringRef::npos ||
1939             F->getName().find("bif_") != StringRef::npos ||
1940             F->getName().find_first_of("._") == StringRef::npos)
1941           continue;
1942 
1943         unsigned CalleeStkArity =
1944             F->arg_size() > CCRegisteredArgs ? F->arg_size() - CCRegisteredArgs : 0;
1945         if (HipeLeafWords - 1 > CalleeStkArity)
1946           MoreStackForCalls = std::max(MoreStackForCalls,
1947               (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
1948       }
1949     MaxStack += MoreStackForCalls;
1950   }
1951 
1952   // If the needed stack frame is larger than the guaranteed amount, runtime
1953   // checks and calls to the "inc_stack_0" BIF are inserted in the prologue.
1954   if (MaxStack > Guaranteed) {
1955     MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
1956     MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
1957 
1958     for (const auto &LI : PrologueMBB.liveins()) {
1959       stackCheckMBB->addLiveIn(LI);
1960       incStackMBB->addLiveIn(LI);
1961     }
1962 
1963     MF.push_front(incStackMBB);
1964     MF.push_front(stackCheckMBB);
1965 
1966     unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
1967     unsigned LEAop, CMPop, CALLop;
1968     if (Is64Bit) {
1969       SPReg = X86::RSP;
1970       PReg = X86::RBP;
1971       LEAop = X86::LEA64r;
1972       CMPop = X86::CMP64rm;
1973       CALLop = X86::CALL64pcrel32;
1974       SPLimitOffset = 0x90;
1975     } else {
1976       SPReg = X86::ESP;
1977       PReg = X86::EBP;
1978       LEAop = X86::LEA32r;
1979       CMPop = X86::CMP32rm;
1980       CALLop = X86::CALLpcrel32;
1981       SPLimitOffset = 0x4c;
1982     }
1983 
1984     ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
1985     assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
1986            "HiPE prologue scratch register is live-in");
1987 
1988     // Create a new MBB for StackCheck:
1989     addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg),
1990                  SPReg, false, -MaxStack);
1991     // SPLimitOffset is in a fixed heap location (pointed to by BP).
1992     addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop))
1993                  .addReg(ScratchReg), PReg, false, SPLimitOffset);
1994     BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_1)).addMBB(&PrologueMBB);
1995 
1996     // Create a new MBB for IncStack:
1997     BuildMI(incStackMBB, DL, TII.get(CALLop))
1998         .addExternalSymbol("inc_stack_0");
1999     addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg),
2000                  SPReg, false, -MaxStack);
2001     addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop))
2002                  .addReg(ScratchReg), PReg, false, SPLimitOffset);
2003     BuildMI(incStackMBB, DL, TII.get(X86::JLE_1)).addMBB(incStackMBB);
2004 
2005     stackCheckMBB->addSuccessor(&PrologueMBB, 99);
2006     stackCheckMBB->addSuccessor(incStackMBB, 1);
2007     incStackMBB->addSuccessor(&PrologueMBB, 99);
2008     incStackMBB->addSuccessor(incStackMBB, 1);
2009   }
2010 #ifdef XDEBUG
2011   MF.verify();
2012 #endif
2013 }
2014 
2015 bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
2016     MachineBasicBlock::iterator MBBI, DebugLoc DL, int Offset) const {
2017 
2018   if (Offset <= 0)
2019     return false;
2020 
2021   if (Offset % SlotSize)
2022     return false;
2023 
2024   int NumPops = Offset / SlotSize;
2025   // This is only worth it if we have at most 2 pops.
2026   if (NumPops != 1 && NumPops != 2)
2027     return false;
2028 
2029   // Handle only the trivial case where the adjustment directly follows
2030   // a call. This is the most common one, anyway.
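  //
  // (Illustrative: on a 32-bit target, an "addl $8, %esp" after a call can
  // become "popl %ecx; popl %edx" instead, which is smaller, provided the
  // register scan below finds ECX and EDX dead after the call.)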
2031   if (MBBI == MBB.begin())
2032     return false;
2033   MachineBasicBlock::iterator Prev = std::prev(MBBI);
2034   if (!Prev->isCall() || !Prev->getOperand(1).isRegMask())
2035     return false;
2036 
2037   unsigned Regs[2];
2038   unsigned FoundRegs = 0;
2039 
2040   auto RegMask = Prev->getOperand(1);
2041 
2042   auto &RegClass =
2043       Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass;
2044   // Try to find up to NumPops free registers.
2045   for (auto Candidate : RegClass) {
2046 
2047     // Poor man's liveness:
2048     // Since we're immediately after a call, any register that is clobbered
2049     // by the call and not defined by it can be considered dead.
2050     if (!RegMask.clobbersPhysReg(Candidate))
2051       continue;
2052 
2053     bool IsDef = false;
2054     for (const MachineOperand &MO : Prev->implicit_operands()) {
2055       if (MO.isReg() && MO.isDef() && MO.getReg() == Candidate) {
2056         IsDef = true;
2057         break;
2058       }
2059     }
2060 
2061     if (IsDef)
2062       continue;
2063 
2064     Regs[FoundRegs++] = Candidate;
2065     if (FoundRegs == (unsigned)NumPops)
2066       break;
2067   }
2068 
2069   if (FoundRegs == 0)
2070     return false;
2071 
2072   // If we found only one free register, but need two, reuse the same one twice.
2073   while (FoundRegs < (unsigned)NumPops)
2074     Regs[FoundRegs++] = Regs[0];
2075 
2076   for (int i = 0; i < NumPops; ++i)
2077     BuildMI(MBB, MBBI, DL,
2078             TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r), Regs[i]);
2079 
2080   return true;
2081 }
2082 
2083 void X86FrameLowering::
2084 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
2085                               MachineBasicBlock::iterator I) const {
2086   bool reserveCallFrame = hasReservedCallFrame(MF);
2087   unsigned Opcode = I->getOpcode();
2088   bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
2089   DebugLoc DL = I->getDebugLoc();
2090   uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
2091   uint64_t InternalAmt = (isDestroy || Amount) ? I->getOperand(1).getImm() : 0;
2092   I = MBB.erase(I);
2093 
2094   if (!reserveCallFrame) {
2095     // If the stack pointer can be changed after the prologue, turn the
2096     // adjcallstackdown instruction into a 'sub ESP, <amt>' and the
2097     // adjcallstackup instruction into an 'add ESP, <amt>'.
2098 
2099     // We need to keep the stack aligned properly. To do this, we round the
2100     // amount of space needed for the outgoing arguments up to the next
2101     // alignment boundary.
2102     unsigned StackAlign = getStackAlignment();
2103     Amount = RoundUpToAlignment(Amount, StackAlign);
2104 
2105     // If we have any exception handlers in this function, and we adjust
2106     // the SP before calls, we may need to indicate this to the unwinder
2107     // using GNU_ARGS_SIZE. Note that this may be necessary even when
2108     // Amount == 0, because the preceding function may have set a non-zero
2109     // GNU_ARGS_SIZE.
2110     // TODO: We don't need to reset this between subsequent functions,
2111     // if it didn't change.
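    // (Background: DW_CFA_GNU_args_size records the size of the outgoing
    // argument area at a call site so a DWARF unwinder can undo that
    // adjustment when unwinding through the call; it is irrelevant for
    // Windows CFI, hence the usesWindowsCFI() check below.)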
2112     bool HasDwarfEHHandlers =
2113         !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
2114         !MF.getMMI().getLandingPads().empty();
2115 
2116     if (HasDwarfEHHandlers && !isDestroy &&
2117         MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
2118       BuildCFI(MBB, I, DL,
2119                MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
2120 
2121     if (Amount == 0)
2122       return;
2123 
2124     // Factor out the amount that gets handled inside the sequence
2125     // (pushes of arguments for frame setup, callee pops for frame destroy).
2126     Amount -= InternalAmt;
2127 
2128     if (Amount) {
2129       // Add Amount to SP to destroy a frame, and subtract it to set one up.
2130       int Offset = isDestroy ? Amount : -Amount;
2131 
2132       if (!(MF.getFunction()->optForMinSize() &&
2133             adjustStackWithPops(MBB, I, DL, Offset)))
2134         BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false);
2135     }
2136 
2137     return;
2138   }
2139 
2140   if (isDestroy && InternalAmt) {
2141     // If we are performing frame pointer elimination and if the callee pops
2142     // something off the stack pointer, add it back. We do this until we have
2143     // more advanced stack pointer tracking ability.
2144     // We are not tracking the stack pointer adjustment by the callee, so make
2145     // sure we restore the stack pointer immediately after the call; there may
2146     // be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
2147     MachineBasicBlock::iterator B = MBB.begin();
2148     while (I != B && !std::prev(I)->isCall())
2149       --I;
2150     BuildStackAdjustment(MBB, I, DL, -InternalAmt, /*InEpilogue=*/false);
2151   }
2152 }
2153 
2154 bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
2155   assert(MBB.getParent() && "Block is not attached to a function!");
2156 
2157   if (canUseLEAForSPInEpilogue(*MBB.getParent()))
2158     return true;
2159 
2160   // If we cannot use LEA to adjust SP, we may need to use ADD, which
2161   // clobbers EFLAGS. Check that none of the terminators reads EFLAGS;
2162   // if one does, conservatively assume this block is not a safe place
2163   // to insert the epilogue.
2164   return !terminatorsNeedFlagsAsInput(MBB);
2165 }
2166 
2167 MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
2168     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
2169     DebugLoc DL, bool RestoreSP) const {
2170   assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env");
2171   assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32");
2172   assert(STI.is32Bit() && !Uses64BitFramePtr &&
2173          "restoring EBP/ESI on non-32-bit target");
2174 
2175   MachineFunction &MF = *MBB.getParent();
2176   unsigned FramePtr = TRI->getFrameRegister(MF);
2177   unsigned BasePtr = TRI->getBaseRegister();
2178   MachineModuleInfo &MMI = MF.getMMI();
2179   const Function *Fn = MF.getFunction();
2180   WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(Fn);
2181   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2182   MachineFrameInfo *MFI = MF.getFrameInfo();
2183 
2184   // FIXME: Don't set the FrameSetup flag in the catchret case.
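  //
  // A rough sketch of what is emitted below when RestoreSP is set and the
  // frame index resolves against the frame pointer (illustrative AT&T syntax;
  // EHRegSize and EndOffset are the values computed below):
  //   movl -EHRegSize(%ebp), %esp
  //   addl $EndOffset, %ebp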
2185 
2186   int FI = FuncInfo.EHRegNodeFrameIndex;
2187   int EHRegSize = MFI->getObjectSize(FI);
2188 
2189   if (RestoreSP) {
2190     // MOV32rm -EHRegSize(%ebp), %esp
2191     addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP),
2192                  X86::EBP, true, -EHRegSize)
2193         .setMIFlag(MachineInstr::FrameSetup);
2194   }
2195 
2196   unsigned UsedReg;
2197   int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg);
2198   int EndOffset = -EHRegOffset - EHRegSize;
2199   FuncInfo.EHRegNodeEndOffset = EndOffset;
2200 
2201   if (UsedReg == FramePtr) {
2202     // ADD $offset, %ebp
2203     unsigned ADDri = getADDriOpcode(false, EndOffset);
2204     BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr)
2205         .addReg(FramePtr)
2206         .addImm(EndOffset)
2207         .setMIFlag(MachineInstr::FrameSetup)
2208         ->getOperand(3)
2209         .setIsDead();
2210     assert(EndOffset >= 0 &&
2211            "end of registration object above normal EBP position!");
2212   } else if (UsedReg == BasePtr) {
2213     // LEA offset(%ebp), %esi
2214     addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr),
2215                  FramePtr, false, EndOffset)
2216         .setMIFlag(MachineInstr::FrameSetup);
2217     // MOV32rm SavedEBPOffset(%esi), %ebp
2218     assert(X86FI->getHasSEHFramePtrSave());
2219     int Offset =
2220         getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg);
2221     assert(UsedReg == BasePtr);
2222     addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr),
2223                  UsedReg, true, Offset)
2224         .setMIFlag(MachineInstr::FrameSetup);
2225   } else {
2226     llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr");
2227   }
2228   return MBBI;
2229 }
2230 
2231 unsigned X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const {
2232   // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue.
2233   unsigned Offset = 16;
2234   // RBP is immediately pushed.
2235   Offset += SlotSize;
2236   // All callee-saved registers are then pushed.
2237   Offset += MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
2238   // Every funclet allocates enough stack space for the largest outgoing call.
2239   Offset += getWinEHFuncletFrameSize(MF);
2240   return Offset;
2241 }
2242 