1 //===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains the X86 implementation of TargetFrameLowering class. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "X86FrameLowering.h" 15 #include "X86InstrBuilder.h" 16 #include "X86InstrInfo.h" 17 #include "X86MachineFunctionInfo.h" 18 #include "X86Subtarget.h" 19 #include "X86TargetMachine.h" 20 #include "llvm/ADT/SmallSet.h" 21 #include "llvm/CodeGen/MachineFrameInfo.h" 22 #include "llvm/CodeGen/MachineFunction.h" 23 #include "llvm/CodeGen/MachineInstrBuilder.h" 24 #include "llvm/CodeGen/MachineModuleInfo.h" 25 #include "llvm/CodeGen/MachineRegisterInfo.h" 26 #include "llvm/IR/DataLayout.h" 27 #include "llvm/IR/Function.h" 28 #include "llvm/MC/MCAsmInfo.h" 29 #include "llvm/MC/MCSymbol.h" 30 #include "llvm/Support/CommandLine.h" 31 #include "llvm/Target/TargetOptions.h" 32 #include "llvm/Support/Debug.h" 33 #include <cstdlib> 34 35 using namespace llvm; 36 37 // FIXME: completely move here. 38 extern cl::opt<bool> ForceStackAlign; 39 40 X86FrameLowering::X86FrameLowering(const X86Subtarget &STI, 41 unsigned StackAlignOverride) 42 : TargetFrameLowering(StackGrowsDown, StackAlignOverride, 43 STI.is64Bit() ? -8 : -4), 44 STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) { 45 // Cache a bunch of frame-related predicates for this subtarget. 46 SlotSize = TRI->getSlotSize(); 47 Is64Bit = STI.is64Bit(); 48 IsLP64 = STI.isTarget64BitLP64(); 49 // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit. 
50 Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64(); 51 StackPtr = TRI->getStackRegister(); 52 } 53 54 bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { 55 return !MF.getFrameInfo()->hasVarSizedObjects() && 56 !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences(); 57 } 58 59 /// canSimplifyCallFramePseudos - If there is a reserved call frame, the 60 /// call frame pseudos can be simplified. Having a FP, as in the default 61 /// implementation, is not sufficient here since we can't always use it. 62 /// Use a more nuanced condition. 63 bool 64 X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const { 65 return hasReservedCallFrame(MF) || 66 (hasFP(MF) && !TRI->needsStackRealignment(MF)) || 67 TRI->hasBasePointer(MF); 68 } 69 70 // needsFrameIndexResolution - Do we need to perform FI resolution for 71 // this function. Normally, this is required only when the function 72 // has any stack objects. However, FI resolution actually has another job, 73 // not apparent from the title - it resolves callframesetup/destroy 74 // that were not simplified earlier. 75 // So, this is required for x86 functions that have push sequences even 76 // when there are no stack objects. 77 bool 78 X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const { 79 return MF.getFrameInfo()->hasStackObjects() || 80 MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences(); 81 } 82 83 /// hasFP - Return true if the specified function should have a dedicated frame 84 /// pointer register. This is true if the function has variable sized allocas 85 /// or if frame pointer elimination is disabled. 
86 bool X86FrameLowering::hasFP(const MachineFunction &MF) const { 87 const MachineFrameInfo *MFI = MF.getFrameInfo(); 88 const MachineModuleInfo &MMI = MF.getMMI(); 89 90 return (MF.getTarget().Options.DisableFramePointerElim(MF) || 91 TRI->needsStackRealignment(MF) || 92 MFI->hasVarSizedObjects() || 93 MFI->isFrameAddressTaken() || MFI->hasOpaqueSPAdjustment() || 94 MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() || 95 MMI.callsUnwindInit() || MMI.callsEHReturn() || 96 MFI->hasStackMap() || MFI->hasPatchPoint()); 97 } 98 99 static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) { 100 if (IsLP64) { 101 if (isInt<8>(Imm)) 102 return X86::SUB64ri8; 103 return X86::SUB64ri32; 104 } else { 105 if (isInt<8>(Imm)) 106 return X86::SUB32ri8; 107 return X86::SUB32ri; 108 } 109 } 110 111 static unsigned getADDriOpcode(unsigned IsLP64, int64_t Imm) { 112 if (IsLP64) { 113 if (isInt<8>(Imm)) 114 return X86::ADD64ri8; 115 return X86::ADD64ri32; 116 } else { 117 if (isInt<8>(Imm)) 118 return X86::ADD32ri8; 119 return X86::ADD32ri; 120 } 121 } 122 123 static unsigned getSUBrrOpcode(unsigned isLP64) { 124 return isLP64 ? X86::SUB64rr : X86::SUB32rr; 125 } 126 127 static unsigned getADDrrOpcode(unsigned isLP64) { 128 return isLP64 ? X86::ADD64rr : X86::ADD32rr; 129 } 130 131 static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) { 132 if (IsLP64) { 133 if (isInt<8>(Imm)) 134 return X86::AND64ri8; 135 return X86::AND64ri32; 136 } 137 if (isInt<8>(Imm)) 138 return X86::AND32ri8; 139 return X86::AND32ri; 140 } 141 142 static unsigned getLEArOpcode(unsigned IsLP64) { 143 return IsLP64 ? X86::LEA64r : X86::LEA32r; 144 } 145 146 /// findDeadCallerSavedReg - Return a caller-saved register that isn't live 147 /// when it reaches the "return" instruction. We can then pop a stack object 148 /// to this register without worry about clobbering it. 
149 static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, 150 MachineBasicBlock::iterator &MBBI, 151 const TargetRegisterInfo *TRI, 152 bool Is64Bit) { 153 const MachineFunction *MF = MBB.getParent(); 154 const Function *F = MF->getFunction(); 155 if (!F || MF->getMMI().callsEHReturn()) 156 return 0; 157 158 static const uint16_t CallerSavedRegs32Bit[] = { 159 X86::EAX, X86::EDX, X86::ECX, 0 160 }; 161 162 static const uint16_t CallerSavedRegs64Bit[] = { 163 X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI, 164 X86::R8, X86::R9, X86::R10, X86::R11, 0 165 }; 166 167 unsigned Opc = MBBI->getOpcode(); 168 switch (Opc) { 169 default: return 0; 170 case X86::RETL: 171 case X86::RETQ: 172 case X86::RETIL: 173 case X86::RETIQ: 174 case X86::TCRETURNdi: 175 case X86::TCRETURNri: 176 case X86::TCRETURNmi: 177 case X86::TCRETURNdi64: 178 case X86::TCRETURNri64: 179 case X86::TCRETURNmi64: 180 case X86::EH_RETURN: 181 case X86::EH_RETURN64: { 182 SmallSet<uint16_t, 8> Uses; 183 for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) { 184 MachineOperand &MO = MBBI->getOperand(i); 185 if (!MO.isReg() || MO.isDef()) 186 continue; 187 unsigned Reg = MO.getReg(); 188 if (!Reg) 189 continue; 190 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) 191 Uses.insert(*AI); 192 } 193 194 const uint16_t *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit; 195 for (; *CS; ++CS) 196 if (!Uses.count(*CS)) 197 return *CS; 198 } 199 } 200 201 return 0; 202 } 203 204 static bool isEAXLiveIn(MachineFunction &MF) { 205 for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(), 206 EE = MF.getRegInfo().livein_end(); II != EE; ++II) { 207 unsigned Reg = II->first; 208 209 if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX || 210 Reg == X86::AH || Reg == X86::AL) 211 return true; 212 } 213 214 return false; 215 } 216 217 /// Check whether or not the terminators of \p MBB needs to read EFLAGS. 
218 static bool terminatorsNeedFlagsAsInput(const MachineBasicBlock &MBB) { 219 for (const MachineInstr &MI : MBB.terminators()) { 220 bool BreakNext = false; 221 for (const MachineOperand &MO : MI.operands()) { 222 if (!MO.isReg()) 223 continue; 224 unsigned Reg = MO.getReg(); 225 if (Reg != X86::EFLAGS) 226 continue; 227 228 // This terminator needs an eflag that is not defined 229 // by a previous terminator. 230 if (!MO.isDef()) 231 return true; 232 BreakNext = true; 233 } 234 if (BreakNext) 235 break; 236 } 237 return false; 238 } 239 240 /// emitSPUpdate - Emit a series of instructions to increment / decrement the 241 /// stack pointer by a constant value. 242 void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB, 243 MachineBasicBlock::iterator &MBBI, 244 int64_t NumBytes, bool InEpilogue) const { 245 bool isSub = NumBytes < 0; 246 uint64_t Offset = isSub ? -NumBytes : NumBytes; 247 248 uint64_t Chunk = (1LL << 31) - 1; 249 DebugLoc DL = MBB.findDebugLoc(MBBI); 250 251 while (Offset) { 252 if (Offset > Chunk) { 253 // Rather than emit a long series of instructions for large offsets, 254 // load the offset into a register and do one sub/add 255 unsigned Reg = 0; 256 257 if (isSub && !isEAXLiveIn(*MBB.getParent())) 258 Reg = (unsigned)(Is64Bit ? X86::RAX : X86::EAX); 259 else 260 Reg = findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit); 261 262 if (Reg) { 263 unsigned Opc = Is64Bit ? X86::MOV64ri : X86::MOV32ri; 264 BuildMI(MBB, MBBI, DL, TII.get(Opc), Reg) 265 .addImm(Offset); 266 Opc = isSub 267 ? getSUBrrOpcode(Is64Bit) 268 : getADDrrOpcode(Is64Bit); 269 MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) 270 .addReg(StackPtr) 271 .addReg(Reg); 272 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. 273 Offset = 0; 274 continue; 275 } 276 } 277 278 uint64_t ThisVal = std::min(Offset, Chunk); 279 if (ThisVal == (Is64Bit ? 8 : 4)) { 280 // Use push / pop instead. 281 unsigned Reg = isSub 282 ? (unsigned)(Is64Bit ? 
X86::RAX : X86::EAX) 283 : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit); 284 if (Reg) { 285 unsigned Opc = isSub 286 ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r) 287 : (Is64Bit ? X86::POP64r : X86::POP32r); 288 MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc)) 289 .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub)); 290 if (isSub) 291 MI->setFlag(MachineInstr::FrameSetup); 292 Offset -= ThisVal; 293 continue; 294 } 295 } 296 297 MachineInstrBuilder MI = BuildStackAdjustment( 298 MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue); 299 if (isSub) 300 MI.setMIFlag(MachineInstr::FrameSetup); 301 302 Offset -= ThisVal; 303 } 304 } 305 306 MachineInstrBuilder X86FrameLowering::BuildStackAdjustment( 307 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL, 308 int64_t Offset, bool InEpilogue) const { 309 assert(Offset != 0 && "zero offset stack adjustment requested"); 310 311 // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue 312 // is tricky. 313 bool UseLEA; 314 if (!InEpilogue) { 315 UseLEA = STI.useLeaForSP(); 316 } else { 317 // If we can use LEA for SP but we shouldn't, check that none 318 // of the terminators uses the eflags. Otherwise we will insert 319 // a ADD that will redefine the eflags and break the condition. 320 // Alternatively, we could move the ADD, but this may not be possible 321 // and is an optimization anyway. 322 UseLEA = canUseLEAForSPInEpilogue(*MBB.getParent()); 323 if (UseLEA && !STI.useLeaForSP()) 324 UseLEA = terminatorsNeedFlagsAsInput(MBB); 325 // If that assert breaks, that means we do not do the right thing 326 // in canUseAsEpilogue. 
327 assert((UseLEA || !terminatorsNeedFlagsAsInput(MBB)) && 328 "We shouldn't have allowed this insertion point"); 329 } 330 331 MachineInstrBuilder MI; 332 if (UseLEA) { 333 MI = addRegOffset(BuildMI(MBB, MBBI, DL, 334 TII.get(getLEArOpcode(Uses64BitFramePtr)), 335 StackPtr), 336 StackPtr, false, Offset); 337 } else { 338 bool IsSub = Offset < 0; 339 uint64_t AbsOffset = IsSub ? -Offset : Offset; 340 unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr, AbsOffset) 341 : getADDriOpcode(Uses64BitFramePtr, AbsOffset); 342 MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) 343 .addReg(StackPtr) 344 .addImm(AbsOffset); 345 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. 346 } 347 return MI; 348 } 349 350 /// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator. 351 static 352 void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, 353 unsigned StackPtr, uint64_t *NumBytes = nullptr) { 354 if (MBBI == MBB.begin()) return; 355 356 MachineBasicBlock::iterator PI = std::prev(MBBI); 357 unsigned Opc = PI->getOpcode(); 358 if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || 359 Opc == X86::ADD32ri || Opc == X86::ADD32ri8 || 360 Opc == X86::LEA32r || Opc == X86::LEA64_32r) && 361 PI->getOperand(0).getReg() == StackPtr) { 362 if (NumBytes) 363 *NumBytes += PI->getOperand(2).getImm(); 364 MBB.erase(PI); 365 } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || 366 Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && 367 PI->getOperand(0).getReg() == StackPtr) { 368 if (NumBytes) 369 *NumBytes -= PI->getOperand(2).getImm(); 370 MBB.erase(PI); 371 } 372 } 373 374 int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB, 375 MachineBasicBlock::iterator &MBBI, 376 bool doMergeWithPrevious) const { 377 if ((doMergeWithPrevious && MBBI == MBB.begin()) || 378 (!doMergeWithPrevious && MBBI == MBB.end())) 379 return 0; 380 381 MachineBasicBlock::iterator PI = doMergeWithPrevious ? 
std::prev(MBBI) : MBBI; 382 MachineBasicBlock::iterator NI = doMergeWithPrevious ? nullptr 383 : std::next(MBBI); 384 unsigned Opc = PI->getOpcode(); 385 int Offset = 0; 386 387 if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || 388 Opc == X86::ADD32ri || Opc == X86::ADD32ri8 || 389 Opc == X86::LEA32r || Opc == X86::LEA64_32r) && 390 PI->getOperand(0).getReg() == StackPtr){ 391 Offset += PI->getOperand(2).getImm(); 392 MBB.erase(PI); 393 if (!doMergeWithPrevious) MBBI = NI; 394 } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || 395 Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && 396 PI->getOperand(0).getReg() == StackPtr) { 397 Offset -= PI->getOperand(2).getImm(); 398 MBB.erase(PI); 399 if (!doMergeWithPrevious) MBBI = NI; 400 } 401 402 return Offset; 403 } 404 405 void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB, 406 MachineBasicBlock::iterator MBBI, DebugLoc DL, 407 MCCFIInstruction CFIInst) const { 408 MachineFunction &MF = *MBB.getParent(); 409 unsigned CFIIndex = MF.getMMI().addFrameInst(CFIInst); 410 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) 411 .addCFIIndex(CFIIndex); 412 } 413 414 void 415 X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, 416 MachineBasicBlock::iterator MBBI, 417 DebugLoc DL) const { 418 MachineFunction &MF = *MBB.getParent(); 419 MachineFrameInfo *MFI = MF.getFrameInfo(); 420 MachineModuleInfo &MMI = MF.getMMI(); 421 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); 422 423 // Add callee saved registers to move list. 424 const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); 425 if (CSI.empty()) return; 426 427 // Calculate offsets. 
428 for (std::vector<CalleeSavedInfo>::const_iterator 429 I = CSI.begin(), E = CSI.end(); I != E; ++I) { 430 int64_t Offset = MFI->getObjectOffset(I->getFrameIdx()); 431 unsigned Reg = I->getReg(); 432 433 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); 434 BuildCFI(MBB, MBBI, DL, 435 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); 436 } 437 } 438 439 /// usesTheStack - This function checks if any of the users of EFLAGS 440 /// copies the EFLAGS. We know that the code that lowers COPY of EFLAGS has 441 /// to use the stack, and if we don't adjust the stack we clobber the first 442 /// frame index. 443 /// See X86InstrInfo::copyPhysReg. 444 static bool usesTheStack(const MachineFunction &MF) { 445 const MachineRegisterInfo &MRI = MF.getRegInfo(); 446 447 for (MachineRegisterInfo::reg_instr_iterator 448 ri = MRI.reg_instr_begin(X86::EFLAGS), re = MRI.reg_instr_end(); 449 ri != re; ++ri) 450 if (ri->isCopy()) 451 return true; 452 453 return false; 454 } 455 456 void X86FrameLowering::emitStackProbeCall(MachineFunction &MF, 457 MachineBasicBlock &MBB, 458 MachineBasicBlock::iterator MBBI, 459 DebugLoc DL) const { 460 bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large; 461 462 unsigned CallOp; 463 if (Is64Bit) 464 CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32; 465 else 466 CallOp = X86::CALLpcrel32; 467 468 const char *Symbol; 469 if (Is64Bit) { 470 if (STI.isTargetCygMing()) { 471 Symbol = "___chkstk_ms"; 472 } else { 473 Symbol = "__chkstk"; 474 } 475 } else if (STI.isTargetCygMing()) 476 Symbol = "_alloca"; 477 else 478 Symbol = "_chkstk"; 479 480 MachineInstrBuilder CI; 481 482 // All current stack probes take AX and SP as input, clobber flags, and 483 // preserve all registers. x86_64 probes leave RSP unmodified. 484 if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) { 485 // For the large code model, we have to call through a register. 
Use R11, 486 // as it is scratch in all supported calling conventions. 487 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11) 488 .addExternalSymbol(Symbol); 489 CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11); 490 } else { 491 CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addExternalSymbol(Symbol); 492 } 493 494 unsigned AX = Is64Bit ? X86::RAX : X86::EAX; 495 unsigned SP = Is64Bit ? X86::RSP : X86::ESP; 496 CI.addReg(AX, RegState::Implicit) 497 .addReg(SP, RegState::Implicit) 498 .addReg(AX, RegState::Define | RegState::Implicit) 499 .addReg(SP, RegState::Define | RegState::Implicit) 500 .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); 501 502 if (Is64Bit) { 503 // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp 504 // themselves. It also does not clobber %rax so we can reuse it when 505 // adjusting %rsp. 506 BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), X86::RSP) 507 .addReg(X86::RSP) 508 .addReg(X86::RAX); 509 } 510 } 511 512 static unsigned calculateSetFPREG(uint64_t SPAdjust) { 513 // Win64 ABI has a less restrictive limitation of 240; 128 works equally well 514 // and might require smaller successive adjustments. 515 const uint64_t Win64MaxSEHOffset = 128; 516 uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset); 517 // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode. 518 return SEHFrameOffset & -16; 519 } 520 521 // If we're forcing a stack realignment we can't rely on just the frame 522 // info, we need to know the ABI stack alignment as well in case we 523 // have a call out. Otherwise just make sure we have some alignment - we'll 524 // go with the minimum SlotSize. 525 uint64_t X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const { 526 const MachineFrameInfo *MFI = MF.getFrameInfo(); 527 uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment. 
528 unsigned StackAlign = getStackAlignment(); 529 if (ForceStackAlign) { 530 if (MFI->hasCalls()) 531 MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; 532 else if (MaxAlign < SlotSize) 533 MaxAlign = SlotSize; 534 } 535 return MaxAlign; 536 } 537 538 void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB, 539 MachineBasicBlock::iterator MBBI, 540 DebugLoc DL, 541 uint64_t MaxAlign) const { 542 uint64_t Val = -MaxAlign; 543 MachineInstr *MI = 544 BuildMI(MBB, MBBI, DL, TII.get(getANDriOpcode(Uses64BitFramePtr, Val)), 545 StackPtr) 546 .addReg(StackPtr) 547 .addImm(Val) 548 .setMIFlag(MachineInstr::FrameSetup); 549 550 // The EFLAGS implicit def is dead. 551 MI->getOperand(3).setIsDead(); 552 } 553 554 /// emitPrologue - Push callee-saved registers onto the stack, which 555 /// automatically adjust the stack pointer. Adjust the stack pointer to allocate 556 /// space for local variables. Also emit labels used by the exception handler to 557 /// generate the exception handling frames. 558 559 /* 560 Here's a gist of what gets emitted: 561 562 ; Establish frame pointer, if needed 563 [if needs FP] 564 push %rbp 565 .cfi_def_cfa_offset 16 566 .cfi_offset %rbp, -16 567 .seh_pushreg %rpb 568 mov %rsp, %rbp 569 .cfi_def_cfa_register %rbp 570 571 ; Spill general-purpose registers 572 [for all callee-saved GPRs] 573 pushq %<reg> 574 [if not needs FP] 575 .cfi_def_cfa_offset (offset from RETADDR) 576 .seh_pushreg %<reg> 577 578 ; If the required stack alignment > default stack alignment 579 ; rsp needs to be re-aligned. This creates a "re-alignment gap" 580 ; of unknown size in the stack frame. 581 [if stack needs re-alignment] 582 and $MASK, %rsp 583 584 ; Allocate space for locals 585 [if target is Windows and allocated space > 4096 bytes] 586 ; Windows needs special care for allocations larger 587 ; than one page. 
588 mov $NNN, %rax 589 call ___chkstk_ms/___chkstk 590 sub %rax, %rsp 591 [else] 592 sub $NNN, %rsp 593 594 [if needs FP] 595 .seh_stackalloc (size of XMM spill slots) 596 .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots 597 [else] 598 .seh_stackalloc NNN 599 600 ; Spill XMMs 601 ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved, 602 ; they may get spilled on any platform, if the current function 603 ; calls @llvm.eh.unwind.init 604 [if needs FP] 605 [for all callee-saved XMM registers] 606 movaps %<xmm reg>, -MMM(%rbp) 607 [for all callee-saved XMM registers] 608 .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset) 609 ; i.e. the offset relative to (%rbp - SEHFrameOffset) 610 [else] 611 [for all callee-saved XMM registers] 612 movaps %<xmm reg>, KKK(%rsp) 613 [for all callee-saved XMM registers] 614 .seh_savexmm %<xmm reg>, KKK 615 616 .seh_endprologue 617 618 [if needs base pointer] 619 mov %rsp, %rbx 620 [if needs to restore base pointer] 621 mov %rsp, -MMM(%rbp) 622 623 ; Emit CFI info 624 [if needs FP] 625 [for all callee-saved registers] 626 .cfi_offset %<reg>, (offset from %rbp) 627 [else] 628 .cfi_def_cfa_offset (offset from RETADDR) 629 [for all callee-saved registers] 630 .cfi_offset %<reg>, (offset from %rsp) 631 632 Notes: 633 - .seh directives are emitted only for Windows 64 ABI 634 - .cfi directives are emitted for all other ABIs 635 - for 32-bit code, substitute %e?? registers for %r?? 
*/

void X86FrameLowering::emitPrologue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
         "MF used frame lowering for wrong subtarget");
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const Function *Fn = MF.getFunction();
  MachineModuleInfo &MMI = MF.getMMI();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
  uint64_t StackSize = MFI->getStackSize();    // Number of bytes to allocate.
  bool HasFP = hasFP(MF);
  bool IsWin64CC = STI.isCallingConvWin64(Fn->getCallingConv());
  bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
  // Exactly one unwind-info flavour is emitted: Windows CFI (.seh_*) when the
  // target uses it, DWARF CFI (.cfi_*) otherwise.
  bool NeedsWinCFI = IsWin64Prologue && Fn->needsUnwindTableEntry();
  bool NeedsDwarfCFI =
      !IsWin64Prologue && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry());
  unsigned FramePtr = TRI->getFrameRegister(MF);
  // On 64-bit ILP32 targets the register pushed/described in CFI is the
  // 64-bit super-register of the frame register.
  const unsigned MachineFramePtr =
      STI.isTarget64BitILP32()
          ? getX86SubSuperRegister(FramePtr, MVT::i64, false)
          : FramePtr;
  unsigned BasePtr = TRI->getBaseRegister();
  DebugLoc DL;

  // Add RETADDR move area to callee saved frame size.
  int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
  if (TailCallReturnAddrDelta && IsWin64Prologue)
    report_fatal_error("Can't handle guaranteed tail call under win64 yet");

  if (TailCallReturnAddrDelta < 0)
    X86FI->setCalleeSavedFrameSize(
      X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);

  bool UseStackProbe = (STI.isOSWindows() && !STI.isTargetMachO());

  // The default stack probe size is 4096 if the function has no
  // "stack-probe-size" attribute. The result of getAsInteger is not checked
  // here.
  unsigned StackProbeSize = 4096;
  if (Fn->hasFnAttribute("stack-probe-size"))
    Fn->getFnAttribute("stack-probe-size")
        .getValueAsString()
        .getAsInteger(0, StackProbeSize);

  // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
  // function, and use up to 128 bytes of stack space, don't have a frame
  // pointer, calls, or dynamic alloca then we do not need to adjust the
  // stack pointer (we fit in the Red Zone). We also check that we don't
  // push and pop from the stack.
  if (Is64Bit && !Fn->hasFnAttribute(Attribute::NoRedZone) &&
      !TRI->needsStackRealignment(MF) &&
      !MFI->hasVarSizedObjects() && // No dynamic alloca.
      !MFI->adjustsStack() &&       // No calls.
      !IsWin64CC &&                 // Win64 has no Red Zone
      !usesTheStack(MF) &&          // Don't push and pop.
      !MF.shouldSplitStack()) {     // Regular stack
    // Shrink the allocation by up to 128 bytes, but never below the space
    // taken by the callee-saved area (plus the frame-pointer slot, if any).
    uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
    if (HasFP) MinSize += SlotSize;
    StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
    MFI->setStackSize(StackSize);
  }

  // Insert stack pointer adjustment for later moving of return addr.  Only
  // applies to tail call optimized functions where the callee argument stack
  // size is bigger than the callers.
  if (TailCallReturnAddrDelta < 0) {
    BuildStackAdjustment(MBB, MBBI, DL, TailCallReturnAddrDelta,
                         /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // Mapping for machine moves:
  //
  //   DST: VirtualFP AND
  //        SRC: VirtualFP              => DW_CFA_def_cfa_offset
  //        ELSE                        => DW_CFA_def_cfa
  //
  //   SRC: VirtualFP AND
  //        DST: Register               => DW_CFA_def_cfa_register
  //
  //   ELSE
  //        OFFSET < 0                  => DW_CFA_offset_extended_sf
  //        REG < 64                    => DW_CFA_offset + Reg
  //        ELSE                        => DW_CFA_offset_extended

  // NumBytes is the amount still to be allocated by an explicit SP
  // adjustment, i.e. excluding what the pushes account for.
  uint64_t NumBytes = 0;
  int stackGrowth = -SlotSize;

  if (HasFP) {
    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    // If required, include space for extra hidden slot for stashing base pointer.
    if (X86FI->getRestoreBasePointer())
      FrameSize += SlotSize;

    NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();

    // Callee-saved registers are pushed on stack before the stack is realigned.
    if (TRI->needsStackRealignment(MF) && !IsWin64Prologue)
      NumBytes = RoundUpToAlignment(NumBytes, MaxAlign);

    // Get the offset of the stack slot for the EBP register, which is
    // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
    // Update the frame offset adjustment.
    MFI->setOffsetAdjustment(-NumBytes);

    // Save EBP/RBP into the appropriate stack slot.
    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
      .addReg(MachineFramePtr, RegState::Kill)
      .setMIFlag(MachineInstr::FrameSetup);

    if (NeedsDwarfCFI) {
      // Mark the place where EBP/RBP was saved.
      // Define the current CFA rule to use the provided offset.
      assert(StackSize);
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createDefCfaOffset(nullptr, 2 * stackGrowth));

      // Change the rule for the FramePtr to be an "offset" rule.
      unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
      BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createOffset(
                                  nullptr, DwarfFramePtr, 2 * stackGrowth));
    }

    if (NeedsWinCFI) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
          .addImm(FramePtr)
          .setMIFlag(MachineInstr::FrameSetup);
    }

    // In a Win64 prologue the frame register is set up later, after the
    // stack allocation (see the SEHFrameOffset code below).
    if (!IsWin64Prologue) {
      // Update EBP with the new base value.
      BuildMI(MBB, MBBI, DL,
              TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
              FramePtr)
          .addReg(StackPtr)
          .setMIFlag(MachineInstr::FrameSetup);
    }

    if (NeedsDwarfCFI) {
      // Mark effective beginning of when frame pointer becomes valid.
      // Define the current CFA to use the EBP/RBP register.
      unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr));
    }

    // Mark the FramePtr as live-in in every block.
    for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
      I->addLiveIn(MachineFramePtr);
  } else {
    NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
  }

  // Skip the callee-saved push instructions.
  bool PushedRegs = false;
  int StackOffset = 2 * stackGrowth;

  while (MBBI != MBB.end() &&
         (MBBI->getOpcode() == X86::PUSH32r ||
          MBBI->getOpcode() == X86::PUSH64r)) {
    PushedRegs = true;
    unsigned Reg = MBBI->getOperand(0).getReg();
    ++MBBI;

    if (!HasFP && NeedsDwarfCFI) {
      // Mark callee-saved push instruction.
      // Define the current CFA rule to use the provided offset.
      assert(StackSize);
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createDefCfaOffset(nullptr, StackOffset));
      StackOffset += stackGrowth;
    }

    if (NeedsWinCFI) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)).addImm(Reg).setMIFlag(
          MachineInstr::FrameSetup);
    }
  }

  // Realign stack after we pushed callee-saved registers (so that we'll be
  // able to calculate their offsets from the frame pointer).
  // Don't do this for Win64, it needs to realign the stack after the prologue.
  if (!IsWin64Prologue && TRI->needsStackRealignment(MF)) {
    assert(HasFP && "There should be a frame pointer if stack is realigned.");
    BuildStackAlignAND(MBB, MBBI, DL, MaxAlign);
  }

  // If there is an SUB32ri of ESP immediately before this instruction, merge
  // the two. This can be the case when tail call elimination is enabled and
  // the callee has more arguments than the caller.
  NumBytes -= mergeSPUpdates(MBB, MBBI, true);

  // Adjust stack pointer: ESP -= numbytes.

  // Windows and cygwin/mingw require a prologue helper routine when allocating
  // more than 4K bytes on the stack.  Windows uses __chkstk and cygwin/mingw
  // uses __alloca.  __alloca and the 32-bit version of __chkstk will probe the
  // stack and adjust the stack pointer in one go.  The 64-bit version of
  // __chkstk is only responsible for probing the stack.  The 64-bit prologue is
  // responsible for adjusting the stack pointer.  Touching the stack at 4K
  // increments is necessary to ensure that the guard pages used by the OS
  // virtual memory manager are allocated in correct sequence.
  //
  // AlignedNumBytes is only used to decide whether a probe is needed; the
  // amount actually allocated below is NumBytes.
  uint64_t AlignedNumBytes = NumBytes;
  if (IsWin64Prologue && TRI->needsStackRealignment(MF))
    AlignedNumBytes = RoundUpToAlignment(AlignedNumBytes, MaxAlign);
  if (AlignedNumBytes >= StackProbeSize && UseStackProbe) {
    // Check whether EAX is livein for this function.
    bool isEAXAlive = isEAXLiveIn(MF);

    if (isEAXAlive) {
      // Sanity check that EAX is not livein for this function.
      // It should not be, so throw an assert.
      assert(!Is64Bit && "EAX is livein in x64 case!");

      // Save EAX
      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
        .addReg(X86::EAX, RegState::Kill)
        .setMIFlag(MachineInstr::FrameSetup);
    }

    if (Is64Bit) {
      // Handle the 64-bit Windows ABI case where we need to call __chkstk.
      // Function prologue is responsible for adjusting the stack pointer.
      // Pick the move form according to the range NumBytes falls in.
      if (isUInt<32>(NumBytes)) {
        BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
            .addImm(NumBytes)
            .setMIFlag(MachineInstr::FrameSetup);
      } else if (isInt<32>(NumBytes)) {
        BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri32), X86::RAX)
            .addImm(NumBytes)
            .setMIFlag(MachineInstr::FrameSetup);
      } else {
        BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX)
            .addImm(NumBytes)
            .setMIFlag(MachineInstr::FrameSetup);
      }
    } else {
      // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
      // We'll also use 4 already allocated bytes for EAX.
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
        .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
        .setMIFlag(MachineInstr::FrameSetup);
    }

    // Save a pointer to the MI where we set AX.
    MachineBasicBlock::iterator SetRAX = MBBI;
    --SetRAX;

    // Call __chkstk, __chkstk_ms, or __alloca.
    emitStackProbeCall(MF, MBB, MBBI, DL);

    // Apply the frame setup flag to all inserted instrs.
    for (; SetRAX != MBBI; ++SetRAX)
      SetRAX->setFlag(MachineInstr::FrameSetup);

    if (isEAXAlive) {
      // Restore EAX
      MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
                                              X86::EAX),
                                      StackPtr, false, NumBytes - 4);
      MI->setFlag(MachineInstr::FrameSetup);
      MBB.insert(MBBI, MI);
    }
  } else if (NumBytes) {
    emitSPUpdate(MBB, MBBI, -(int64_t)NumBytes, /*InEpilogue=*/false);
  }

  if (NeedsWinCFI && NumBytes)
    BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
        .addImm(NumBytes)
        .setMIFlag(MachineInstr::FrameSetup);

  // Win64 with a frame pointer: point the frame register SEHFrameOffset
  // bytes above the stack pointer (or at it when the offset is zero).
  int SEHFrameOffset = 0;
  if (IsWin64Prologue && HasFP) {
    SEHFrameOffset = calculateSetFPREG(NumBytes);
    if (SEHFrameOffset)
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
                   StackPtr, false, SEHFrameOffset);
    else
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr).addReg(StackPtr);

    if (NeedsWinCFI)
      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
          .addImm(FramePtr)
          .addImm(SEHFrameOffset)
          .setMIFlag(MachineInstr::FrameSetup);
  }

  // Step over the remaining FrameSetup-flagged instructions. When Windows CFI
  // is needed, each one that stores an FR64-class register to a stack slot
  // gets an SEH_SaveXMM with the slot offset shifted by SEHFrameOffset.
  while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
    const MachineInstr *FrameInstr = &*MBBI;
    ++MBBI;

    if (NeedsWinCFI) {
      int FI;
      if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
        if (X86::FR64RegClass.contains(Reg)) {
          int Offset = getFrameIndexOffset(MF, FI);
          Offset += SEHFrameOffset;

          BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
              .addImm(Reg)
              .addImm(Offset)
              .setMIFlag(MachineInstr::FrameSetup);
        }
      }
    }
  }

  if (NeedsWinCFI)
    BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
        .setMIFlag(MachineInstr::FrameSetup);

  // Realign stack after we spilled callee-saved registers (so that we'll be
  // able to calculate their offsets from the frame pointer).
  // Win64 requires aligning the stack after the prologue.
  if (IsWin64Prologue && TRI->needsStackRealignment(MF)) {
    assert(HasFP && "There should be a frame pointer if stack is realigned.");
    BuildStackAlignAND(MBB, MBBI, DL, MaxAlign);
  }

  // If we need a base pointer, set it up here. It's whatever the value
  // of the stack pointer is at this point. Any variable size objects
  // will be allocated after this, so we can still use the base pointer
  // to reference locals.
  if (TRI->hasBasePointer(MF)) {
    // Update the base pointer with the current stack pointer.
    unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
    BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
      .addReg(StackPtr)
      .setMIFlag(MachineInstr::FrameSetup);
    if (X86FI->getRestoreBasePointer()) {
      // Stash value of base pointer.  Saving RSP instead of EBP shortens
      // dependence chain. Used by SjLj EH.
      unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)),
                   FramePtr, true, X86FI->getRestoreBasePointerOffset())
        .addReg(StackPtr)
        .setMIFlag(MachineInstr::FrameSetup);
    }

    if (X86FI->getHasSEHFramePtrSave()) {
      // Stash the value of the frame pointer relative to the base pointer for
      // Win32 EH. This supports Win32 EH, which does the inverse of the above:
      // it recovers the frame pointer from the base pointer rather than the
      // other way around.
      unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), BasePtr, true,
                   getFrameIndexOffset(MF, X86FI->getSEHFramePtrSaveIndex()))
          .addReg(FramePtr)
          .setMIFlag(MachineInstr::FrameSetup);
    }
  }

  if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
    // Mark end of stack pointer adjustment.
    if (!HasFP && NumBytes) {
      // Define the current CFA rule to use the provided offset.
      assert(StackSize);
      BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaOffset(
                                  nullptr, -StackSize + stackGrowth));
    }

    // Emit DWARF info specifying the offsets of the callee-saved registers.
    if (PushedRegs)
      emitCalleeSavedFrameMoves(MBB, MBBI, DL);
  }
}

/// Return true if an LEA may be used to adjust the stack pointer in the
/// epilogue of \p MF: either the target does not use Windows CFI, or the
/// function has a frame pointer.
bool X86FrameLowering::canUseLEAForSPInEpilogue(
    const MachineFunction &MF) const {
  // We can't use LEA instructions for adjusting the stack pointer if this is a
  // leaf function in the Win64 ABI.  Only ADD instructions may be used to
  // deallocate the stack.
  // This means that we can use LEA for SP in two situations:
  // 1. We *aren't* using the Win64 ABI which means we are free to use LEA.
  // 2. We *have* a frame pointer which means we are permitted to use LEA.
  return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF);
}

void X86FrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();
  // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
  const bool Is64BitILP32 = STI.isTarget64BitILP32();
  unsigned FramePtr = TRI->getFrameRegister(MF);
  unsigned MachineFramePtr =
      Is64BitILP32 ?
getX86SubSuperRegister(FramePtr, MVT::i64, false)
                   : FramePtr;

  // Windows-CFI targets get the restricted epilogue; the SEH epilogue marker
  // is only needed when the function also needs an unwind table entry.
  bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
  bool NeedsWinCFI =
      IsWin64Prologue && MF.getFunction()->needsUnwindTableEntry();

  // Get the number of bytes to allocate from the FrameInfo.
  uint64_t StackSize = MFI->getStackSize();
  uint64_t MaxAlign = calculateMaxStackAlign(MF);
  unsigned CSSize = X86FI->getCalleeSavedFrameSize();
  uint64_t NumBytes = 0;

  if (hasFP(MF)) {
    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    NumBytes = FrameSize - CSSize;

    // Callee-saved registers were pushed on stack before the stack was
    // realigned.
    if (TRI->needsStackRealignment(MF) && !IsWin64Prologue)
      NumBytes = RoundUpToAlignment(FrameSize, MaxAlign);

    // Pop EBP.
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::POP64r : X86::POP32r), MachineFramePtr);
  } else {
    NumBytes = StackSize - CSSize;
  }
  // Remember the amount before mergeSPUpdatesUp (below) may change NumBytes.
  uint64_t SEHStackAllocAmt = NumBytes;

  // Skip the callee-saved pop instructions: walk MBBI backwards over any
  // trailing POP32r/POP64r, DBG_VALUE and terminator instructions.
  while (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PI = std::prev(MBBI);
    unsigned Opc = PI->getOpcode();

    if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE &&
        !PI->isTerminator())
      break;

    --MBBI;
  }
  // MBBI now sits at the first instruction of that trailing run.
  MachineBasicBlock::iterator FirstCSPop = MBBI;

  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();

  // If there is an ADD32ri or SUB32ri of ESP immediately before this
  // instruction, merge the two instructions.
  if (NumBytes || MFI->hasVarSizedObjects())
    mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);

  // If dynamic alloca is used, then reset esp to point to the last callee-saved
  // slot before popping them off! Same applies for the case, when stack was
  // realigned.
  if (TRI->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) {
    if (TRI->needsStackRealignment(MF))
      MBBI = FirstCSPop;
    unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
    // NOTE(review): CSSize is unsigned, so -CSSize is computed modulo 2^32 and
    // then widened to uint64_t. This appears to rely on the offset parameter
    // of addRegOffset narrowing it back to a negative value - confirm.
    uint64_t LEAAmount =
        IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;

    // There are only two legal forms of epilogue:
    // - add SEHAllocationSize, %rsp
    // - lea SEHAllocationSize(%FramePtr), %rsp
    //
    // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence.
    // However, we may use this sequence if we have a frame pointer because the
    // effects of the prologue can safely be undone.
    if (LEAAmount != 0) {
      unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
                   FramePtr, false, LEAAmount);
      // Step back onto the instruction just inserted.
      --MBBI;
    } else {
      unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
      BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
        .addReg(FramePtr);
      // Step back onto the instruction just inserted.
      --MBBI;
    }
  } else if (NumBytes) {
    // Adjust stack pointer back: ESP += numbytes.
    emitSPUpdate(MBB, MBBI, NumBytes, /*InEpilogue=*/true);
    --MBBI;
  }

  // Windows unwinder will not invoke function's exception handler if IP is
  // either in prologue or in epilogue.  This behavior causes a problem when a
  // call immediately precedes an epilogue, because the return address points
  // into the epilogue.  To cope with that, we insert an epilogue marker here,
  // then replace it with a 'nop' if it ends up immediately after a CALL in the
  // final emitted code.
  if (NeedsWinCFI)
    BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));

  // Add the return addr area delta back since we are not tail calling.
1126 int Offset = -1 * X86FI->getTCReturnAddrDelta(); 1127 assert(Offset >= 0 && "TCDelta should never be positive"); 1128 if (Offset) { 1129 MBBI = MBB.getFirstTerminator(); 1130 1131 // Check for possible merge with preceding ADD instruction. 1132 Offset += mergeSPUpdates(MBB, MBBI, true); 1133 emitSPUpdate(MBB, MBBI, Offset, /*InEpilogue=*/true); 1134 } 1135 } 1136 1137 int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, 1138 int FI) const { 1139 const MachineFrameInfo *MFI = MF.getFrameInfo(); 1140 // Offset will hold the offset from the stack pointer at function entry to the 1141 // object. 1142 // We need to factor in additional offsets applied during the prologue to the 1143 // frame, base, and stack pointer depending on which is used. 1144 int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea(); 1145 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 1146 unsigned CSSize = X86FI->getCalleeSavedFrameSize(); 1147 uint64_t StackSize = MFI->getStackSize(); 1148 bool HasFP = hasFP(MF); 1149 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); 1150 int64_t FPDelta = 0; 1151 1152 if (IsWin64Prologue) { 1153 assert(!MFI->hasCalls() || (StackSize % 16) == 8); 1154 1155 // Calculate required stack adjustment. 1156 uint64_t FrameSize = StackSize - SlotSize; 1157 // If required, include space for extra hidden slot for stashing base pointer. 1158 if (X86FI->getRestoreBasePointer()) 1159 FrameSize += SlotSize; 1160 uint64_t NumBytes = FrameSize - CSSize; 1161 1162 uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes); 1163 if (FI && FI == X86FI->getFAIndex()) 1164 return -SEHFrameOffset; 1165 1166 // FPDelta is the offset from the "traditional" FP location of the old base 1167 // pointer followed by return address and the location required by the 1168 // restricted Win64 prologue. 1169 // Add FPDelta to all offsets below that go through the frame pointer. 
1170 FPDelta = FrameSize - SEHFrameOffset; 1171 assert((!MFI->hasCalls() || (FPDelta % 16) == 0) && 1172 "FPDelta isn't aligned per the Win64 ABI!"); 1173 } 1174 1175 1176 if (TRI->hasBasePointer(MF)) { 1177 assert(HasFP && "VLAs and dynamic stack realign, but no FP?!"); 1178 if (FI < 0) { 1179 // Skip the saved EBP. 1180 return Offset + SlotSize + FPDelta; 1181 } else { 1182 assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0); 1183 return Offset + StackSize; 1184 } 1185 } else if (TRI->needsStackRealignment(MF)) { 1186 if (FI < 0) { 1187 // Skip the saved EBP. 1188 return Offset + SlotSize + FPDelta; 1189 } else { 1190 assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0); 1191 return Offset + StackSize; 1192 } 1193 // FIXME: Support tail calls 1194 } else { 1195 if (!HasFP) 1196 return Offset + StackSize; 1197 1198 // Skip the saved EBP. 1199 Offset += SlotSize; 1200 1201 // Skip the RETADDR move area 1202 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); 1203 if (TailCallReturnAddrDelta < 0) 1204 Offset -= TailCallReturnAddrDelta; 1205 } 1206 1207 return Offset + FPDelta; 1208 } 1209 1210 int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, 1211 unsigned &FrameReg) const { 1212 // We can't calculate offset from frame pointer if the stack is realigned, 1213 // so enforce usage of stack/base pointer. The base pointer is used when we 1214 // have dynamic allocas in addition to dynamic realignment. 
1215 if (TRI->hasBasePointer(MF)) 1216 FrameReg = TRI->getBaseRegister(); 1217 else if (TRI->needsStackRealignment(MF)) 1218 FrameReg = TRI->getStackRegister(); 1219 else 1220 FrameReg = TRI->getFrameRegister(MF); 1221 return getFrameIndexOffset(MF, FI); 1222 } 1223 1224 // Simplified from getFrameIndexOffset keeping only StackPointer cases 1225 int X86FrameLowering::getFrameIndexOffsetFromSP(const MachineFunction &MF, int FI) const { 1226 const MachineFrameInfo *MFI = MF.getFrameInfo(); 1227 // Does not include any dynamic realign. 1228 const uint64_t StackSize = MFI->getStackSize(); 1229 { 1230 #ifndef NDEBUG 1231 // Note: LLVM arranges the stack as: 1232 // Args > Saved RetPC (<--FP) > CSRs > dynamic alignment (<--BP) 1233 // > "Stack Slots" (<--SP) 1234 // We can always address StackSlots from RSP. We can usually (unless 1235 // needsStackRealignment) address CSRs from RSP, but sometimes need to 1236 // address them from RBP. FixedObjects can be placed anywhere in the stack 1237 // frame depending on their specific requirements (i.e. we can actually 1238 // refer to arguments to the function which are stored in the *callers* 1239 // frame). As a result, THE RESULT OF THIS CALL IS MEANINGLESS FOR CSRs 1240 // AND FixedObjects IFF needsStackRealignment or hasVarSizedObject. 1241 1242 assert(!TRI->hasBasePointer(MF) && "we don't handle this case"); 1243 1244 // We don't handle tail calls, and shouldn't be seeing them 1245 // either. 1246 int TailCallReturnAddrDelta = 1247 MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta(); 1248 assert(!(TailCallReturnAddrDelta < 0) && "we don't handle this case!"); 1249 #endif 1250 } 1251 1252 // This is how the math works out: 1253 // 1254 // %rsp grows (i.e. gets lower) left to right. Each box below is 1255 // one word (eight bytes). Obj0 is the stack slot we're trying to 1256 // get to. 1257 // 1258 // ---------------------------------- 1259 // | BP | Obj0 | Obj1 | ... 
| ObjN | 1260 // ---------------------------------- 1261 // ^ ^ ^ ^ 1262 // A B C E 1263 // 1264 // A is the incoming stack pointer. 1265 // (B - A) is the local area offset (-8 for x86-64) [1] 1266 // (C - A) is the Offset returned by MFI->getObjectOffset for Obj0 [2] 1267 // 1268 // |(E - B)| is the StackSize (absolute value, positive). For a 1269 // stack that grown down, this works out to be (B - E). [3] 1270 // 1271 // E is also the value of %rsp after stack has been set up, and we 1272 // want (C - E) -- the value we can add to %rsp to get to Obj0. Now 1273 // (C - E) == (C - A) - (B - A) + (B - E) 1274 // { Using [1], [2] and [3] above } 1275 // == getObjectOffset - LocalAreaOffset + StackSize 1276 // 1277 1278 // Get the Offset from the StackPointer 1279 int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea(); 1280 1281 return Offset + StackSize; 1282 } 1283 // Simplified from getFrameIndexReference keeping only StackPointer cases 1284 int X86FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF, 1285 int FI, 1286 unsigned &FrameReg) const { 1287 assert(!TRI->hasBasePointer(MF) && "we don't handle this case"); 1288 1289 FrameReg = TRI->getStackRegister(); 1290 return getFrameIndexOffsetFromSP(MF, FI); 1291 } 1292 1293 bool X86FrameLowering::assignCalleeSavedSpillSlots( 1294 MachineFunction &MF, const TargetRegisterInfo *TRI, 1295 std::vector<CalleeSavedInfo> &CSI) const { 1296 MachineFrameInfo *MFI = MF.getFrameInfo(); 1297 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 1298 1299 unsigned CalleeSavedFrameSize = 0; 1300 int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta(); 1301 1302 if (hasFP(MF)) { 1303 // emitPrologue always spills frame register the first thing. 
1304 SpillSlotOffset -= SlotSize; 1305 MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset); 1306 1307 // Since emitPrologue and emitEpilogue will handle spilling and restoring of 1308 // the frame register, we can delete it from CSI list and not have to worry 1309 // about avoiding it later. 1310 unsigned FPReg = TRI->getFrameRegister(MF); 1311 for (unsigned i = 0; i < CSI.size(); ++i) { 1312 if (TRI->regsOverlap(CSI[i].getReg(),FPReg)) { 1313 CSI.erase(CSI.begin() + i); 1314 break; 1315 } 1316 } 1317 } 1318 1319 // Assign slots for GPRs. It increases frame size. 1320 for (unsigned i = CSI.size(); i != 0; --i) { 1321 unsigned Reg = CSI[i - 1].getReg(); 1322 1323 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg)) 1324 continue; 1325 1326 SpillSlotOffset -= SlotSize; 1327 CalleeSavedFrameSize += SlotSize; 1328 1329 int SlotIndex = MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset); 1330 CSI[i - 1].setFrameIdx(SlotIndex); 1331 } 1332 1333 X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize); 1334 1335 // Assign slots for XMMs. 1336 for (unsigned i = CSI.size(); i != 0; --i) { 1337 unsigned Reg = CSI[i - 1].getReg(); 1338 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) 1339 continue; 1340 1341 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 1342 // ensure alignment 1343 SpillSlotOffset -= std::abs(SpillSlotOffset) % RC->getAlignment(); 1344 // spill into slot 1345 SpillSlotOffset -= RC->getSize(); 1346 int SlotIndex = 1347 MFI->CreateFixedSpillStackObject(RC->getSize(), SpillSlotOffset); 1348 CSI[i - 1].setFrameIdx(SlotIndex); 1349 MFI->ensureMaxAlignment(RC->getAlignment()); 1350 } 1351 1352 return true; 1353 } 1354 1355 bool X86FrameLowering::spillCalleeSavedRegisters( 1356 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 1357 const std::vector<CalleeSavedInfo> &CSI, 1358 const TargetRegisterInfo *TRI) const { 1359 DebugLoc DL = MBB.findDebugLoc(MI); 1360 1361 // Push GPRs. 
It increases frame size. 1362 unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r; 1363 for (unsigned i = CSI.size(); i != 0; --i) { 1364 unsigned Reg = CSI[i - 1].getReg(); 1365 1366 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg)) 1367 continue; 1368 // Add the callee-saved register as live-in. It's killed at the spill. 1369 MBB.addLiveIn(Reg); 1370 1371 BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill) 1372 .setMIFlag(MachineInstr::FrameSetup); 1373 } 1374 1375 // Make XMM regs spilled. X86 does not have ability of push/pop XMM. 1376 // It can be done by spilling XMMs to stack frame. 1377 for (unsigned i = CSI.size(); i != 0; --i) { 1378 unsigned Reg = CSI[i-1].getReg(); 1379 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) 1380 continue; 1381 // Add the callee-saved register as live-in. It's killed at the spill. 1382 MBB.addLiveIn(Reg); 1383 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 1384 1385 TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i - 1].getFrameIdx(), RC, 1386 TRI); 1387 --MI; 1388 MI->setFlag(MachineInstr::FrameSetup); 1389 ++MI; 1390 } 1391 1392 return true; 1393 } 1394 1395 bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, 1396 MachineBasicBlock::iterator MI, 1397 const std::vector<CalleeSavedInfo> &CSI, 1398 const TargetRegisterInfo *TRI) const { 1399 if (CSI.empty()) 1400 return false; 1401 1402 DebugLoc DL = MBB.findDebugLoc(MI); 1403 1404 // Reload XMMs from stack frame. 1405 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 1406 unsigned Reg = CSI[i].getReg(); 1407 if (X86::GR64RegClass.contains(Reg) || 1408 X86::GR32RegClass.contains(Reg)) 1409 continue; 1410 1411 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 1412 TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI); 1413 } 1414 1415 // POP GPRs. 1416 unsigned Opc = STI.is64Bit() ? 
X86::POP64r : X86::POP32r; 1417 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 1418 unsigned Reg = CSI[i].getReg(); 1419 if (!X86::GR64RegClass.contains(Reg) && 1420 !X86::GR32RegClass.contains(Reg)) 1421 continue; 1422 1423 BuildMI(MBB, MI, DL, TII.get(Opc), Reg); 1424 } 1425 return true; 1426 } 1427 1428 void X86FrameLowering::determineCalleeSaves(MachineFunction &MF, 1429 BitVector &SavedRegs, 1430 RegScavenger *RS) const { 1431 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); 1432 1433 MachineFrameInfo *MFI = MF.getFrameInfo(); 1434 1435 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 1436 int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); 1437 1438 if (TailCallReturnAddrDelta < 0) { 1439 // create RETURNADDR area 1440 // arg 1441 // arg 1442 // RETADDR 1443 // { ... 1444 // RETADDR area 1445 // ... 1446 // } 1447 // [EBP] 1448 MFI->CreateFixedObject(-TailCallReturnAddrDelta, 1449 TailCallReturnAddrDelta - SlotSize, true); 1450 } 1451 1452 // Spill the BasePtr if it's used. 1453 if (TRI->hasBasePointer(MF)) 1454 SavedRegs.set(TRI->getBaseRegister()); 1455 } 1456 1457 static bool 1458 HasNestArgument(const MachineFunction *MF) { 1459 const Function *F = MF->getFunction(); 1460 for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); 1461 I != E; I++) { 1462 if (I->hasNestAttr()) 1463 return true; 1464 } 1465 return false; 1466 } 1467 1468 /// GetScratchRegister - Get a temp register for performing work in the 1469 /// segmented stack and the Erlang/HiPE stack prologue. Depending on platform 1470 /// and the properties of the function either one or two registers will be 1471 /// needed. Set primary to true for the first register, false for the second. 1472 static unsigned 1473 GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary) { 1474 CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv(); 1475 1476 // Erlang stuff. 
1477 if (CallingConvention == CallingConv::HiPE) { 1478 if (Is64Bit) 1479 return Primary ? X86::R14 : X86::R13; 1480 else 1481 return Primary ? X86::EBX : X86::EDI; 1482 } 1483 1484 if (Is64Bit) { 1485 if (IsLP64) 1486 return Primary ? X86::R11 : X86::R12; 1487 else 1488 return Primary ? X86::R11D : X86::R12D; 1489 } 1490 1491 bool IsNested = HasNestArgument(&MF); 1492 1493 if (CallingConvention == CallingConv::X86_FastCall || 1494 CallingConvention == CallingConv::Fast) { 1495 if (IsNested) 1496 report_fatal_error("Segmented stacks does not support fastcall with " 1497 "nested function."); 1498 return Primary ? X86::EAX : X86::ECX; 1499 } 1500 if (IsNested) 1501 return Primary ? X86::EDX : X86::EAX; 1502 return Primary ? X86::ECX : X86::EAX; 1503 } 1504 1505 // The stack limit in the TCB is set to this many bytes above the actual stack 1506 // limit. 1507 static const uint64_t kSplitStackAvailable = 256; 1508 1509 void X86FrameLowering::adjustForSegmentedStacks( 1510 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const { 1511 MachineFrameInfo *MFI = MF.getFrameInfo(); 1512 uint64_t StackSize; 1513 unsigned TlsReg, TlsOffset; 1514 DebugLoc DL; 1515 1516 unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true); 1517 assert(!MF.getRegInfo().isLiveIn(ScratchReg) && 1518 "Scratch register is live-in"); 1519 1520 if (MF.getFunction()->isVarArg()) 1521 report_fatal_error("Segmented stacks do not support vararg functions."); 1522 if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() && 1523 !STI.isTargetWin64() && !STI.isTargetFreeBSD() && 1524 !STI.isTargetDragonFly()) 1525 report_fatal_error("Segmented stacks not supported on this platform."); 1526 1527 // Eventually StackSize will be calculated by a link-time pass; which will 1528 // also decide whether checking code needs to be injected into this particular 1529 // prologue. 
StackSize = MFI->getStackSize();

  // Do not generate a prologue for functions with a stack of size zero
  if (StackSize == 0)
    return;

  // Two new blocks are placed in front of the function: checkMBB compares the
  // stack pointer against the stacklet limit, and allocMBB calls __morestack
  // when the check fails.
  MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  bool IsNested = false;

  // We need to know if the function has a nest argument only in 64 bit mode.
  if (Is64Bit)
    IsNested = HasNestArgument(&MF);

  // The MOV R10, RAX needs to be in a different block, since the RET we emit in
  // allocMBB needs to be last (terminating) instruction.

  // Both new blocks inherit the live-ins of the original prologue block.
  for (MachineBasicBlock::livein_iterator i = PrologueMBB.livein_begin(),
         e = PrologueMBB.livein_end();
       i != e; i++) {
    allocMBB->addLiveIn(*i);
    checkMBB->addLiveIn(*i);
  }

  if (IsNested)
    allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);

  // Pushing checkMBB last makes it the first block of the function, with
  // allocMBB immediately after it.
  MF.push_front(allocMBB);
  MF.push_front(checkMBB);

  // When the frame size is less than 256 we just compare the stack
  // boundary directly to the value of the stack pointer, per gcc.
  bool CompareStackPointer = StackSize < kSplitStackAvailable;

  // Read the limit off the current stacklet off the stack_guard location.
  if (Is64Bit) {
    if (STI.isTargetLinux()) {
      TlsReg = X86::FS;
      TlsOffset = IsLP64 ? 0x70 : 0x40;
    } else if (STI.isTargetDarwin()) {
      TlsReg = X86::GS;
      TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
    } else if (STI.isTargetWin64()) {
      TlsReg = X86::GS;
      TlsOffset = 0x28; // pvArbitrary, reserved for application use
    } else if (STI.isTargetFreeBSD()) {
      TlsReg = X86::FS;
      TlsOffset = 0x18;
    } else if (STI.isTargetDragonFly()) {
      TlsReg = X86::FS;
      TlsOffset = 0x20; // use tls_tcb.tcb_segstack
    } else {
      report_fatal_error("Segmented stacks not supported on this platform.");
    }

    // Compare either SP itself or (SP - StackSize), computed into the scratch
    // register, against the limit stored at TlsReg:TlsOffset.
    if (CompareStackPointer)
      ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
    else
      BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r), ScratchReg).addReg(X86::RSP)
        .addImm(1).addReg(0).addImm(-StackSize).addReg(0);

    BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm)).addReg(ScratchReg)
      .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
  } else {
    if (STI.isTargetLinux()) {
      TlsReg = X86::GS;
      TlsOffset = 0x30;
    } else if (STI.isTargetDarwin()) {
      TlsReg = X86::GS;
      TlsOffset = 0x48 + 90*4;
    } else if (STI.isTargetWin32()) {
      TlsReg = X86::FS;
      TlsOffset = 0x14; // pvArbitrary, reserved for application use
    } else if (STI.isTargetDragonFly()) {
      TlsReg = X86::FS;
      TlsOffset = 0x10; // use tls_tcb.tcb_segstack
    } else if (STI.isTargetFreeBSD()) {
      report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
    } else {
      report_fatal_error("Segmented stacks not supported on this platform.");
    }

    if (CompareStackPointer)
      ScratchReg = X86::ESP;
    else
      BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
        .addImm(1).addReg(0).addImm(-StackSize).addReg(0);

    // NOTE(review): isTargetWin64() is tested here although this is the
    // !Is64Bit branch - presumably harmless, confirm.
    if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() ||
        STI.isTargetDragonFly()) {
      BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
        .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
    } else if (STI.isTargetDarwin()) {

      // TlsOffset doesn't fit into a mod r/m byte so we need an extra register.
      unsigned ScratchReg2;
      bool SaveScratch2;
      if (CompareStackPointer) {
        // The primary scratch register is available for holding the TLS offset.
        ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
        SaveScratch2 = false;
      } else {
        // Need to use a second register to hold the TLS offset
        ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);

        // Unfortunately, with fastcc the second scratch register may hold an
        // argument.
        SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
      }

      // If Scratch2 is live-in then it needs to be saved.
      assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
             "Scratch register is live-in and not saved");

      if (SaveScratch2)
        BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
          .addReg(ScratchReg2, RegState::Kill);

      // Load the TLS offset into the second scratch register and use it as
      // the base of the memory operand of the compare.
      BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
        .addImm(TlsOffset);
      BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
        .addReg(ScratchReg)
        .addReg(ScratchReg2).addImm(1).addReg(0)
        .addImm(0)
        .addReg(TlsReg);

      if (SaveScratch2)
        BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
    }
  }

  // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
  // It jumps to normal execution of the function body.
  // NOTE(review): JA is a strict unsigned "above" test, so the ">=" in the
  // sentence above is approximate.
  BuildMI(checkMBB, DL, TII.get(X86::JA_1)).addMBB(&PrologueMBB);

  // On 32 bit we first push the arguments size and then the frame size. On 64
  // bit, we pass the stack frame size in r10 and the argument size in r11.
  if (Is64Bit) {
    // Functions with nested arguments use R10, so it needs to be saved across
    // the call to _morestack

    const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
    const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
    const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
    const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
    const unsigned MOVri = IsLP64 ? X86::MOV64ri : X86::MOV32ri;

    if (IsNested)
      BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);

    BuildMI(allocMBB, DL, TII.get(MOVri), Reg10)
      .addImm(StackSize);
    BuildMI(allocMBB, DL, TII.get(MOVri), Reg11)
      .addImm(X86FI->getArgumentStackSize());
  } else {
    BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
      .addImm(X86FI->getArgumentStackSize());
    BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
      .addImm(StackSize);
  }

  // __morestack is in libgcc
  if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
    // Under the large code model, we cannot assume that __morestack lives
    // within 2^31 bytes of the call site, so we cannot use pc-relative
    // addressing. We cannot perform the call via a temporary register,
    // as the rax register may be used to store the static chain, and all
    // other suitable registers may be either callee-save or used for
    // parameter passing. We cannot use the stack at this point either
    // because __morestack manipulates the stack directly.
    //
    // To avoid these issues, perform an indirect call via a read-only memory
    // location containing the address.
    //
    // This solution is not perfect, as it assumes that the .rodata section
    // is laid out within 2^31 bytes of each function body, but this seems
    // to be sufficient for JIT.
    BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
        .addReg(X86::RIP)
        .addImm(0)
        .addReg(0)
        .addExternalSymbol("__morestack_addr")
        .addReg(0);
    MF.getMMI().setUsesMorestackAddr(true);
  } else {
    if (Is64Bit)
      BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
        .addExternalSymbol("__morestack");
    else
      BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
        .addExternalSymbol("__morestack");
  }

  // Terminate allocMBB with the return pseudo; the nested variant is the one
  // that also restores R10.
  if (IsNested)
    BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
  else
    BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));

  allocMBB->addSuccessor(&PrologueMBB);

  checkMBB->addSuccessor(allocMBB);
  checkMBB->addSuccessor(&PrologueMBB);

#ifdef XDEBUG
  MF.verify();
#endif
}

/// Erlang programs may need a special prologue to handle the stack size they
/// might need at runtime. That is because Erlang/OTP does not implement a C
/// stack but uses a custom implementation of hybrid stack/heap architecture.
/// (for more information see Eric Stenman's Ph.D. thesis:
/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
///
/// CheckStack:
///       temp0 = sp - MaxStack
///       if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
/// OldStart:
///       ...
/// IncStack:
///       call inc_stack   # doubles the stack space
///       temp0 = sp - MaxStack
///       if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
void X86FrameLowering::adjustForHiPEPrologue(
    MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
  MachineFrameInfo *MFI = MF.getFrameInfo();
  DebugLoc DL;
  // HiPE-specific values
  const unsigned HipeLeafWords = 24;
  const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
  // Stack space (in bytes) below which no runtime check is emitted.
  const unsigned Guaranteed = HipeLeafWords * SlotSize;
  // Number of this function's arguments beyond CCRegisteredArgs.
  unsigned CallerStkArity = MF.getFunction()->arg_size() > CCRegisteredArgs ?
MF.getFunction()->arg_size() - CCRegisteredArgs : 0;
  // Frame size plus one slot per stack-passed argument plus one extra slot.
  unsigned MaxStack = MFI->getStackSize() + CallerStkArity*SlotSize + SlotSize;

  assert(STI.isTargetLinux() &&
         "HiPE prologue is only supported on Linux operating systems.");

  // Compute the largest caller's frame that is needed to fit the callees'
  // frames. This 'MaxStack' is computed from:
  //
  // a) the fixed frame size, which is the space needed for all spilled temps,
  // b) outgoing on-stack parameter areas, and
  // c) the minimum stack space this function needs to make available for the
  //    functions it calls (a tunable ABI property).
  if (MFI->hasCalls()) {
    unsigned MoreStackForCalls = 0;

    // Scan every call instruction in the function and keep the largest extra
    // amount any eligible callee requires.
    for (MachineFunction::iterator MBBI = MF.begin(), MBBE = MF.end();
         MBBI != MBBE; ++MBBI)
      for (MachineBasicBlock::iterator MI = MBBI->begin(), ME = MBBI->end();
           MI != ME; ++MI) {
        if (!MI->isCall())
          continue;

        // Get callee operand.
        const MachineOperand &MO = MI->getOperand(0);

        // Only take account of global function calls (no closures etc.).
        if (!MO.isGlobal())
          continue;

        const Function *F = dyn_cast<Function>(MO.getGlobal());
        if (!F)
          continue;

        // Do not update 'MaxStack' for primitive and built-in functions
        // (encoded with names either starting with "erlang."/"bif_" or not
        // having a ".", such as a simple <Module>.<Function>.<Arity>, or an
        // "_", such as the BIF "suspend_0") as they are executed on another
        // stack.
        // NOTE(review): the checks below match "erlang." / "bif_" anywhere in
        // the name, not only as a prefix - confirm that this is intended.
        if (F->getName().find("erlang.") != StringRef::npos ||
            F->getName().find("bif_") != StringRef::npos ||
            F->getName().find_first_of("._") == StringRef::npos)
          continue;

        // Number of callee arguments beyond CCRegisteredArgs.
        unsigned CalleeStkArity =
          F->arg_size() > CCRegisteredArgs ? F->arg_size()-CCRegisteredArgs : 0;
        // The guard keeps the unsigned subtraction below from wrapping.
        if (HipeLeafWords - 1 > CalleeStkArity)
          MoreStackForCalls = std::max(MoreStackForCalls,
                               (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
      }
    MaxStack += MoreStackForCalls;
  }

  // If the stack frame needed is larger than the guaranteed then runtime checks
  // and calls to "inc_stack_0" BIF should be inserted in the assembly prologue.
  if (MaxStack > Guaranteed) {
    MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
    MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();

    // Both new blocks inherit the live-ins of the original prologue block.
    for (MachineBasicBlock::livein_iterator I = PrologueMBB.livein_begin(),
           E = PrologueMBB.livein_end();
         I != E; I++) {
      stackCheckMBB->addLiveIn(*I);
      incStackMBB->addLiveIn(*I);
    }

    // Pushing stackCheckMBB last makes it the first block of the function,
    // with incStackMBB immediately after it.
    MF.push_front(incStackMBB);
    MF.push_front(stackCheckMBB);

    // Select registers, opcodes and the SP-limit offset for the pointer width.
    unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
    unsigned LEAop, CMPop, CALLop;
    if (Is64Bit) {
      SPReg = X86::RSP;
      PReg  = X86::RBP;
      LEAop = X86::LEA64r;
      CMPop = X86::CMP64rm;
      CALLop = X86::CALL64pcrel32;
      SPLimitOffset = 0x90;
    } else {
      SPReg = X86::ESP;
      PReg  = X86::EBP;
      LEAop = X86::LEA32r;
      CMPop = X86::CMP32rm;
      CALLop = X86::CALLpcrel32;
      SPLimitOffset = 0x4c;
    }

    ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
    assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
           "HiPE prologue scratch register is live-in");

    // Create new MBB for StackCheck:
    //   ScratchReg = SP - MaxStack; compare against the limit at
    //   SPLimitOffset(PReg); jump to the original prologue on JAE.
    addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg),
                 SPReg, false, -MaxStack);
    // SPLimitOffset is in a fixed heap location (pointed by BP).
    addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop))
                 .addReg(ScratchReg), PReg, false, SPLimitOffset);
    BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_1)).addMBB(&PrologueMBB);

    // Create new MBB for IncStack:
    //   call inc_stack_0, redo the same computation and compare, and loop
    //   back to this block on JLE.
    BuildMI(incStackMBB, DL, TII.get(CALLop)).
      addExternalSymbol("inc_stack_0");
    addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg),
                 SPReg, false, -MaxStack);
    addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop))
                 .addReg(ScratchReg), PReg, false, SPLimitOffset);
    BuildMI(incStackMBB, DL, TII.get(X86::JLE_1)).addMBB(incStackMBB);

    // Successor edges; the second argument weights each edge 99 : 1 in favour
    // of continuing to the original prologue.
    stackCheckMBB->addSuccessor(&PrologueMBB, 99);
    stackCheckMBB->addSuccessor(incStackMBB, 1);
    incStackMBB->addSuccessor(&PrologueMBB, 99);
    incStackMBB->addSuccessor(incStackMBB, 1);
  }
#ifdef XDEBUG
  MF.verify();
#endif
}

void X86FrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I) const {
  bool reserveCallFrame = hasReservedCallFrame(MF);
  unsigned Opcode = I->getOpcode();
  bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
  DebugLoc DL = I->getDebugLoc();
  // Operand 0 is only read when the call frame is not reserved; operand 1 is
  // read for frame-destroy pseudos or whenever Amount is non-zero.
  uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
  uint64_t InternalAmt = (isDestroy || Amount) ? I->getOperand(1).getImm() : 0;
  // The pseudo is erased; I now refers to the instruction that followed it.
  I = MBB.erase(I);

  if (!reserveCallFrame) {
    // If the stack pointer can be changed after prologue, turn the
    // adjcallstackup instruction into a 'sub ESP, <amt>' and the
    // adjcallstackdown instruction into 'add ESP, <amt>'
    if (Amount == 0)
      return;

    // We need to keep the stack aligned properly.  To do this, we round the
    // amount of space needed for the outgoing arguments up to the next
    // alignment boundary.
    unsigned StackAlign = getStackAlignment();
    Amount = RoundUpToAlignment(Amount, StackAlign);

    // Factor out the amount that gets handled inside the sequence
    // (Pushes of argument for frame setup, callee pops for frame destroy)
    Amount -= InternalAmt;

    if (Amount) {
      // Add Amount to SP to destroy a frame, and subtract to setup.
      int Offset = isDestroy ?
Amount : -Amount; 1912 BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false); 1913 } 1914 return; 1915 } 1916 1917 if (isDestroy && InternalAmt) { 1918 // If we are performing frame pointer elimination and if the callee pops 1919 // something off the stack pointer, add it back. We do this until we have 1920 // more advanced stack pointer tracking ability. 1921 // We are not tracking the stack pointer adjustment by the callee, so make 1922 // sure we restore the stack pointer immediately after the call, there may 1923 // be spill code inserted between the CALL and ADJCALLSTACKUP instructions. 1924 MachineBasicBlock::iterator B = MBB.begin(); 1925 while (I != B && !std::prev(I)->isCall()) 1926 --I; 1927 BuildStackAdjustment(MBB, I, DL, -InternalAmt, /*InEpilogue=*/false); 1928 } 1929 } 1930 1931 bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { 1932 assert(MBB.getParent() && "Block is not attached to a function!"); 1933 1934 if (canUseLEAForSPInEpilogue(*MBB.getParent())) 1935 return true; 1936 1937 // If we cannot use LEA to adjust SP, we may need to use ADD, which 1938 // clobbers the EFLAGS. Check that none of the terminators reads the 1939 // EFLAGS, and if one uses it, conservatively assume this is not 1940 // safe to insert the epilogue here. 1941 return !terminatorsNeedFlagsAsInput(MBB); 1942 } 1943