1 //=======- X86FrameLowering.cpp - X86 Frame Information ------------*- C++ -*-====// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains the X86 implementation of TargetFrameLowering class. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "X86FrameLowering.h" 15 #include "X86InstrBuilder.h" 16 #include "X86InstrInfo.h" 17 #include "X86MachineFunctionInfo.h" 18 #include "X86TargetMachine.h" 19 #include "llvm/Function.h" 20 #include "llvm/CodeGen/MachineFrameInfo.h" 21 #include "llvm/CodeGen/MachineFunction.h" 22 #include "llvm/CodeGen/MachineInstrBuilder.h" 23 #include "llvm/CodeGen/MachineModuleInfo.h" 24 #include "llvm/CodeGen/MachineRegisterInfo.h" 25 #include "llvm/Target/TargetData.h" 26 #include "llvm/Target/TargetOptions.h" 27 #include "llvm/Support/CommandLine.h" 28 #include "llvm/ADT/SmallSet.h" 29 30 using namespace llvm; 31 32 // FIXME: completely move here. 33 extern cl::opt<bool> ForceStackAlign; 34 35 bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { 36 return !MF.getFrameInfo()->hasVarSizedObjects(); 37 } 38 39 /// hasFP - Return true if the specified function should have a dedicated frame 40 /// pointer register. This is true if the function has variable sized allocas 41 /// or if frame pointer elimination is disabled. 42 bool X86FrameLowering::hasFP(const MachineFunction &MF) const { 43 const MachineFrameInfo *MFI = MF.getFrameInfo(); 44 const MachineModuleInfo &MMI = MF.getMMI(); 45 const TargetRegisterInfo *RI = TM.getRegisterInfo(); 46 47 return (DisableFramePointerElim(MF) || 48 RI->needsStackRealignment(MF) || 49 MFI->hasVarSizedObjects() || 50 MFI->isFrameAddressTaken() || 51 MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() || 52 MMI.callsUnwindInit()); 53 } 54 55 static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) { 56 if (is64Bit) { 57 if (isInt<8>(Imm)) 58 return X86::SUB64ri8; 59 return X86::SUB64ri32; 60 } else { 61 if (isInt<8>(Imm)) 62 return X86::SUB32ri8; 63 return X86::SUB32ri; 64 } 65 } 66 67 static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) { 68 if (is64Bit) { 69 if (isInt<8>(Imm)) 70 return X86::ADD64ri8; 71 return X86::ADD64ri32; 72 } else { 73 if (isInt<8>(Imm)) 74 return X86::ADD32ri8; 75 return X86::ADD32ri; 76 } 77 } 78 79 /// findDeadCallerSavedReg - Return a caller-saved register that isn't live 80 /// when it reaches the "return" instruction. We can then pop a stack object 81 /// to this register without worry about clobbering it. 82 static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, 83 MachineBasicBlock::iterator &MBBI, 84 const TargetRegisterInfo &TRI, 85 bool Is64Bit) { 86 const MachineFunction *MF = MBB.getParent(); 87 const Function *F = MF->getFunction(); 88 if (!F || MF->getMMI().callsEHReturn()) 89 return 0; 90 91 static const unsigned CallerSavedRegs32Bit[] = { 92 X86::EAX, X86::EDX, X86::ECX 93 }; 94 95 static const unsigned CallerSavedRegs64Bit[] = { 96 X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI, 97 X86::R8, X86::R9, X86::R10, X86::R11 98 }; 99 100 unsigned Opc = MBBI->getOpcode(); 101 switch (Opc) { 102 default: return 0; 103 case X86::RET: 104 case X86::RETI: 105 case X86::TCRETURNdi: 106 case X86::TCRETURNri: 107 case X86::TCRETURNmi: 108 case X86::TCRETURNdi64: 109 case X86::TCRETURNri64: 110 case X86::TCRETURNmi64: 111 case X86::EH_RETURN: 112 case X86::EH_RETURN64: { 113 SmallSet<unsigned, 8> Uses; 114 for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) { 115 MachineOperand &MO = MBBI->getOperand(i); 116 if (!MO.isReg() || MO.isDef()) 117 continue; 118 unsigned Reg = MO.getReg(); 119 if (!Reg) 120 continue; 121 for (const unsigned *AsI = TRI.getOverlaps(Reg); *AsI; ++AsI) 122 Uses.insert(*AsI); 123 } 124 125 const unsigned *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit; 126 for (; *CS; ++CS) 127 if (!Uses.count(*CS)) 128 return *CS; 129 } 130 } 131 132 return 0; 133 } 134 135 136 /// emitSPUpdate - Emit a series of instructions to increment / decrement the 137 /// stack pointer by a constant value. 138 static 139 void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, 140 unsigned StackPtr, int64_t NumBytes, 141 bool Is64Bit, const TargetInstrInfo &TII, 142 const TargetRegisterInfo &TRI) { 143 bool isSub = NumBytes < 0; 144 uint64_t Offset = isSub ? -NumBytes : NumBytes; 145 unsigned Opc = isSub ? 146 getSUBriOpcode(Is64Bit, Offset) : 147 getADDriOpcode(Is64Bit, Offset); 148 uint64_t Chunk = (1LL << 31) - 1; 149 DebugLoc DL = MBB.findDebugLoc(MBBI); 150 151 while (Offset) { 152 uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset; 153 if (ThisVal == (Is64Bit ? 8 : 4)) { 154 // Use push / pop instead. 155 unsigned Reg = isSub 156 ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX) 157 : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit); 158 if (Reg) { 159 Opc = isSub 160 ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r) 161 : (Is64Bit ? X86::POP64r : X86::POP32r); 162 BuildMI(MBB, MBBI, DL, TII.get(Opc)) 163 .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub)); 164 Offset -= ThisVal; 165 continue; 166 } 167 } 168 169 MachineInstr *MI = 170 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) 171 .addReg(StackPtr) 172 .addImm(ThisVal); 173 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. 174 Offset -= ThisVal; 175 } 176 } 177 178 /// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator. 179 static 180 void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, 181 unsigned StackPtr, uint64_t *NumBytes = NULL) { 182 if (MBBI == MBB.begin()) return; 183 184 MachineBasicBlock::iterator PI = prior(MBBI); 185 unsigned Opc = PI->getOpcode(); 186 if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || 187 Opc == X86::ADD32ri || Opc == X86::ADD32ri8) && 188 PI->getOperand(0).getReg() == StackPtr) { 189 if (NumBytes) 190 *NumBytes += PI->getOperand(2).getImm(); 191 MBB.erase(PI); 192 } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || 193 Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && 194 PI->getOperand(0).getReg() == StackPtr) { 195 if (NumBytes) 196 *NumBytes -= PI->getOperand(2).getImm(); 197 MBB.erase(PI); 198 } 199 } 200 201 /// mergeSPUpdatesDown - Merge two stack-manipulating instructions lower iterator. 202 static 203 void mergeSPUpdatesDown(MachineBasicBlock &MBB, 204 MachineBasicBlock::iterator &MBBI, 205 unsigned StackPtr, uint64_t *NumBytes = NULL) { 206 // FIXME: THIS ISN'T RUN!!! 207 return; 208 209 if (MBBI == MBB.end()) return; 210 211 MachineBasicBlock::iterator NI = llvm::next(MBBI); 212 if (NI == MBB.end()) return; 213 214 unsigned Opc = NI->getOpcode(); 215 if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || 216 Opc == X86::ADD32ri || Opc == X86::ADD32ri8) && 217 NI->getOperand(0).getReg() == StackPtr) { 218 if (NumBytes) 219 *NumBytes -= NI->getOperand(2).getImm(); 220 MBB.erase(NI); 221 MBBI = NI; 222 } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || 223 Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && 224 NI->getOperand(0).getReg() == StackPtr) { 225 if (NumBytes) 226 *NumBytes += NI->getOperand(2).getImm(); 227 MBB.erase(NI); 228 MBBI = NI; 229 } 230 } 231 232 /// mergeSPUpdates - Checks the instruction before/after the passed 233 /// instruction. If it is an ADD/SUB instruction it is deleted argument and the 234 /// stack adjustment is returned as a positive value for ADD and a negative for 235 /// SUB. 236 static int mergeSPUpdates(MachineBasicBlock &MBB, 237 MachineBasicBlock::iterator &MBBI, 238 unsigned StackPtr, 239 bool doMergeWithPrevious) { 240 if ((doMergeWithPrevious && MBBI == MBB.begin()) || 241 (!doMergeWithPrevious && MBBI == MBB.end())) 242 return 0; 243 244 MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI; 245 MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : llvm::next(MBBI); 246 unsigned Opc = PI->getOpcode(); 247 int Offset = 0; 248 249 if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || 250 Opc == X86::ADD32ri || Opc == X86::ADD32ri8) && 251 PI->getOperand(0).getReg() == StackPtr){ 252 Offset += PI->getOperand(2).getImm(); 253 MBB.erase(PI); 254 if (!doMergeWithPrevious) MBBI = NI; 255 } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || 256 Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && 257 PI->getOperand(0).getReg() == StackPtr) { 258 Offset -= PI->getOperand(2).getImm(); 259 MBB.erase(PI); 260 if (!doMergeWithPrevious) MBBI = NI; 261 } 262 263 return Offset; 264 } 265 266 static bool isEAXLiveIn(MachineFunction &MF) { 267 for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(), 268 EE = MF.getRegInfo().livein_end(); II != EE; ++II) { 269 unsigned Reg = II->first; 270 271 if (Reg == X86::EAX || Reg == X86::AX || 272 Reg == X86::AH || Reg == X86::AL) 273 return true; 274 } 275 276 return false; 277 } 278 279 void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF, 280 MCSymbol *Label, 281 unsigned FramePtr) const { 282 MachineFrameInfo *MFI = MF.getFrameInfo(); 283 MachineModuleInfo &MMI = MF.getMMI(); 284 285 // Add callee saved registers to move list. 286 const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); 287 if (CSI.empty()) return; 288 289 std::vector<MachineMove> &Moves = MMI.getFrameMoves(); 290 const TargetData *TD = TM.getTargetData(); 291 bool HasFP = hasFP(MF); 292 293 // Calculate amount of bytes used for return address storing. 294 int stackGrowth = -TD->getPointerSize(); 295 296 // FIXME: This is dirty hack. The code itself is pretty mess right now. 297 // It should be rewritten from scratch and generalized sometimes. 298 299 // Determine maximum offset (minimum due to stack growth). 300 int64_t MaxOffset = 0; 301 for (std::vector<CalleeSavedInfo>::const_iterator 302 I = CSI.begin(), E = CSI.end(); I != E; ++I) 303 MaxOffset = std::min(MaxOffset, 304 MFI->getObjectOffset(I->getFrameIdx())); 305 306 // Calculate offsets. 307 int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth; 308 for (std::vector<CalleeSavedInfo>::const_iterator 309 I = CSI.begin(), E = CSI.end(); I != E; ++I) { 310 int64_t Offset = MFI->getObjectOffset(I->getFrameIdx()); 311 unsigned Reg = I->getReg(); 312 Offset = MaxOffset - Offset + saveAreaOffset; 313 314 // Don't output a new machine move if we're re-saving the frame 315 // pointer. This happens when the PrologEpilogInserter has inserted an extra 316 // "PUSH" of the frame pointer -- the "emitPrologue" method automatically 317 // generates one when frame pointers are used. If we generate a "machine 318 // move" for this extra "PUSH", the linker will lose track of the fact that 319 // the frame pointer should have the value of the first "PUSH" when it's 320 // trying to unwind. 321 // 322 // FIXME: This looks inelegant. It's possibly correct, but it's covering up 323 // another bug. I.e., one where we generate a prolog like this: 324 // 325 // pushl %ebp 326 // movl %esp, %ebp 327 // pushl %ebp 328 // pushl %esi 329 // ... 330 // 331 // The immediate re-push of EBP is unnecessary. At the least, it's an 332 // optimization bug. EBP can be used as a scratch register in certain 333 // cases, but probably not when we have a frame pointer. 334 if (HasFP && FramePtr == Reg) 335 continue; 336 337 MachineLocation CSDst(MachineLocation::VirtualFP, Offset); 338 MachineLocation CSSrc(Reg); 339 Moves.push_back(MachineMove(Label, CSDst, CSSrc)); 340 } 341 } 342 343 /// emitPrologue - Push callee-saved registers onto the stack, which 344 /// automatically adjust the stack pointer. Adjust the stack pointer to allocate 345 /// space for local variables. Also emit labels used by the exception handler to 346 /// generate the exception handling frames. 347 void X86FrameLowering::emitPrologue(MachineFunction &MF) const { 348 MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB. 349 MachineBasicBlock::iterator MBBI = MBB.begin(); 350 MachineFrameInfo *MFI = MF.getFrameInfo(); 351 const Function *Fn = MF.getFunction(); 352 const X86RegisterInfo *RegInfo = TM.getRegisterInfo(); 353 const X86InstrInfo &TII = *TM.getInstrInfo(); 354 MachineModuleInfo &MMI = MF.getMMI(); 355 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 356 bool needsFrameMoves = MMI.hasDebugInfo() || 357 !Fn->doesNotThrow() || UnwindTablesMandatory; 358 uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment. 359 uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate. 360 bool HasFP = hasFP(MF); 361 bool Is64Bit = STI.is64Bit(); 362 bool IsWin64 = STI.isTargetWin64(); 363 unsigned StackAlign = getStackAlignment(); 364 unsigned SlotSize = RegInfo->getSlotSize(); 365 unsigned FramePtr = RegInfo->getFrameRegister(MF); 366 unsigned StackPtr = RegInfo->getStackRegister(); 367 368 DebugLoc DL; 369 370 // If we're forcing a stack realignment we can't rely on just the frame 371 // info, we need to know the ABI stack alignment as well in case we 372 // have a call out. Otherwise just make sure we have some alignment - we'll 373 // go with the minimum SlotSize. 374 if (ForceStackAlign) { 375 if (MFI->hasCalls()) 376 MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; 377 else if (MaxAlign < SlotSize) 378 MaxAlign = SlotSize; 379 } 380 381 // Add RETADDR move area to callee saved frame size. 382 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); 383 if (TailCallReturnAddrDelta < 0) 384 X86FI->setCalleeSavedFrameSize( 385 X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta); 386 387 // If this is x86-64 and the Red Zone is not disabled, if we are a leaf 388 // function, and use up to 128 bytes of stack space, don't have a frame 389 // pointer, calls, or dynamic alloca then we do not need to adjust the 390 // stack pointer (we fit in the Red Zone). 391 if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) && 392 !RegInfo->needsStackRealignment(MF) && 393 !MFI->hasVarSizedObjects() && // No dynamic alloca. 394 !MFI->adjustsStack() && // No calls. 395 !IsWin64) { // Win64 has no Red Zone 396 uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); 397 if (HasFP) MinSize += SlotSize; 398 StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); 399 MFI->setStackSize(StackSize); 400 } 401 402 // Insert stack pointer adjustment for later moving of return addr. Only 403 // applies to tail call optimized functions where the callee argument stack 404 // size is bigger than the callers. 405 if (TailCallReturnAddrDelta < 0) { 406 MachineInstr *MI = 407 BuildMI(MBB, MBBI, DL, 408 TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)), 409 StackPtr) 410 .addReg(StackPtr) 411 .addImm(-TailCallReturnAddrDelta); 412 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. 413 } 414 415 // Mapping for machine moves: 416 // 417 // DST: VirtualFP AND 418 // SRC: VirtualFP => DW_CFA_def_cfa_offset 419 // ELSE => DW_CFA_def_cfa 420 // 421 // SRC: VirtualFP AND 422 // DST: Register => DW_CFA_def_cfa_register 423 // 424 // ELSE 425 // OFFSET < 0 => DW_CFA_offset_extended_sf 426 // REG < 64 => DW_CFA_offset + Reg 427 // ELSE => DW_CFA_offset_extended 428 429 std::vector<MachineMove> &Moves = MMI.getFrameMoves(); 430 const TargetData *TD = MF.getTarget().getTargetData(); 431 uint64_t NumBytes = 0; 432 int stackGrowth = -TD->getPointerSize(); 433 434 if (HasFP) { 435 // Calculate required stack adjustment. 436 uint64_t FrameSize = StackSize - SlotSize; 437 if (RegInfo->needsStackRealignment(MF)) 438 FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign; 439 440 NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize(); 441 442 // Get the offset of the stack slot for the EBP register, which is 443 // guaranteed to be the last slot by processFunctionBeforeFrameFinalized. 444 // Update the frame offset adjustment. 445 MFI->setOffsetAdjustment(-NumBytes); 446 447 // Save EBP/RBP into the appropriate stack slot. 448 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r)) 449 .addReg(FramePtr, RegState::Kill); 450 451 if (needsFrameMoves) { 452 // Mark the place where EBP/RBP was saved. 453 MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); 454 BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel); 455 456 // Define the current CFA rule to use the provided offset. 457 if (StackSize) { 458 MachineLocation SPDst(MachineLocation::VirtualFP); 459 MachineLocation SPSrc(MachineLocation::VirtualFP, 2 * stackGrowth); 460 Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc)); 461 } else { 462 MachineLocation SPDst(StackPtr); 463 MachineLocation SPSrc(StackPtr, stackGrowth); 464 Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc)); 465 } 466 467 // Change the rule for the FramePtr to be an "offset" rule. 468 MachineLocation FPDst(MachineLocation::VirtualFP, 2 * stackGrowth); 469 MachineLocation FPSrc(FramePtr); 470 Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc)); 471 } 472 473 // Update EBP with the new base value... 474 BuildMI(MBB, MBBI, DL, 475 TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr) 476 .addReg(StackPtr); 477 478 if (needsFrameMoves) { 479 // Mark effective beginning of when frame pointer becomes valid. 480 MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); 481 BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel); 482 483 // Define the current CFA to use the EBP/RBP register. 484 MachineLocation FPDst(FramePtr); 485 MachineLocation FPSrc(MachineLocation::VirtualFP); 486 Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc)); 487 } 488 489 // Mark the FramePtr as live-in in every block except the entry. 490 for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); 491 I != E; ++I) 492 I->addLiveIn(FramePtr); 493 494 // Realign stack 495 if (RegInfo->needsStackRealignment(MF)) { 496 MachineInstr *MI = 497 BuildMI(MBB, MBBI, DL, 498 TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), 499 StackPtr).addReg(StackPtr).addImm(-MaxAlign); 500 501 // The EFLAGS implicit def is dead. 502 MI->getOperand(3).setIsDead(); 503 } 504 } else { 505 NumBytes = StackSize - X86FI->getCalleeSavedFrameSize(); 506 } 507 508 // Skip the callee-saved push instructions. 509 bool PushedRegs = false; 510 int StackOffset = 2 * stackGrowth; 511 512 while (MBBI != MBB.end() && 513 (MBBI->getOpcode() == X86::PUSH32r || 514 MBBI->getOpcode() == X86::PUSH64r)) { 515 PushedRegs = true; 516 ++MBBI; 517 518 if (!HasFP && needsFrameMoves) { 519 // Mark callee-saved push instruction. 520 MCSymbol *Label = MMI.getContext().CreateTempSymbol(); 521 BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label); 522 523 // Define the current CFA rule to use the provided offset. 524 unsigned Ptr = StackSize ? 525 MachineLocation::VirtualFP : StackPtr; 526 MachineLocation SPDst(Ptr); 527 MachineLocation SPSrc(Ptr, StackOffset); 528 Moves.push_back(MachineMove(Label, SPDst, SPSrc)); 529 StackOffset += stackGrowth; 530 } 531 } 532 533 DL = MBB.findDebugLoc(MBBI); 534 535 // If there is an SUB32ri of ESP immediately before this instruction, merge 536 // the two. This can be the case when tail call elimination is enabled and 537 // the callee has more arguments then the caller. 538 NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true); 539 540 // If there is an ADD32ri or SUB32ri of ESP immediately after this 541 // instruction, merge the two instructions. 542 mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes); 543 544 // Adjust stack pointer: ESP -= numbytes. 545 546 // Windows and cygwin/mingw require a prologue helper routine when allocating 547 // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw 548 // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the 549 // stack and adjust the stack pointer in one go. The 64-bit version of 550 // __chkstk is only responsible for probing the stack. The 64-bit prologue is 551 // responsible for adjusting the stack pointer. Touching the stack at 4K 552 // increments is necessary to ensure that the guard pages used by the OS 553 // virtual memory manager are allocated in correct sequence. 554 if (NumBytes >= 4096 && STI.isTargetCOFF() && !STI.isTargetEnvMacho()) { 555 const char *StackProbeSymbol; 556 bool isSPUpdateNeeded = false; 557 558 if (Is64Bit) { 559 if (STI.isTargetCygMing()) 560 StackProbeSymbol = "___chkstk"; 561 else { 562 StackProbeSymbol = "__chkstk"; 563 isSPUpdateNeeded = true; 564 } 565 } else if (STI.isTargetCygMing()) 566 StackProbeSymbol = "_alloca"; 567 else 568 StackProbeSymbol = "_chkstk"; 569 570 // Check whether EAX is livein for this function. 571 bool isEAXAlive = isEAXLiveIn(MF); 572 573 if (isEAXAlive) { 574 // Sanity check that EAX is not livein for this function. 575 // It should not be, so throw an assert. 576 assert(!Is64Bit && "EAX is livein in x64 case!"); 577 578 // Save EAX 579 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) 580 .addReg(X86::EAX, RegState::Kill); 581 } 582 583 if (Is64Bit) { 584 // Handle the 64-bit Windows ABI case where we need to call __chkstk. 585 // Function prologue is responsible for adjusting the stack pointer. 586 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX) 587 .addImm(NumBytes); 588 } else { 589 // Allocate NumBytes-4 bytes on stack in case of isEAXAlive. 590 // We'll also use 4 already allocated bytes for EAX. 591 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) 592 .addImm(isEAXAlive ? NumBytes - 4 : NumBytes); 593 } 594 595 BuildMI(MBB, MBBI, DL, 596 TII.get(Is64Bit ? X86::W64ALLOCA : X86::CALLpcrel32)) 597 .addExternalSymbol(StackProbeSymbol) 598 .addReg(StackPtr, RegState::Define | RegState::Implicit) 599 .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); 600 601 // MSVC x64's __chkstk needs to adjust %rsp. 602 // FIXME: %rax preserves the offset and should be available. 603 if (isSPUpdateNeeded) 604 emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, 605 TII, *RegInfo); 606 607 if (isEAXAlive) { 608 // Restore EAX 609 MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), 610 X86::EAX), 611 StackPtr, false, NumBytes - 4); 612 MBB.insert(MBBI, MI); 613 } 614 } else if (NumBytes) 615 emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, 616 TII, *RegInfo); 617 618 if ((NumBytes || PushedRegs) && needsFrameMoves) { 619 // Mark end of stack pointer adjustment. 620 MCSymbol *Label = MMI.getContext().CreateTempSymbol(); 621 BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label); 622 623 if (!HasFP && NumBytes) { 624 // Define the current CFA rule to use the provided offset. 625 if (StackSize) { 626 MachineLocation SPDst(MachineLocation::VirtualFP); 627 MachineLocation SPSrc(MachineLocation::VirtualFP, 628 -StackSize + stackGrowth); 629 Moves.push_back(MachineMove(Label, SPDst, SPSrc)); 630 } else { 631 MachineLocation SPDst(StackPtr); 632 MachineLocation SPSrc(StackPtr, stackGrowth); 633 Moves.push_back(MachineMove(Label, SPDst, SPSrc)); 634 } 635 } 636 637 // Emit DWARF info specifying the offsets of the callee-saved registers. 638 if (PushedRegs) 639 emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr); 640 } 641 } 642 643 void X86FrameLowering::emitEpilogue(MachineFunction &MF, 644 MachineBasicBlock &MBB) const { 645 const MachineFrameInfo *MFI = MF.getFrameInfo(); 646 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 647 const X86RegisterInfo *RegInfo = TM.getRegisterInfo(); 648 const X86InstrInfo &TII = *TM.getInstrInfo(); 649 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); 650 assert(MBBI != MBB.end() && "Returning block has no instructions"); 651 unsigned RetOpcode = MBBI->getOpcode(); 652 DebugLoc DL = MBBI->getDebugLoc(); 653 bool Is64Bit = STI.is64Bit(); 654 unsigned StackAlign = getStackAlignment(); 655 unsigned SlotSize = RegInfo->getSlotSize(); 656 unsigned FramePtr = RegInfo->getFrameRegister(MF); 657 unsigned StackPtr = RegInfo->getStackRegister(); 658 659 switch (RetOpcode) { 660 default: 661 llvm_unreachable("Can only insert epilog into returning blocks"); 662 case X86::RET: 663 case X86::RETI: 664 case X86::TCRETURNdi: 665 case X86::TCRETURNri: 666 case X86::TCRETURNmi: 667 case X86::TCRETURNdi64: 668 case X86::TCRETURNri64: 669 case X86::TCRETURNmi64: 670 case X86::EH_RETURN: 671 case X86::EH_RETURN64: 672 break; // These are ok 673 } 674 675 // Get the number of bytes to allocate from the FrameInfo. 676 uint64_t StackSize = MFI->getStackSize(); 677 uint64_t MaxAlign = MFI->getMaxAlignment(); 678 unsigned CSSize = X86FI->getCalleeSavedFrameSize(); 679 uint64_t NumBytes = 0; 680 681 // If we're forcing a stack realignment we can't rely on just the frame 682 // info, we need to know the ABI stack alignment as well in case we 683 // have a call out. Otherwise just make sure we have some alignment - we'll 684 // go with the minimum. 685 if (ForceStackAlign) { 686 if (MFI->hasCalls()) 687 MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; 688 else 689 MaxAlign = MaxAlign ? MaxAlign : 4; 690 } 691 692 if (hasFP(MF)) { 693 // Calculate required stack adjustment. 694 uint64_t FrameSize = StackSize - SlotSize; 695 if (RegInfo->needsStackRealignment(MF)) 696 FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign; 697 698 NumBytes = FrameSize - CSSize; 699 700 // Pop EBP. 701 BuildMI(MBB, MBBI, DL, 702 TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr); 703 } else { 704 NumBytes = StackSize - CSSize; 705 } 706 707 // Skip the callee-saved pop instructions. 708 MachineBasicBlock::iterator LastCSPop = MBBI; 709 while (MBBI != MBB.begin()) { 710 MachineBasicBlock::iterator PI = prior(MBBI); 711 unsigned Opc = PI->getOpcode(); 712 713 if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE && 714 !PI->getDesc().isTerminator()) 715 break; 716 717 --MBBI; 718 } 719 720 DL = MBBI->getDebugLoc(); 721 722 // If there is an ADD32ri or SUB32ri of ESP immediately before this 723 // instruction, merge the two instructions. 724 if (NumBytes || MFI->hasVarSizedObjects()) 725 mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes); 726 727 // If dynamic alloca is used, then reset esp to point to the last callee-saved 728 // slot before popping them off! Same applies for the case, when stack was 729 // realigned. 730 if (RegInfo->needsStackRealignment(MF)) { 731 // We cannot use LEA here, because stack pointer was realigned. We need to 732 // deallocate local frame back. 733 if (CSSize) { 734 emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo); 735 MBBI = prior(LastCSPop); 736 } 737 738 BuildMI(MBB, MBBI, DL, 739 TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), 740 StackPtr).addReg(FramePtr); 741 } else if (MFI->hasVarSizedObjects()) { 742 if (CSSize) { 743 unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r; 744 MachineInstr *MI = 745 addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr), 746 FramePtr, false, -CSSize); 747 MBB.insert(MBBI, MI); 748 } else { 749 BuildMI(MBB, MBBI, DL, 750 TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr) 751 .addReg(FramePtr); 752 } 753 } else if (NumBytes) { 754 // Adjust stack pointer back: ESP += numbytes. 755 emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo); 756 } 757 758 // We're returning from function via eh_return. 759 if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) { 760 MBBI = MBB.getLastNonDebugInstr(); 761 MachineOperand &DestAddr = MBBI->getOperand(0); 762 assert(DestAddr.isReg() && "Offset should be in register!"); 763 BuildMI(MBB, MBBI, DL, 764 TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), 765 StackPtr).addReg(DestAddr.getReg()); 766 } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi || 767 RetOpcode == X86::TCRETURNmi || 768 RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 || 769 RetOpcode == X86::TCRETURNmi64) { 770 bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64; 771 // Tail call return: adjust the stack pointer and jump to callee. 772 MBBI = MBB.getLastNonDebugInstr(); 773 MachineOperand &JumpTarget = MBBI->getOperand(0); 774 MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1); 775 assert(StackAdjust.isImm() && "Expecting immediate value."); 776 777 // Adjust stack pointer. 778 int StackAdj = StackAdjust.getImm(); 779 int MaxTCDelta = X86FI->getTCReturnAddrDelta(); 780 int Offset = 0; 781 assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive"); 782 783 // Incoporate the retaddr area. 784 Offset = StackAdj-MaxTCDelta; 785 assert(Offset >= 0 && "Offset should never be negative"); 786 787 if (Offset) { 788 // Check for possible merge with preceding ADD instruction. 789 Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true); 790 emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII, *RegInfo); 791 } 792 793 // Jump to label or value in register. 794 if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) { 795 MachineInstrBuilder MIB = 796 BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi) 797 ? X86::TAILJMPd : X86::TAILJMPd64)); 798 if (JumpTarget.isGlobal()) 799 MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), 800 JumpTarget.getTargetFlags()); 801 else { 802 assert(JumpTarget.isSymbol()); 803 MIB.addExternalSymbol(JumpTarget.getSymbolName(), 804 JumpTarget.getTargetFlags()); 805 } 806 } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) { 807 MachineInstrBuilder MIB = 808 BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi) 809 ? X86::TAILJMPm : X86::TAILJMPm64)); 810 for (unsigned i = 0; i != 5; ++i) 811 MIB.addOperand(MBBI->getOperand(i)); 812 } else if (RetOpcode == X86::TCRETURNri64) { 813 BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)). 814 addReg(JumpTarget.getReg(), RegState::Kill); 815 } else { 816 BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)). 817 addReg(JumpTarget.getReg(), RegState::Kill); 818 } 819 820 MachineInstr *NewMI = prior(MBBI); 821 for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i) 822 NewMI->addOperand(MBBI->getOperand(i)); 823 824 // Delete the pseudo instruction TCRETURN. 825 MBB.erase(MBBI); 826 } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) && 827 (X86FI->getTCReturnAddrDelta() < 0)) { 828 // Add the return addr area delta back since we are not tail calling. 829 int delta = -1*X86FI->getTCReturnAddrDelta(); 830 MBBI = MBB.getLastNonDebugInstr(); 831 832 // Check for possible merge with preceding ADD instruction. 833 delta += mergeSPUpdates(MBB, MBBI, StackPtr, true); 834 emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII, *RegInfo); 835 } 836 } 837 838 void 839 X86FrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) const { 840 // Calculate amount of bytes used for return address storing 841 int stackGrowth = (STI.is64Bit() ? -8 : -4); 842 const X86RegisterInfo *RI = TM.getRegisterInfo(); 843 844 // Initial state of the frame pointer is esp+stackGrowth. 845 MachineLocation Dst(MachineLocation::VirtualFP); 846 MachineLocation Src(RI->getStackRegister(), stackGrowth); 847 Moves.push_back(MachineMove(0, Dst, Src)); 848 849 // Add return address to move list 850 MachineLocation CSDst(RI->getStackRegister(), stackGrowth); 851 MachineLocation CSSrc(RI->getRARegister()); 852 Moves.push_back(MachineMove(0, CSDst, CSSrc)); 853 } 854 855 int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) const { 856 const X86RegisterInfo *RI = 857 static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo()); 858 const MachineFrameInfo *MFI = MF.getFrameInfo(); 859 int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea(); 860 uint64_t StackSize = MFI->getStackSize(); 861 862 if (RI->needsStackRealignment(MF)) { 863 if (FI < 0) { 864 // Skip the saved EBP. 865 Offset += RI->getSlotSize(); 866 } else { 867 unsigned Align = MFI->getObjectAlignment(FI); 868 assert((-(Offset + StackSize)) % Align == 0); 869 Align = 0; 870 return Offset + StackSize; 871 } 872 // FIXME: Support tail calls 873 } else { 874 if (!hasFP(MF)) 875 return Offset + StackSize; 876 877 // Skip the saved EBP. 878 Offset += RI->getSlotSize(); 879 880 // Skip the RETADDR move area 881 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 882 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); 883 if (TailCallReturnAddrDelta < 0) 884 Offset -= TailCallReturnAddrDelta; 885 } 886 887 return Offset; 888 } 889 890 bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, 891 MachineBasicBlock::iterator MI, 892 const std::vector<CalleeSavedInfo> &CSI, 893 const TargetRegisterInfo *TRI) const { 894 if (CSI.empty()) 895 return false; 896 897 DebugLoc DL = MBB.findDebugLoc(MI); 898 899 MachineFunction &MF = *MBB.getParent(); 900 901 unsigned SlotSize = STI.is64Bit() ? 8 : 4; 902 unsigned FPReg = TRI->getFrameRegister(MF); 903 unsigned CalleeFrameSize = 0; 904 905 const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); 906 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 907 908 // Push GPRs. It increases frame size. 909 unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r; 910 for (unsigned i = CSI.size(); i != 0; --i) { 911 unsigned Reg = CSI[i-1].getReg(); 912 if (!X86::GR64RegClass.contains(Reg) && 913 !X86::GR32RegClass.contains(Reg)) 914 continue; 915 // Add the callee-saved register as live-in. It's killed at the spill. 916 MBB.addLiveIn(Reg); 917 if (Reg == FPReg) 918 // X86RegisterInfo::emitPrologue will handle spilling of frame register. 919 continue; 920 CalleeFrameSize += SlotSize; 921 BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill); 922 } 923 924 X86FI->setCalleeSavedFrameSize(CalleeFrameSize); 925 926 // Make XMM regs spilled. X86 does not have ability of push/pop XMM. 927 // It can be done by spilling XMMs to stack frame. 928 // Note that only Win64 ABI might spill XMMs. 929 for (unsigned i = CSI.size(); i != 0; --i) { 930 unsigned Reg = CSI[i-1].getReg(); 931 if (X86::GR64RegClass.contains(Reg) || 932 X86::GR32RegClass.contains(Reg)) 933 continue; 934 // Add the callee-saved register as live-in. It's killed at the spill. 935 MBB.addLiveIn(Reg); 936 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 937 TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), 938 RC, TRI); 939 } 940 941 return true; 942 } 943 944 bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, 945 MachineBasicBlock::iterator MI, 946 const std::vector<CalleeSavedInfo> &CSI, 947 const TargetRegisterInfo *TRI) const { 948 if (CSI.empty()) 949 return false; 950 951 DebugLoc DL = MBB.findDebugLoc(MI); 952 953 MachineFunction &MF = *MBB.getParent(); 954 const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); 955 956 // Reload XMMs from stack frame. 957 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 958 unsigned Reg = CSI[i].getReg(); 959 if (X86::GR64RegClass.contains(Reg) || 960 X86::GR32RegClass.contains(Reg)) 961 continue; 962 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 963 TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), 964 RC, TRI); 965 } 966 967 // POP GPRs. 968 unsigned FPReg = TRI->getFrameRegister(MF); 969 unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r; 970 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 971 unsigned Reg = CSI[i].getReg(); 972 if (!X86::GR64RegClass.contains(Reg) && 973 !X86::GR32RegClass.contains(Reg)) 974 continue; 975 if (Reg == FPReg) 976 // X86RegisterInfo::emitEpilogue will handle restoring of frame register. 977 continue; 978 BuildMI(MBB, MI, DL, TII.get(Opc), Reg); 979 } 980 return true; 981 } 982 983 void 984 X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, 985 RegScavenger *RS) const { 986 MachineFrameInfo *MFI = MF.getFrameInfo(); 987 const X86RegisterInfo *RegInfo = TM.getRegisterInfo(); 988 unsigned SlotSize = RegInfo->getSlotSize(); 989 990 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 991 int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); 992 993 if (TailCallReturnAddrDelta < 0) { 994 // create RETURNADDR area 995 // arg 996 // arg 997 // RETADDR 998 // { ... 999 // RETADDR area 1000 // ... 1001 // } 1002 // [EBP] 1003 MFI->CreateFixedObject(-TailCallReturnAddrDelta, 1004 (-1U*SlotSize)+TailCallReturnAddrDelta, true); 1005 } 1006 1007 if (hasFP(MF)) { 1008 assert((TailCallReturnAddrDelta <= 0) && 1009 "The Delta should always be zero or negative"); 1010 const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering(); 1011 1012 // Create a frame entry for the EBP register that must be saved. 1013 int FrameIdx = MFI->CreateFixedObject(SlotSize, 1014 -(int)SlotSize + 1015 TFI.getOffsetOfLocalArea() + 1016 TailCallReturnAddrDelta, 1017 true); 1018 assert(FrameIdx == MFI->getObjectIndexBegin() && 1019 "Slot for EBP register must be last in order to be found!"); 1020 FrameIdx = 0; 1021 } 1022 } 1023