//===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of TargetFrameLowering class.
//
// On AArch64, stack frames are structured as follows:
//
// The stack grows downward.
//
// All of the individual frame areas on the frame below are optional, i.e. it's
// possible to create a function so that the particular area isn't present
// in the frame.
//
// At function entry, the "frame" looks as follows:
//
// |                                   | Higher address
// |-----------------------------------|
// |                                   |
// | arguments passed on the stack     |
// |                                   |
// |-----------------------------------| <- sp
// |                                   | Lower address
//
//
// After the prologue has run, the frame has the following general structure.
// Note that this doesn't depict the case where a red-zone is used. Also,
// technically the last frame area (VLAs) doesn't get created until the main
// function body runs, after the prologue; it is depicted here for
// completeness.
//
// |                                   | Higher address
// |-----------------------------------|
// |                                   |
// | arguments passed on the stack     |
// |                                   |
// |-----------------------------------|
// |                                   |
// | (Win64 only) varargs from reg     |
// |                                   |
// |-----------------------------------|
// |                                   |
// | prev_fp, prev_lr                  |
// | (a.k.a. "frame record")           |
// |-----------------------------------| <- fp(=x29)
// |                                   |
// | other callee-saved registers      |
// |                                   |
// |-----------------------------------|
// |.empty.space.to.make.part.below....|
// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
// |.the.standard.16-byte.alignment....|  compile time; if present)
// |-----------------------------------|
// |                                   |
// | local variables of fixed size     |
// | including spill slots             |
// |-----------------------------------| <- bp(not defined by ABI,
// |.variable-sized.local.variables....|       LLVM chooses X19)
// |.(VLAs)............................| (size of this area is unknown at
// |...................................|  compile time)
// |-----------------------------------| <- sp
// |                                   | Lower address
//
//
// To access data in a frame, a constant offset from one of the pointers
// (fp, bp, sp) to the data must be computable at compile time. The size of
// the areas with a dotted background cannot be computed at compile time if
// they are present, so all three of fp, bp and sp must be set up to be able
// to access all contents in the frame areas, assuming all of the frame areas
// are non-empty.
//
// For most functions, some of the frame areas are empty. For those functions,
// it may not be necessary to set up fp or bp:
// * A base pointer is definitely needed when there are both VLAs and local
//   variables with more-than-default alignment requirements.
// * A frame pointer is definitely needed when there are local variables with
//   more-than-default alignment requirements.
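// For example, a function with no VLAs and no over-aligned locals can usually
// address its whole frame from sp alone and may need neither fp nor bp.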
//
// In some cases when a base pointer is not strictly needed, it is generated
// anyway when offsets from the frame pointer to access local variables become
// so large that the offset can't be encoded in the immediate fields of loads
// or stores.
//
// FIXME: also explain the redzone concept.
// FIXME: also explain the concept of reserved call frames.
//
//===----------------------------------------------------------------------===//

#include "AArch64FrameLowering.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "frame-info"

static cl::opt<bool> EnableRedZone("aarch64-redzone",
                                   cl::desc("enable use of redzone on AArch64"),
                                   cl::init(false), cl::Hidden);

STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");

/// Look at each instruction that references stack frames and return the stack
/// size limit beyond which some of these instructions will require a scratch
/// register during their expansion later.
static unsigned estimateRSStackSizeLimit(MachineFunction &MF) {
  // FIXME: For now, just conservatively guesstimate based on unscaled indexing
  // range. We'll end up allocating an unnecessary spill slot a lot, but
  // realistically that's not a big deal at this stage of the game.
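  // The 255 returned below roughly corresponds to the top of the signed 9-bit
  // unscaled-offset range (-256..+255 bytes).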
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      if (MI.isDebugValue() || MI.isPseudo() ||
          MI.getOpcode() == AArch64::ADDXri ||
          MI.getOpcode() == AArch64::ADDSXri)
        continue;

      for (const MachineOperand &MO : MI.operands()) {
        if (!MO.isFI())
          continue;

        int Offset = 0;
        if (isAArch64FrameOffsetLegal(MI, Offset, nullptr, nullptr, nullptr) ==
            AArch64FrameOffsetCannotUpdate)
          return 0;
      }
    }
  }
  return 255;
}

bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
  if (!EnableRedZone)
    return false;
  // Don't use the red zone if the function explicitly asks us not to.
  // This is typically used for kernel code.
  if (MF.getFunction().hasFnAttribute(Attribute::NoRedZone))
    return false;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  unsigned NumBytes = AFI->getLocalStackSize();

  return !(MFI.hasCalls() || hasFP(MF) || NumBytes > 128);
}

/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register.
bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
  // Retain behavior of always omitting the FP for leaf functions when
  // possible.
  return (MFI.hasCalls() &&
          MF.getTarget().Options.DisableFramePointerElim(MF)) ||
         MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
         MFI.hasStackMap() || MFI.hasPatchPoint() ||
         RegInfo->needsStackRealignment(MF);
}

/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
/// not required, we reserve argument space for call sites in the function
/// immediately on entry to the current function. This eliminates the need for
/// add/sub sp brackets around call sites. Returns true if the call frame is
/// included as part of the stack frame.
bool
AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo().hasVarSizedObjects();
}

MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  const AArch64InstrInfo *TII =
      static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  DebugLoc DL = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
  if (!TFI->hasReservedCallFrame(MF)) {
    unsigned Align = getStackAlignment();

    int64_t Amount = I->getOperand(0).getImm();
    Amount = alignTo(Amount, Align);
    if (!IsDestroy)
      Amount = -Amount;

    // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
    // doesn't have to pop anything), then the first operand will be zero too
    // so this adjustment is a no-op.
    if (CalleePopAmount == 0) {
      // FIXME: in-function stack adjustment for calls is limited to 24-bits
      // because there's no guaranteed temporary register available.
      //
      // ADD/SUB (immediate) has only LSL #0 and LSL #12 available.
      // 1) For offset <= 12-bit, we use LSL #0
      // 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses
      //    LSL #0, and the other uses LSL #12.
      //
      // Most call frames will be allocated at the start of a function so
      // this is OK, but it is a limitation that needs dealing with.
      assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
      emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, Amount, TII);
    }
  } else if (CalleePopAmount != 0) {
    // If the calling convention demands that the callee pops arguments from
    // the stack, we want to add it back if we have a reserved call frame.
    assert(CalleePopAmount < 0xffffff && "call frame too large");
    emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, -CalleePopAmount,
                    TII);
  }
  return MBB.erase(I);
}

void AArch64FrameLowering::emitCalleeSavedFrameMoves(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const MCRegisterInfo *MRI = STI.getRegisterInfo();
  const TargetInstrInfo *TII = STI.getInstrInfo();
  DebugLoc DL = MBB.findDebugLoc(MBBI);

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  if (CSI.empty())
    return;

  for (const auto &Info : CSI) {
    unsigned Reg = Info.getReg();
    int64_t Offset =
        MFI.getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea();
    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
    unsigned CFIIndex = MF.addFrameInst(
        MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlags(MachineInstr::FrameSetup);
  }
}

// Find a scratch register that we can use at the start of the prologue to
// re-align the stack pointer. We avoid using callee-save registers since they
// may appear to be free when this is called from canUseAsPrologue (during
// shrink wrapping), but then no longer be free when this is called from
// emitPrologue.
//
// FIXME: This is a bit conservative, since in the above case we could use one
// of the callee-save registers as a scratch temp to re-align the stack
// pointer, but we would then have to make sure that we were in fact saving at
// least one callee-save register in the prologue, which is additional
// complexity that doesn't seem worth the benefit.
static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
  MachineFunction *MF = MBB->getParent();

  // If MBB is an entry block, use X9 as the scratch register.
  if (&MF->front() == MBB)
    return AArch64::X9;

  const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
  LivePhysRegs LiveRegs(TRI);
  LiveRegs.addLiveIns(*MBB);

  // Mark callee saved registers as used so we will not choose them.
  const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(MF);
  for (unsigned i = 0; CSRegs[i]; ++i)
    LiveRegs.addReg(CSRegs[i]);

  // Prefer X9 since it was historically used for the prologue scratch reg.
  const MachineRegisterInfo &MRI = MF->getRegInfo();
  if (LiveRegs.available(MRI, AArch64::X9))
    return AArch64::X9;

  for (unsigned Reg : AArch64::GPR64RegClass) {
    if (LiveRegs.available(MRI, Reg))
      return Reg;
  }
  return AArch64::NoRegister;
}

bool AArch64FrameLowering::canUseAsPrologue(
    const MachineBasicBlock &MBB) const {
  const MachineFunction *MF = MBB.getParent();
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
  const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  // Don't need a scratch register if we're not going to re-align the stack.
  if (!RegInfo->needsStackRealignment(*MF))
    return true;
  // Otherwise, we can use any block as long as it has a scratch register
  // available.
  return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister;
}

static bool windowsRequiresStackProbe(MachineFunction &MF,
                                      unsigned StackSizeInBytes) {
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  if (!Subtarget.isTargetWindows())
    return false;
  const Function &F = MF.getFunction();
  // TODO: When implementing stack protectors, take that into account
  // for the probe threshold.
  unsigned StackProbeSize = 4096;
  if (F.hasFnAttribute("stack-probe-size"))
    F.getFnAttribute("stack-probe-size")
        .getValueAsString()
        .getAsInteger(0, StackProbeSize);
  return StackSizeInBytes >= StackProbeSize;
}

bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
    MachineFunction &MF, unsigned StackBumpBytes) const {
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  if (AFI->getLocalStackSize() == 0)
    return false;

  // 512 is the maximum immediate for stp/ldp that will be used for
  // callee-save save/restores.
  if (StackBumpBytes >= 512 || windowsRequiresStackProbe(MF, StackBumpBytes))
    return false;

  if (MFI.hasVarSizedObjects())
    return false;

  if (RegInfo->needsStackRealignment(MF))
    return false;

  // This isn't strictly necessary, but it simplifies things a bit since the
  // current RedZone handling code assumes the SP is adjusted by the
  // callee-save save/restore code.
  if (canUseRedZone(MF))
    return false;

  return true;
}

// Convert callee-save register save/restore instruction to do stack pointer
// decrement/increment to allocate/deallocate the callee-save stack area by
// converting store/load to use pre/post increment version.
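// For example (a sketch of the rewrite this helper performs, where CSSize is
// the callee-save area size passed in as CSStackSizeInc):
//   stp x29, x30, [sp, #0]   ->   stp x29, x30, [sp, #-CSSize]!
//   ldp x29, x30, [sp, #0]   ->   ldp x29, x30, [sp], #CSSize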
static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc) {
  unsigned NewOpc;
  bool NewIsUnscaled = false;
  switch (MBBI->getOpcode()) {
  default:
    llvm_unreachable("Unexpected callee-save save/restore opcode!");
  case AArch64::STPXi:
    NewOpc = AArch64::STPXpre;
    break;
  case AArch64::STPDi:
    NewOpc = AArch64::STPDpre;
    break;
  case AArch64::STRXui:
    NewOpc = AArch64::STRXpre;
    NewIsUnscaled = true;
    break;
  case AArch64::STRDui:
    NewOpc = AArch64::STRDpre;
    NewIsUnscaled = true;
    break;
  case AArch64::LDPXi:
    NewOpc = AArch64::LDPXpost;
    break;
  case AArch64::LDPDi:
    NewOpc = AArch64::LDPDpost;
    break;
  case AArch64::LDRXui:
    NewOpc = AArch64::LDRXpost;
    NewIsUnscaled = true;
    break;
  case AArch64::LDRDui:
    NewOpc = AArch64::LDRDpost;
    NewIsUnscaled = true;
    break;
  }

  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  MIB.addReg(AArch64::SP, RegState::Define);

  // Copy all operands other than the immediate offset.
  unsigned OpndIdx = 0;
  for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
       ++OpndIdx)
    MIB.add(MBBI->getOperand(OpndIdx));

  assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
         "Unexpected immediate offset in first/last callee-save save/restore "
         "instruction!");
  assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");
  // Last operand is immediate offset that needs fixing.
  assert(CSStackSizeInc % 8 == 0);
  int64_t CSStackSizeIncImm = CSStackSizeInc;
  if (!NewIsUnscaled)
    CSStackSizeIncImm /= 8;
  MIB.addImm(CSStackSizeIncImm);

  MIB.setMIFlags(MBBI->getFlags());
  MIB.setMemRefs(MBBI->memoperands_begin(), MBBI->memoperands_end());

  return std::prev(MBB.erase(MBBI));
}

// Fixup callee-save register save/restore instructions to take into account
// combined SP bump by adding the local stack size to the stack offsets.
static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
                                              unsigned LocalStackSize) {
  unsigned Opc = MI.getOpcode();
  (void)Opc;
  assert((Opc == AArch64::STPXi || Opc == AArch64::STPDi ||
          Opc == AArch64::STRXui || Opc == AArch64::STRDui ||
          Opc == AArch64::LDPXi || Opc == AArch64::LDPDi ||
          Opc == AArch64::LDRXui || Opc == AArch64::LDRDui) &&
         "Unexpected callee-save save/restore opcode!");

  unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
  assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");
  // Last operand is immediate offset that needs fixing.
  MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
  // All generated opcodes have scaled offsets.
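  // For instance, with a 32-byte local area an STPXi offset of #2 (i.e.
  // [sp, #16]) becomes #6 (i.e. [sp, #48]); the division by 8 below matches
  // that scaling.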
  assert(LocalStackSize % 8 == 0);
  OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / 8);
}

void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const Function &F = MF.getFunction();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  bool needsFrameMoves = MMI.hasDebugInfo() || F.needsUnwindTableEntry();
  bool HasFP = hasFP(MF);

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc DL;

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  int NumBytes = (int)MFI.getStackSize();
  if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
    assert(!HasFP && "unexpected function without stack frame but with FP");

    // All of the stack allocation is for locals.
    AFI->setLocalStackSize(NumBytes);

    if (!NumBytes)
      return;
    // REDZONE: If the stack size is less than 128 bytes, we don't need
    // to actually allocate.
    if (canUseRedZone(MF))
      ++NumRedZoneFunctions;
    else {
      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);

      // Label used to tie together the PROLOG_LABEL and the MachineMoves.
      MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }
    return;
  }

  bool IsWin64 =
      Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
  unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;

  auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
  // All of the remaining stack allocations are for locals.
  AFI->setLocalStackSize(NumBytes - PrologueSaveSize);

  bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
  if (CombineSPBump) {
    emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                    MachineInstr::FrameSetup);
    NumBytes = 0;
  } else if (PrologueSaveSize != 0) {
    MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(MBB, MBBI, DL, TII,
                                                     -PrologueSaveSize);
    NumBytes -= PrologueSaveSize;
  }
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  // Move past the saves of the callee-saved registers, fixing up the offsets
  // and pre-inc if we decided to combine the callee-save and local stack
  // pointer bump above.
  MachineBasicBlock::iterator End = MBB.end();
  while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup)) {
    if (CombineSPBump)
      fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize());
    ++MBBI;
  }
  if (HasFP) {
    // Only set up FP if we actually need to. Frame pointer is fp =
    // sp - fixedobject - 16.
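    // A sketch of the arithmetic: after the callee-save SP bump, sp has
    // dropped by PrologueSaveSize = CalleeSavedStackSize + FixedObject, so an
    // offset of CalleeSavedStackSize - 16 from the current sp lands on the
    // frame record; the CombineSPBump case below also adds back the local
    // area, which was allocated in the same bump.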
    int FPOffset = AFI->getCalleeSavedStackSize() - 16;
    if (CombineSPBump)
      FPOffset += AFI->getLocalStackSize();

    // Issue   sub fp, sp, FPOffset   or
    //         mov fp, sp             when FPOffset is zero.
    // Note: All stores of callee-saved registers are marked as "FrameSetup".
    // This code marks the instruction(s) that set the FP also.
    emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
                    MachineInstr::FrameSetup);
  }

  if (windowsRequiresStackProbe(MF, NumBytes)) {
    uint32_t NumWords = NumBytes >> 4;

    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
        .addImm(NumWords)
        .setMIFlags(MachineInstr::FrameSetup);

    switch (MF.getTarget().getCodeModel()) {
    case CodeModel::Small:
    case CodeModel::Medium:
    case CodeModel::Kernel:
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
          .addExternalSymbol("__chkstk")
          .addReg(AArch64::X15, RegState::Implicit)
          .setMIFlags(MachineInstr::FrameSetup);
      break;
    case CodeModel::Large:
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
          .addReg(AArch64::X16, RegState::Define)
          .addExternalSymbol("__chkstk")
          .addExternalSymbol("__chkstk")
          .setMIFlags(MachineInstr::FrameSetup);

      BuildMI(MBB, MBBI, DL, TII->get(AArch64::BLR))
          .addReg(AArch64::X16, RegState::Kill)
          .addReg(AArch64::X15, RegState::Implicit | RegState::Define)
          .setMIFlags(MachineInstr::FrameSetup);
      break;
    }

    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
        .addReg(AArch64::SP, RegState::Kill)
        .addReg(AArch64::X15, RegState::Kill)
        .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
        .setMIFlags(MachineInstr::FrameSetup);
    NumBytes = 0;
  }

  // Allocate space for the rest of the frame.
  if (NumBytes) {
    const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
    unsigned scratchSPReg = AArch64::SP;

    if (NeedsRealignment) {
      scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
      assert(scratchSPReg != AArch64::NoRegister);
    }

    // If we're a leaf function, try using the red zone.
    if (!canUseRedZone(MF))
      // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
      // the correct value here, as NumBytes also includes padding bytes,
      // which shouldn't be counted here.
      emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);

    if (NeedsRealignment) {
      const unsigned Alignment = MFI.getMaxAlignment();
      const unsigned NrBitsToZero = countTrailingZeros(Alignment);
      assert(NrBitsToZero > 1);
      assert(scratchSPReg != AArch64::SP);

      // SUB X9, SP, NumBytes
      //   -- X9 is a temporary register, so it shouldn't contain any live data
      //      here and is free to use. This is already produced by
      //      emitFrameOffset above.
      // AND SP, X9, 0b11111...0000
      // The logical immediates have a non-trivial encoding. The following
      // formula computes the encoded immediate with all ones but
      // NrBitsToZero zero bits as least significant bits.
      uint32_t andMaskEncoded = (1 << 12)                         // = N
                                | ((64 - NrBitsToZero) << 6)      // immr
                                | ((64 - NrBitsToZero - 1) << 0); // imms

      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
          .addReg(scratchSPReg, RegState::Kill)
          .addImm(andMaskEncoded);
      AFI->setStackRealigned(true);
    }
  }

  // If we need a base pointer, set it up here. It's whatever the value of the
  // stack pointer is at this point. Any variable size objects will be allocated
  // after this, so we can still use the base pointer to reference locals.
  //
  // FIXME: Clarify FrameSetup flags here.
  // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
  // needed.
  if (RegInfo->hasBasePointer(MF)) {
    TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
                     false);
  }

  if (needsFrameMoves) {
    const DataLayout &TD = MF.getDataLayout();
    const int StackGrowth = -TD.getPointerSize(0);
    unsigned FramePtr = RegInfo->getFrameRegister(MF);
    // An example of the prologue:
    //
    //     .globl __foo
    //     .align 2
    //  __foo:
    // Ltmp0:
    //     .cfi_startproc
    //     .cfi_personality 155, ___gxx_personality_v0
    // Leh_func_begin:
    //     .cfi_lsda 16, Lexception33
    //
    //     stp  xa,bx, [sp, -#offset]!
    //     ...
    //     stp  x28, x27, [sp, #offset-32]
    //     stp  fp, lr, [sp, #offset-16]
    //     add  fp, sp, #offset - 16
    //     sub  sp, sp, #1360
    //
    // The Stack:
    //       +-------------------------------------------+
    // 10000 | ........ | ........ | ........ | ........ |
    // 10004 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10008 | ........ | ........ | ........ | ........ |
    // 1000c | ........ | ........ | ........ | ........ |
    //       +===========================================+
    // 10010 |                X28 Register               |
    // 10014 |                X28 Register               |
    //       +-------------------------------------------+
    // 10018 |                X27 Register               |
    // 1001c |                X27 Register               |
    //       +===========================================+
    // 10020 |                Frame Pointer              |
    // 10024 |                Frame Pointer              |
    //       +-------------------------------------------+
    // 10028 |                Link Register              |
    // 1002c |                Link Register              |
    //       +===========================================+
    // 10030 | ........ | ........ | ........ | ........ |
    // 10034 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10038 | ........ | ........ | ........ | ........ |
    // 1003c | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    //
    //     [sp] = 10030        ::    >>initial value<<
    //     sp = 10020          ::  stp fp, lr, [sp, #-16]!
    //     fp = sp == 10020    ::  mov fp, sp
    //     [sp] == 10020       ::  stp x28, x27, [sp, #-16]!
    //     sp == 10010         ::    >>final value<<
    //
    // The frame pointer (w29) points to address 10020. If we use an offset of
    // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
    // for w27, and -32 for w28:
    //
    //  Ltmp1:
    //     .cfi_def_cfa w29, 16
    //  Ltmp2:
    //     .cfi_offset w30, -8
    //  Ltmp3:
    //     .cfi_offset w29, -16
    //  Ltmp4:
    //     .cfi_offset w27, -24
    //  Ltmp5:
    //     .cfi_offset w28, -32

    if (HasFP) {
      // Define the current CFA rule to use the provided FP.
      unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
      unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
          nullptr, Reg, 2 * StackGrowth - FixedObject));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    } else {
      // Encode the stack size of the leaf function.
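      // (With no frame pointer the CFA stays sp-relative; recording the full
      // frame size lets the unwinder recover the incoming sp.)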
      unsigned CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, -MFI.getStackSize()));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }

    // Now emit the moves for whatever callee saved regs we have (including FP,
    // LR if those are saved).
    emitCalleeSavedFrameMoves(MBB, MBBI);
  }
}

void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL;
  bool IsTailCallReturn = false;
  if (MBB.end() != MBBI) {
    DL = MBBI->getDebugLoc();
    unsigned RetOpcode = MBBI->getOpcode();
    IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
                       RetOpcode == AArch64::TCRETURNri;
  }
  int NumBytes = MFI.getStackSize();
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  // Initial and residual are named for consistency with the prologue. Note
  // that in the epilogue, the residual adjustment is executed first.
  uint64_t ArgumentPopSize = 0;
  if (IsTailCallReturn) {
    MachineOperand &StackAdjust = MBBI->getOperand(1);

    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments, this is
    // calculated during LowerCall and consumed here...
    ArgumentPopSize = StackAdjust.getImm();
  } else {
    // ... otherwise the amount to pop is *all* of the argument space,
    // conveniently stored in the MachineFunctionInfo by
    // LowerFormalArguments. This will, of course, be zero for the C calling
    // convention.
    ArgumentPopSize = AFI->getArgumentStackToRestore();
  }

  // The stack frame should be like below,
  //
  //      ----------------------                     ---
  //      |                    |                      |
  //      | BytesInStackArgArea|              CalleeArgStackSize
  //      | (NumReusableBytes) |                (of tail call)
  //      |                    |                     ---
  //      |                    |                      |
  //      ---------------------|        ---           |
  //      |                    |         |            |
  //      |   CalleeSavedReg   |         |            |
  //      | (CalleeSavedStackSize)|      |            |
  //      |                    |         |            |
  //      ---------------------|         |         NumBytes
  //      |                    |     StackSize  (StackAdjustUp)
  //      |   LocalStackSize   |         |            |
  //      | (covering callee   |         |            |
  //      |       args)        |         |            |
  //      |                    |         |            |
  //      ----------------------        ---          ---
  //
  // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
  //             = StackSize + ArgumentPopSize
  //
  // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
  // it as the 2nd argument of AArch64ISD::TC_RETURN.

  bool IsWin64 =
      Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
  unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;

  auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
  bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);

  if (!CombineSPBump && PrologueSaveSize != 0)
    convertCalleeSaveRestoreToSPPrePostIncDec(
        MBB, std::prev(MBB.getFirstTerminator()), DL, TII, PrologueSaveSize);

  // Move past the restores of the callee-saved registers.
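  // Walk backwards from the first terminator over instructions flagged
  // FrameDestroy; LastPopI ends up at the first callee-save restore, so the
  // SP adjustments emitted below land before the restores.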
  MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
  MachineBasicBlock::iterator Begin = MBB.begin();
  while (LastPopI != Begin) {
    --LastPopI;
    if (!LastPopI->getFlag(MachineInstr::FrameDestroy)) {
      ++LastPopI;
      break;
    } else if (CombineSPBump)
      fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize());
  }

  // If there is a single SP update, insert it before the ret and we're done.
  if (CombineSPBump) {
    emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
                    NumBytes + ArgumentPopSize, TII,
                    MachineInstr::FrameDestroy);
    return;
  }

  NumBytes -= PrologueSaveSize;
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  if (!hasFP(MF)) {
    bool RedZone = canUseRedZone(MF);
    // If this was a redzone leaf function, we don't need to restore the
    // stack pointer (but we may need to pop stack args for fastcc).
    if (RedZone && ArgumentPopSize == 0)
      return;

    bool NoCalleeSaveRestore = PrologueSaveSize == 0;
    int StackRestoreBytes = RedZone ? 0 : NumBytes;
    if (NoCalleeSaveRestore)
      StackRestoreBytes += ArgumentPopSize;
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
                    StackRestoreBytes, TII, MachineInstr::FrameDestroy);
    // If we were able to combine the local stack pop with the argument pop,
    // then we're done.
    if (NoCalleeSaveRestore || ArgumentPopSize == 0)
      return;
    NumBytes = 0;
  }

  // Restore the original stack pointer.
  // FIXME: Rather than doing the math here, we should instead just use
  // non-post-indexed loads for the restores if we aren't actually going to
  // be able to save any instructions.
  if (MFI.hasVarSizedObjects() || AFI->isStackRealigned())
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
                    -AFI->getCalleeSavedStackSize() + 16, TII,
                    MachineInstr::FrameDestroy);
  else if (NumBytes)
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII,
                    MachineInstr::FrameDestroy);

  // This must be placed after the callee-save restore code because that code
  // assumes the SP is at the same location as it was after the callee-save
  // save code in the prologue.
  if (ArgumentPopSize)
    emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
                    ArgumentPopSize, TII, MachineInstr::FrameDestroy);
}

/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
/// debug info. It's the same as what we use for resolving the code-gen
/// references for now. FIXME: This can go wrong when references are
/// SP-relative and simple call frames aren't used.
int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
                                                 int FI,
                                                 unsigned &FrameReg) const {
  return resolveFrameIndexReference(MF, FI, FrameReg);
}

int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
                                                     int FI, unsigned &FrameReg,
                                                     bool PreferFP) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  bool IsWin64 =
      Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
  unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
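  // MFI.getObjectOffset(FI) is relative to the incoming (pre-prologue) sp; the
  // frame pointer sits FixedObject + 16 bytes below that sp, and the final sp
  // sits StackSize bytes below it, which gives the two candidate offsets
  // computed next.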
  int FPOffset = MFI.getObjectOffset(FI) + FixedObject + 16;
  int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
  bool isFixed = MFI.isFixedObjectIndex(FI);

  // Use frame pointer to reference fixed objects. Use it for locals if
  // there are VLAs or a dynamically realigned SP (and thus the SP isn't
  // reliable as a base). Make sure useFPForScavengingIndex() does the
  // right thing for the emergency spill slot.
  bool UseFP = false;
  if (AFI->hasStackFrame()) {
    // Note: Keeping the following as multiple 'if' statements rather than
    // merging to a single expression for readability.
    //
    // Argument access should always use the FP.
    if (isFixed) {
      UseFP = hasFP(MF);
    } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) &&
               !RegInfo->needsStackRealignment(MF)) {
      // Use SP or FP, whichever gives us the best chance of the offset
      // being in range for direct access. If the FPOffset is positive,
      // that'll always be best, as the SP will be even further away.
      // If the FPOffset is negative, we have to keep in mind that the
      // available offset range for negative offsets is smaller than for
      // positive ones. If we have variable sized objects, we're stuck with
      // using the FP regardless, though, as the SP offset is unknown
      // and we don't have a base pointer available. If an offset is
      // available via the FP and the SP, use whichever is closest.
      if (PreferFP || MFI.hasVarSizedObjects() || FPOffset >= 0 ||
          (FPOffset >= -256 && Offset > -FPOffset))
        UseFP = true;
    }
  }

  assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
         "In the presence of dynamic stack pointer realignment, "
         "non-argument objects cannot be accessed through the frame pointer");

  if (UseFP) {
    FrameReg = RegInfo->getFrameRegister(MF);
    return FPOffset;
  }

  // Use the base pointer if we have one.
  if (RegInfo->hasBasePointer(MF))
    FrameReg = RegInfo->getBaseRegister();
  else {
    FrameReg = AArch64::SP;
    // If we're using the red zone for this function, the SP won't actually
    // be adjusted, so the offsets will be negative. They're also all
    // within range of the signed 9-bit immediate instructions.
    if (canUseRedZone(MF))
      Offset -= AFI->getLocalStackSize();
  }

  return Offset;
}

static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
  // Do not set a kill flag on values that are also marked as live-in. This
  // happens with the @llvm.returnaddress intrinsic and with arguments passed
  // in callee saved registers.
  // Omitting the kill flags is conservatively correct even if the live-in
  // is not used after all.
  bool IsLiveIn = MF.getRegInfo().isLiveIn(Reg);
  return getKillRegState(!IsLiveIn);
}

static bool produceCompactUnwindFrame(MachineFunction &MF) {
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  AttributeList Attrs = MF.getFunction().getAttributes();
  return Subtarget.isTargetMachO() &&
         !(Subtarget.getTargetLowering()->supportSwiftError() &&
           Attrs.hasAttrSomewhere(Attribute::SwiftError));
}

namespace {

struct RegPairInfo {
  unsigned Reg1 = AArch64::NoRegister;
  unsigned Reg2 = AArch64::NoRegister;
  int FrameIdx;
  int Offset;
  bool IsGPR;

  RegPairInfo() = default;

  bool isPaired() const { return Reg2 != AArch64::NoRegister; }
};

} // end anonymous namespace

static void computeCalleeSaveRegisterPairs(
    MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs) {

  if (CSI.empty())
    return;

  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  CallingConv::ID CC = MF.getFunction().getCallingConv();
  unsigned Count = CSI.size();
  (void)CC;
  // MachO's compact unwind format relies on all registers being stored in
  // pairs.
  assert((!produceCompactUnwindFrame(MF) ||
          CC == CallingConv::PreserveMost ||
          (Count & 1) == 0) &&
         "Odd number of callee-saved regs to spill!");
  int Offset = AFI->getCalleeSavedStackSize();

  for (unsigned i = 0; i < Count; ++i) {
    RegPairInfo RPI;
    RPI.Reg1 = CSI[i].getReg();

    assert(AArch64::GPR64RegClass.contains(RPI.Reg1) ||
           AArch64::FPR64RegClass.contains(RPI.Reg1));
    RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1);

    // Add the next reg to the pair if it is in the same register class.
    if (i + 1 < Count) {
      unsigned NextReg = CSI[i + 1].getReg();
      if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) ||
          (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg)))
        RPI.Reg2 = NextReg;
    }

    // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
    // list to come in sorted by frame index so that we can issue the store
    // pair instructions directly. Assert if we see anything otherwise.
    //
    // The order of the registers in the list is controlled by
    // getCalleeSavedRegs(), so they will always be in-order, as well.
    assert((!RPI.isPaired() ||
            (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) &&
           "Out of order callee saved regs!");

    // MachO's compact unwind format relies on all registers being stored in
    // adjacent register pairs.
    assert((!produceCompactUnwindFrame(MF) ||
            CC == CallingConv::PreserveMost ||
            (RPI.isPaired() &&
             ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
              RPI.Reg1 + 1 == RPI.Reg2))) &&
           "Callee-save registers not saved as adjacent register pair!");

    RPI.FrameIdx = CSI[i].getFrameIdx();

    if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) {
      // Round up size of non-pair to pair size if we need to pad the
      // callee-save area to ensure 16-byte alignment.
      Offset -= 16;
      assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16);
      MFI.setObjectAlignment(RPI.FrameIdx, 16);
      AFI->setCalleeSaveStackHasFreeSpace(true);
    } else
      Offset -= RPI.isPaired() ? 16 : 8;
    assert(Offset % 8 == 0);
    RPI.Offset = Offset / 8;
    assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
           "Offset out of bounds for LDP/STP immediate");

    RegPairs.push_back(RPI);
    if (RPI.isPaired())
      ++i;
  }
}

bool AArch64FrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  DebugLoc DL;
  SmallVector<RegPairInfo, 8> RegPairs;

  computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
       ++RPII) {
    RegPairInfo RPI = *RPII;
    unsigned Reg1 = RPI.Reg1;
    unsigned Reg2 = RPI.Reg2;
    unsigned StrOpc;

    // Issue sequence of spills for cs regs. The first spill may be converted
    // to a pre-decrement store later by emitPrologue if the callee-save stack
    // area allocation can't be combined with the local stack area allocation.
    // For example:
    //    stp     x22, x21, [sp, #0]     // addImm(+0)
    //    stp     x20, x19, [sp, #16]    // addImm(+2)
    //    stp     fp, lr, [sp, #32]      // addImm(+4)
    // Rationale: This sequence saves uop updates compared to a sequence of
    // pre-increment spills like stp xi,xj,[sp,#-16]!
    // Note: Similar rationale and sequence for restores in epilog.
    if (RPI.IsGPR)
      StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
    else
      StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
    DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
          if (RPI.isPaired())
            dbgs() << ", " << printReg(Reg2, TRI);
          dbgs() << ") -> fi#(" << RPI.FrameIdx;
          if (RPI.isPaired())
            dbgs() << ", " << RPI.FrameIdx + 1;
          dbgs() << ")\n");

    MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
    if (!MRI.isReserved(Reg1))
      MBB.addLiveIn(Reg1);
    if (RPI.isPaired()) {
      if (!MRI.isReserved(Reg2))
        MBB.addLiveIn(Reg2);
      MIB.addReg(Reg2, getPrologueDeath(MF, Reg2));
      MIB.addMemOperand(MF.getMachineMemOperand(
          MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
          MachineMemOperand::MOStore, 8, 8));
    }
    MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
        .addReg(AArch64::SP)
        .addImm(RPI.Offset) // [sp, #offset*8], where factor*8 is implicit
        .setMIFlag(MachineInstr::FrameSetup);
    MIB.addMemOperand(MF.getMachineMemOperand(
        MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
        MachineMemOperand::MOStore, 8, 8));
  }
  return true;
}

bool AArch64FrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  DebugLoc DL;
  SmallVector<RegPairInfo, 8> RegPairs;

  if (MI != MBB.end())
    DL = MI->getDebugLoc();

  computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);

  for (auto RPII = RegPairs.begin(), RPIE = RegPairs.end(); RPII != RPIE;
       ++RPII) {
    RegPairInfo RPI = *RPII;
    unsigned Reg1 = RPI.Reg1;
    unsigned Reg2 = RPI.Reg2;

    // Issue sequence of restores for cs regs. The last restore may be converted
    // to a post-increment load later by emitEpilogue if the callee-save stack
    // area allocation can't be combined with the local stack area allocation.
    // For example:
    //    ldp     fp, lr, [sp, #32]      // addImm(+4)
    //    ldp     x20, x19, [sp, #16]    // addImm(+2)
    //    ldp     x22, x21, [sp, #0]     // addImm(+0)
    // Note: see comment in spillCalleeSavedRegisters()
    unsigned LdrOpc;
    if (RPI.IsGPR)
      LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
    else
      LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
    DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
          if (RPI.isPaired())
            dbgs() << ", " << printReg(Reg2, TRI);
          dbgs() << ") -> fi#(" << RPI.FrameIdx;
          if (RPI.isPaired())
            dbgs() << ", " << RPI.FrameIdx + 1;
          dbgs() << ")\n");

    MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
    if (RPI.isPaired()) {
      MIB.addReg(Reg2, getDefRegState(true));
      MIB.addMemOperand(MF.getMachineMemOperand(
          MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
          MachineMemOperand::MOLoad, 8, 8));
    }
    MIB.addReg(Reg1, getDefRegState(true))
        .addReg(AArch64::SP)
        .addImm(RPI.Offset) // [sp, #offset*8] where the factor*8 is implicit
        .setMIFlag(MachineInstr::FrameDestroy);
    MIB.addMemOperand(MF.getMachineMemOperand(
        MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
        MachineMemOperand::MOLoad, 8, 8));
  }
  return true;
}

void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
                                                BitVector &SavedRegs,
                                                RegScavenger *RS) const {
  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  unsigned UnspilledCSGPR = AArch64::NoRegister;
  unsigned UnspilledCSGPRPaired = AArch64::NoRegister;

  MachineFrameInfo &MFI = MF.getFrameInfo();
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);

  unsigned BasePointerReg = RegInfo->hasBasePointer(MF)
                                ? RegInfo->getBaseRegister()
                                : (unsigned)AArch64::NoRegister;

  unsigned SpillEstimate = SavedRegs.count();
  for (unsigned i = 0; CSRegs[i]; ++i) {
    unsigned Reg = CSRegs[i];
    unsigned PairedReg = CSRegs[i ^ 1];
    if (Reg == BasePointerReg)
      SpillEstimate++;
    if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg))
      SpillEstimate++;
  }
  SpillEstimate += 2; // Conservatively include FP+LR in the estimate.
  unsigned StackEstimate = MFI.estimateStackSize(MF) + 8 * SpillEstimate;

  // The frame record needs to be created by saving the appropriate registers.
  if (hasFP(MF) || windowsRequiresStackProbe(MF, StackEstimate)) {
    SavedRegs.set(AArch64::FP);
    SavedRegs.set(AArch64::LR);
  }

  unsigned ExtraCSSpill = 0;
  // Figure out which callee-saved registers to save/restore.
  for (unsigned i = 0; CSRegs[i]; ++i) {
    const unsigned Reg = CSRegs[i];

    // Add the base pointer register to SavedRegs if it is callee-save.
    if (Reg == BasePointerReg)
      SavedRegs.set(Reg);

    bool RegUsed = SavedRegs.test(Reg);
    unsigned PairedReg = CSRegs[i ^ 1];
    if (!RegUsed) {
      if (AArch64::GPR64RegClass.contains(Reg) &&
          !RegInfo->isReservedReg(MF, Reg)) {
        UnspilledCSGPR = Reg;
        UnspilledCSGPRPaired = PairedReg;
      }
      continue;
    }

    // MachO's compact unwind format relies on all registers being stored in
    // pairs.
    // FIXME: the usual format is actually better if unwinding isn't needed.
    if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg)) {
      SavedRegs.set(PairedReg);
      if (AArch64::GPR64RegClass.contains(PairedReg) &&
          !RegInfo->isReservedReg(MF, PairedReg))
        ExtraCSSpill = PairedReg;
    }
  }

  DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:";
        for (unsigned Reg : SavedRegs.set_bits())
          dbgs() << ' ' << printReg(Reg, RegInfo);
        dbgs() << "\n";);

  // If any callee-saved registers are used, the frame cannot be eliminated.
  unsigned NumRegsSpilled = SavedRegs.count();
  bool CanEliminateFrame = NumRegsSpilled == 0;

  // The CSR spill slots have not been allocated yet, so estimateStackSize
  // won't include them.
  unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled;
  DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
  unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
  bool BigStack = (CFSize > EstimatedStackSizeLimit);
  if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
    AFI->setHasStackFrame(true);

  // Estimate if we might need to scavenge a register at some point in order
  // to materialize a stack offset. If so, either spill one additional
  // callee-saved register or reserve a special spill slot to facilitate
  // register scavenging. If we already spilled an extra callee-saved register
  // above to keep the number of spills even, we don't need to do anything else
  // here.
  if (BigStack) {
    if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) {
      DEBUG(dbgs() << "Spilling " << printReg(UnspilledCSGPR, RegInfo)
                   << " to get a scratch register.\n");
      SavedRegs.set(UnspilledCSGPR);
      // MachO's compact unwind format relies on all registers being stored in
      // pairs, so if we need to spill one extra for BigStack, then we need to
      // store the pair.
      if (produceCompactUnwindFrame(MF))
        SavedRegs.set(UnspilledCSGPRPaired);
      ExtraCSSpill = UnspilledCSGPRPaired;
      NumRegsSpilled = SavedRegs.count();
    }

    // If we didn't find an extra callee-saved register to spill, create
    // an emergency spill slot.
    if (!ExtraCSSpill || MF.getRegInfo().isPhysRegUsed(ExtraCSSpill)) {
      const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
      const TargetRegisterClass &RC = AArch64::GPR64RegClass;
      unsigned Size = TRI->getSpillSize(RC);
      unsigned Align = TRI->getSpillAlignment(RC);
      int FI = MFI.CreateStackObject(Size, Align, false);
      RS->addScavengingFrameIndex(FI);
      DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
                   << " as the emergency spill slot.\n");
    }
  }

  // Round up to register pair alignment to avoid additional SP adjustment
  // instructions.
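  // For example, three spilled GPRs occupy 24 bytes, which is rounded up to 32
  // so that the callee-save area keeps sp 16-byte aligned.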
  AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16));
}

bool AArch64FrameLowering::enableStackSlotScavenging(
    const MachineFunction &MF) const {
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  return AFI->hasCalleeSaveStackFreeSpace();
}