//===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of TargetFrameLowering class.
//
// On AArch64, stack frames are structured as follows:
//
// The stack grows downward.
//
// All of the individual frame areas on the frame below are optional, i.e. it's
// possible to create a function so that the particular area isn't present
// in the frame.
//
// At function entry, the "frame" looks as follows:
//
// |                                   | Higher address
// |-----------------------------------|
// |                                   |
// | arguments passed on the stack     |
// |                                   |
// |-----------------------------------| <- sp
// |                                   | Lower address
//
//
// After the prologue has run, the frame has the following general structure.
// Note that this doesn't depict the case where a red-zone is used. Also,
// technically the last frame area (VLAs) doesn't get created until in the
// main function body, after the prologue is run. However, it's depicted here
// for completeness.
//
// |                                   | Higher address
// |-----------------------------------|
// |                                   |
// | arguments passed on the stack     |
// |                                   |
// |-----------------------------------|
// |                                   |
// | prev_fp, prev_lr                  |
// | (a.k.a. "frame record")           |
// |-----------------------------------| <- fp(=x29)
// |                                   |
// | other callee-saved registers      |
// |                                   |
// |-----------------------------------|
// |.empty.space.to.make.part.below....|
// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
// |.the.standard.16-byte.alignment....|  compile time; if present)
// |-----------------------------------|
// |                                   |
// | local variables of fixed size     |
// | including spill slots             |
// |-----------------------------------| <- bp(not defined by ABI,
// |.variable-sized.local.variables....|       LLVM chooses X19)
// |.(VLAs)............................| (size of this area is unknown at
// |...................................|  compile time)
// |-----------------------------------| <- sp
// |                                   | Lower address
//
//
// To access the data in a frame, a constant offset from one of the pointers
// (fp, bp, sp) must be computable at compile time. The size of the areas with
// a dotted background cannot be computed at compile time if they are present,
// making it required to have all three of fp, bp and sp to be set up to be
// able to access all contents in the frame areas, assuming all of the frame
// areas are non-empty.
//
// For most functions, some of the frame areas are empty. For those functions,
// it may not be necessary to set up fp or bp:
// * A base pointer is definitely needed when there are both VLAs and local
//   variables with more-than-default alignment requirements.
// * A frame pointer is definitely needed when there are local variables with
//   more-than-default alignment requirements.
//
// In some cases when a base pointer is not strictly needed, it is generated
// anyway when offsets from the frame pointer to access local variables become
// so large that the offset can't be encoded in the immediate fields of loads
// or stores.
84 // 85 // FIXME: also explain the redzone concept. 86 // FIXME: also explain the concept of reserved call frames. 87 // 88 //===----------------------------------------------------------------------===// 89 90 #include "AArch64FrameLowering.h" 91 #include "AArch64InstrInfo.h" 92 #include "AArch64MachineFunctionInfo.h" 93 #include "AArch64Subtarget.h" 94 #include "AArch64TargetMachine.h" 95 #include "llvm/ADT/Statistic.h" 96 #include "llvm/CodeGen/MachineFrameInfo.h" 97 #include "llvm/CodeGen/MachineFunction.h" 98 #include "llvm/CodeGen/MachineInstrBuilder.h" 99 #include "llvm/CodeGen/MachineModuleInfo.h" 100 #include "llvm/CodeGen/MachineRegisterInfo.h" 101 #include "llvm/CodeGen/RegisterScavenging.h" 102 #include "llvm/IR/DataLayout.h" 103 #include "llvm/IR/Function.h" 104 #include "llvm/Support/CommandLine.h" 105 #include "llvm/Support/Debug.h" 106 #include "llvm/Support/raw_ostream.h" 107 108 using namespace llvm; 109 110 #define DEBUG_TYPE "frame-info" 111 112 static cl::opt<bool> EnableRedZone("aarch64-redzone", 113 cl::desc("enable use of redzone on AArch64"), 114 cl::init(false), cl::Hidden); 115 116 STATISTIC(NumRedZoneFunctions, "Number of functions using red zone"); 117 118 bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { 119 if (!EnableRedZone) 120 return false; 121 // Don't use the red zone if the function explicitly asks us not to. 122 // This is typically used for kernel code. 123 if (MF.getFunction()->hasFnAttribute(Attribute::NoRedZone)) 124 return false; 125 126 const MachineFrameInfo *MFI = MF.getFrameInfo(); 127 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 128 unsigned NumBytes = AFI->getLocalStackSize(); 129 130 return !(MFI->hasCalls() || hasFP(MF) || NumBytes > 128); 131 } 132 133 /// hasFP - Return true if the specified function should have a dedicated frame 134 /// pointer register. 
bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
  // Retain behavior of always omitting the FP for leaf functions when possible.
  // Frame-pointer elimination being disabled only forces an FP when the
  // function makes calls; every other condition below forces one regardless.
  return (MFI->hasCalls() &&
          MF.getTarget().Options.DisableFramePointerElim(MF)) ||
         MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() ||
         MFI->hasStackMap() || MFI->hasPatchPoint() ||
         RegInfo->needsStackRealignment(MF);
}

/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
/// not required, we reserve argument space for call sites in the function
/// immediately on entry to the current function.  This eliminates the need for
/// add/sub sp brackets around call sites.  Returns true if the call frame is
/// included as part of the stack frame.
bool
AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  // The call frame is reserved exactly when there are no variable-sized
  // objects.
  return !MF.getFrameInfo()->hasVarSizedObjects();
}

/// eliminateCallFramePseudoInstr - Replace the call-frame setup/destroy pseudo
/// instruction at \p I by an explicit SP adjustment where one is required,
/// erase the pseudo, and return the iterator produced by MBB.erase().
///
/// Operand 0 of the pseudo is the call frame size; for the destroy pseudo,
/// operand 1 is the amount popped by the callee.
MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  const AArch64InstrInfo *TII =
      static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  DebugLoc DL = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  // Only the destroy pseudo carries a callee-pop amount.
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
  if (!TFI->hasReservedCallFrame(MF)) {
    unsigned Align = getStackAlignment();

    // Round the call frame size up to the stack alignment; setup moves SP
    // down (negative amount), destroy moves it back up.
    int64_t Amount = I->getOperand(0).getImm();
    Amount = alignTo(Amount, Align);
    if (!IsDestroy)
      Amount = -Amount;

    // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
    // doesn't have to pop anything), then the first operand will be zero too so
    // this adjustment is a no-op.
    if (CalleePopAmount == 0) {
      // FIXME: in-function stack adjustment for calls is limited to 24-bits
      // because there's no guaranteed temporary register available.
      //
      // ADD/SUB (immediate) has only LSL #0 and LSL #12 available.
      // 1) For offset <= 12-bit, we use LSL #0
      // 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses
      // LSL #0, and the other uses LSL #12.
      //
      // Most call frames will be allocated at the start of a function so
      // this is OK, but it is a limitation that needs dealing with.
      assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
      emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, Amount, TII);
    }
  } else if (CalleePopAmount != 0) {
    // If the calling convention demands that the callee pops arguments from the
    // stack, we want to add it back if we have a reserved call frame.
    assert(CalleePopAmount < 0xffffff && "call frame too large");
    // NOTE(review): CalleePopAmount is a uint64_t, so this negation is done in
    // unsigned arithmetic and relies on the implicit conversion at the call to
    // yield a negative offset (emitFrameOffset's parameter type is not visible
    // here -- confirm). An explicit signed cast would make the intent clear.
    emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, -CalleePopAmount,
                    TII);
  }
  return MBB.erase(I);
}

/// emitCalleeSavedFrameMoves - Insert, before \p MBBI, one CFI_INSTRUCTION
/// (flagged FrameSetup) per entry of the function's callee-saved info,
/// recording the frame offset at which that register is saved. Does nothing
/// when there are no callee-saved registers.
void AArch64FrameLowering::emitCalleeSavedFrameMoves(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  DebugLoc DL = MBB.findDebugLoc(MBBI);

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
  if (CSI.empty())
    return;

  for (const auto &Info : CSI) {
    unsigned Reg = Info.getReg();
    // The recorded offset is the slot's object offset with the local-area
    // offset subtracted.
    int64_t Offset =
        MFI->getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea();
    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
    unsigned CFIIndex = MMI.addFrameInst(
        MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlags(MachineInstr::FrameSetup);
  }
}

// Find a scratch register that we can use at the start of the prologue to
// re-align the stack pointer.  We avoid using callee-save registers since they
// may appear to be free when this is called from canUseAsPrologue (during
// shrink wrapping), but then no longer be free when this is called from
// emitPrologue.
//
// FIXME: This is a bit conservative, since in the above case we could use one
// of the callee-save registers as a scratch temp to re-align the stack pointer,
// but we would then have to make sure that we were in fact saving at least one
// callee-save register in the prologue, which is additional complexity that
// doesn't seem worth the benefit.
//
// Returns AArch64::NoRegister when no suitable register is found.
static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
  MachineFunction *MF = MBB->getParent();

  // If MBB is an entry block, use X9 as the scratch register
  if (&MF->front() == MBB)
    return AArch64::X9;

  RegScavenger RS;
  RS.enterBasicBlock(*MBB);

  // Prefer X9 since it was historically used for the prologue scratch reg.
  if (!RS.isRegUsed(AArch64::X9))
    return AArch64::X9;

  // Find a free non callee-save reg.
  const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MF);
  // Collect the callee-saved registers into a bit set; the CSRegs list is
  // walked until its zero terminator.
  BitVector CalleeSaveRegs(RegInfo->getNumRegs());
  for (unsigned i = 0; CSRegs[i]; ++i)
    CalleeSaveRegs.set(CSRegs[i]);

  // Return the first available GPR64 that is not callee-saved.
  BitVector Available = RS.getRegsAvailable(&AArch64::GPR64RegClass);
  for (int AvailReg = Available.find_first(); AvailReg != -1;
       AvailReg = Available.find_next(AvailReg))
    if (!CalleeSaveRegs.test(AvailReg))
      return AvailReg;

  return AArch64::NoRegister;
}

/// canUseAsPrologue - Return true if the prologue may be placed in \p MBB.
/// Without stack realignment every block qualifies; otherwise the block must
/// offer a scratch register that is not callee-saved.
bool AArch64FrameLowering::canUseAsPrologue(
    const MachineBasicBlock &MBB) const {
  const MachineFunction *MF = MBB.getParent();
  // The scratch-register search takes a non-const block pointer.
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
  const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  // Don't need a scratch register if we're not going to re-align the stack.
  if (!RegInfo->needsStackRealignment(*MF))
    return true;
  // Otherwise, we can use any block as long as it has a scratch register
  // available.
  return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister;
}

/// emitPrologue - Emit the frame setup code for \p MF into \p MBB: set up the
/// frame pointer if one is needed, allocate the local area (re-aligning SP
/// when required), initialise the base pointer if one is used, and emit the
/// CFI instructions describing the frame when they are needed. Also records
/// the local stack size in the AArch64FunctionInfo. Nothing is emitted for the
/// GHC calling convention.
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const Function *Fn = MF.getFunction();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
  bool HasFP = hasFP(MF);

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc DL;

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  int NumBytes = (int)MFI->getStackSize();
  if (!AFI->hasStackFrame()) {
    assert(!HasFP && "unexpected function without stack frame but with FP");

    // All of the stack allocation is for locals.
    AFI->setLocalStackSize(NumBytes);

    if (!NumBytes)
      return;
    // REDZONE: If the stack size is less than 128 bytes, we don't need
    // to actually allocate.
    if (canUseRedZone(MF))
      ++NumRedZoneFunctions;
    else {
      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);

      // Label used to tie together the PROLOG_LABEL and the MachineMoves.
      MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }
    return;
  }

  NumBytes -= AFI->getCalleeSavedStackSize();
  assert(NumBytes >= 0 && "Negative stack allocation size!?");
  // All of the remaining stack allocations are for locals.
  AFI->setLocalStackSize(NumBytes);

  // Move past the saves of the callee-saved registers.
  MachineBasicBlock::iterator End = MBB.end();
  while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup))
    ++MBBI;
  if (HasFP) {
    // Only set up FP if we actually need to. Frame pointer is fp = sp - 16.
    int FPOffset = AFI->getCalleeSavedStackSize() - 16;

    // Issue    sub fp, sp, FPOffset or
    //          mov fp,sp          when FPOffset is zero.
    // Note: All stores of callee-saved registers are marked as "FrameSetup".
    // This code marks the instruction(s) that set the FP also.
    //
    // NOTE(review): FPOffset is passed here as a positive offset, whereas the
    // stack allocation below passes -NumBytes, and the example prologue further
    // down shows "add fp, sp, #offset - 16". The "sub" above (and "sp - 16")
    // presumably should read "add" (and "+ size - 16") -- confirm.
    emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
                    MachineInstr::FrameSetup);
  }

  // Allocate space for the rest of the frame.
  if (NumBytes) {
    const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
    // When realigning, the decremented SP is first computed into a scratch
    // register and only then masked into SP.
    unsigned scratchSPReg = AArch64::SP;

    if (NeedsRealignment) {
      scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
      assert(scratchSPReg != AArch64::NoRegister);
    }

    // If we're a leaf function, try using the red zone.
    if (!canUseRedZone(MF))
      // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
      // the correct value here, as NumBytes also includes padding bytes,
      // which shouldn't be counted here.
      emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);

    if (NeedsRealignment) {
      const unsigned Alignment = MFI->getMaxAlignment();
      const unsigned NrBitsToZero = countTrailingZeros(Alignment);
      assert(NrBitsToZero > 1);
      assert(scratchSPReg != AArch64::SP);

      // SUB X9, SP, NumBytes
      //   -- X9 is temporary register, so shouldn't contain any live data here,
      //   -- free to use. This is already produced by emitFrameOffset above.
      // AND SP, X9, 0b11111...0000
      // The logical immediates have a non-trivial encoding. The following
      // formula computes the encoded immediate with all ones but
      // NrBitsToZero zero bits as least significant bits.
      uint32_t andMaskEncoded = (1 << 12)                         // = N
                                | ((64 - NrBitsToZero) << 6)      // immr
                                | ((64 - NrBitsToZero - 1) << 0); // imms

      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
          .addReg(scratchSPReg, RegState::Kill)
          .addImm(andMaskEncoded);
      // Remembered so that the epilogue restores SP from FP.
      AFI->setStackRealigned(true);
    }
  }

  // If we need a base pointer, set it up here. It's whatever the value of the
  // stack pointer is at this point. Any variable size objects will be allocated
  // after this, so we can still use the base pointer to reference locals.
  //
  // FIXME: Clarify FrameSetup flags here.
  // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
  // needed.
  if (RegInfo->hasBasePointer(MF)) {
    TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
                     false);
  }

  if (needsFrameMoves) {
    const DataLayout &TD = MF.getDataLayout();
    const int StackGrowth = -TD.getPointerSize(0);
    unsigned FramePtr = RegInfo->getFrameRegister(MF);
    // An example of the prologue:
    //
    //     .globl __foo
    //     .align 2
    //  __foo:
    // Ltmp0:
    //     .cfi_startproc
    //     .cfi_personality 155, ___gxx_personality_v0
    // Leh_func_begin:
    //     .cfi_lsda 16, Lexception33
    //
    //     stp  xa,bx, [sp, -#offset]!
    //     ...
    //     stp  x28, x27, [sp, #offset-32]
    //     stp  fp, lr, [sp, #offset-16]
    //     add  fp, sp, #offset - 16
    //     sub  sp, sp, #1360
    //
    // The Stack:
    //       +-------------------------------------------+
    // 10000 | ........ | ........ | ........ | ........ |
    // 10004 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10008 | ........ | ........ | ........ | ........ |
    // 1000c | ........ | ........ | ........ | ........ |
    //       +===========================================+
    // 10010 |                X28 Register               |
    // 10014 |                X28 Register               |
    //       +-------------------------------------------+
    // 10018 |                X27 Register               |
    // 1001c |                X27 Register               |
    //       +===========================================+
    // 10020 |                Frame Pointer              |
    // 10024 |                Frame Pointer              |
    //       +-------------------------------------------+
    // 10028 |                Link Register              |
    // 1002c |                Link Register              |
    //       +===========================================+
    // 10030 | ........ | ........ | ........ | ........ |
    // 10034 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10038 | ........ | ........ | ........ | ........ |
    // 1003c | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    //
    //     [sp] = 10030        ::    >>initial value<<
    //     sp = 10020          ::  stp fp, lr, [sp, #-16]!
    //     fp = sp == 10020    ::  mov fp, sp
    //     [sp] == 10020       ::  stp x28, x27, [sp, #-16]!
    //     sp == 10010         ::    >>final value<<
    //
    // The frame pointer (w29) points to address 10020. If we use an offset of
    // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
    // for w27, and -32 for w28:
    //
    //  Ltmp1:
    //     .cfi_def_cfa w29, 16
    //  Ltmp2:
    //     .cfi_offset w30, -8
    //  Ltmp3:
    //     .cfi_offset w29, -16
    //  Ltmp4:
    //     .cfi_offset w27, -24
    //  Ltmp5:
    //     .cfi_offset w28, -32

    if (HasFP) {
      // Define the current CFA rule to use the provided FP.
      unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    } else {
      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize()));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }

    // Now emit the moves for whatever callee saved regs we have (including FP,
    // LR if those are saved).
    emitCalleeSavedFrameMoves(MBB, MBBI);
  }
}

/// emitEpilogue - Emit the frame teardown code for \p MF into \p MBB: release
/// the local area (or restore SP from FP when the frame has variable-sized
/// objects or a realigned stack) ahead of the callee-save restores, and pop
/// any argument space this function is responsible for just before the
/// terminator. Nothing is emitted for the GHC calling convention.
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL;
  bool IsTailCallReturn = false;
  // The block may have no non-debug instruction, in which case DL stays
  // unknown and the return is not treated as a tail call.
  if (MBB.end() != MBBI) {
    DL = MBBI->getDebugLoc();
    unsigned RetOpcode = MBBI->getOpcode();
    IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
      RetOpcode == AArch64::TCRETURNri;
  }
  int NumBytes = MFI->getStackSize();
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  // Initial and residual are named for consistency with the prologue. Note that
  // in the epilogue, the residual adjustment is executed first.
  uint64_t ArgumentPopSize = 0;
  if (IsTailCallReturn) {
    MachineOperand &StackAdjust = MBBI->getOperand(1);

    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments, this is
    // calculated during LowerCall and consumed here...
    ArgumentPopSize = StackAdjust.getImm();
  } else {
    // ... otherwise the amount to pop is *all* of the argument space,
    // conveniently stored in the MachineFunctionInfo by
    // LowerFormalArguments. This will, of course, be zero for the C calling
    // convention.
    ArgumentPopSize = AFI->getArgumentStackToRestore();
  }

  // The stack frame should be like below,
  //
  //      ----------------------                     ---
  //      |                    |                      |
  //      | BytesInStackArgArea|              CalleeArgStackSize
  //      | (NumReusableBytes) |                (of tail call)
  //      |                    |                     ---
  //      |                    |                      |
  //      ---------------------|        ---           |
  //      |                    |         |            |
  //      |   CalleeSavedReg   |         |            |
  //      | (CalleeSavedStackSize)|      |            |
  //      |                    |         |            |
  //      ---------------------|         |         NumBytes
  //      |                    |     StackSize  (StackAdjustUp)
  //      |   LocalStackSize   |         |            |
  //      | (covering callee   |         |            |
  //      |       args)        |         |            |
  //      |                    |         |            |
  //      ----------------------        ---          ---
  //
  // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
  //             = StackSize + ArgumentPopSize
  //
  // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
  // it as the 2nd argument of AArch64ISD::TC_RETURN.

  // Move past the restores of the callee-saved registers.
  // LastPopI ends up at the first instruction of the trailing run of
  // FrameDestroy instructions that precedes the terminator (or at the
  // terminator itself if there is no such run).
  MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
  MachineBasicBlock::iterator Begin = MBB.begin();
  while (LastPopI != Begin) {
    --LastPopI;
    if (!LastPopI->getFlag(MachineInstr::FrameDestroy)) {
      ++LastPopI;
      break;
    }
  }
  NumBytes -= AFI->getCalleeSavedStackSize();
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  if (!hasFP(MF)) {
    bool RedZone = canUseRedZone(MF);
    // If this was a redzone leaf function, we don't need to restore the
    // stack pointer (but we may need to pop stack args for fastcc).
    if (RedZone && ArgumentPopSize == 0)
      return;

    bool NoCalleeSaveRestore = AFI->getCalleeSavedStackSize() == 0;
    int StackRestoreBytes = RedZone ? 0 : NumBytes;
    // With no callee-save restores in between, the local pop and the argument
    // pop can be folded into a single SP adjustment.
    if (NoCalleeSaveRestore)
      StackRestoreBytes += ArgumentPopSize;
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
                    StackRestoreBytes, TII, MachineInstr::FrameDestroy);
    // If we were able to combine the local stack pop with the argument pop,
    // then we're done.
    if (NoCalleeSaveRestore || ArgumentPopSize == 0)
      return;
    // The locals have been popped above; only the argument pop remains.
    NumBytes = 0;
  }

  // Restore the original stack pointer.
  // FIXME: Rather than doing the math here, we should instead just use
  // non-post-indexed loads for the restores if we aren't actually going to
  // be able to save any instructions.
  if (MFI->hasVarSizedObjects() || AFI->isStackRealigned())
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
                    -AFI->getCalleeSavedStackSize() + 16, TII,
                    MachineInstr::FrameDestroy);
  else if (NumBytes)
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII,
                    MachineInstr::FrameDestroy);

  // This must be placed after the callee-save restore code because that code
  // assumes the SP is at the same location as it was after the callee-save save
  // code in the prologue.
  if (ArgumentPopSize)
    emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
                    ArgumentPopSize, TII, MachineInstr::FrameDestroy);
}

/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
/// debug info.  It's the same as what we use for resolving the code-gen
/// references for now.  FIXME: This can go wrong when references are
/// SP-relative and simple call frames aren't used.
629 int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF, 630 int FI, 631 unsigned &FrameReg) const { 632 return resolveFrameIndexReference(MF, FI, FrameReg); 633 } 634 635 int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, 636 int FI, unsigned &FrameReg, 637 bool PreferFP) const { 638 const MachineFrameInfo *MFI = MF.getFrameInfo(); 639 const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>( 640 MF.getSubtarget().getRegisterInfo()); 641 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 642 int FPOffset = MFI->getObjectOffset(FI) + 16; 643 int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize(); 644 bool isFixed = MFI->isFixedObjectIndex(FI); 645 646 // Use frame pointer to reference fixed objects. Use it for locals if 647 // there are VLAs or a dynamically realigned SP (and thus the SP isn't 648 // reliable as a base). Make sure useFPForScavengingIndex() does the 649 // right thing for the emergency spill slot. 650 bool UseFP = false; 651 if (AFI->hasStackFrame()) { 652 // Note: Keeping the following as multiple 'if' statements rather than 653 // merging to a single expression for readability. 654 // 655 // Argument access should always use the FP. 656 if (isFixed) { 657 UseFP = hasFP(MF); 658 } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) && 659 !RegInfo->needsStackRealignment(MF)) { 660 // Use SP or FP, whichever gives us the best chance of the offset 661 // being in range for direct access. If the FPOffset is positive, 662 // that'll always be best, as the SP will be even further away. 663 // If the FPOffset is negative, we have to keep in mind that the 664 // available offset range for negative offsets is smaller than for 665 // positive ones. If we have variable sized objects, we're stuck with 666 // using the FP regardless, though, as the SP offset is unknown 667 // and we don't have a base pointer available. 
If an offset is 668 // available via the FP and the SP, use whichever is closest. 669 if (PreferFP || MFI->hasVarSizedObjects() || FPOffset >= 0 || 670 (FPOffset >= -256 && Offset > -FPOffset)) 671 UseFP = true; 672 } 673 } 674 675 assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) && 676 "In the presence of dynamic stack pointer realignment, " 677 "non-argument objects cannot be accessed through the frame pointer"); 678 679 if (UseFP) { 680 FrameReg = RegInfo->getFrameRegister(MF); 681 return FPOffset; 682 } 683 684 // Use the base pointer if we have one. 685 if (RegInfo->hasBasePointer(MF)) 686 FrameReg = RegInfo->getBaseRegister(); 687 else { 688 FrameReg = AArch64::SP; 689 // If we're using the red zone for this function, the SP won't actually 690 // be adjusted, so the offsets will be negative. They're also all 691 // within range of the signed 9-bit immediate instructions. 692 if (canUseRedZone(MF)) 693 Offset -= AFI->getLocalStackSize(); 694 } 695 696 return Offset; 697 } 698 699 static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) { 700 // Do not set a kill flag on values that are also marked as live-in. This 701 // happens with the @llvm-returnaddress intrinsic and with arguments passed in 702 // callee saved registers. 703 // Omitting the kill flags is conservatively correct even if the live-in 704 // is not used after all. 
705 bool IsLiveIn = MF.getRegInfo().isLiveIn(Reg); 706 return getKillRegState(!IsLiveIn); 707 } 708 709 static bool produceCompactUnwindFrame(MachineFunction &MF) { 710 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); 711 AttributeSet Attrs = MF.getFunction()->getAttributes(); 712 return Subtarget.isTargetMachO() && 713 !(Subtarget.getTargetLowering()->supportSwiftError() && 714 Attrs.hasAttrSomewhere(Attribute::SwiftError)); 715 } 716 717 718 struct RegPairInfo { 719 RegPairInfo() : Reg1(AArch64::NoRegister), Reg2(AArch64::NoRegister) {} 720 unsigned Reg1; 721 unsigned Reg2; 722 int FrameIdx; 723 int Offset; 724 bool IsGPR; 725 bool isPaired() const { return Reg2 != AArch64::NoRegister; } 726 }; 727 728 static void computeCalleeSaveRegisterPairs( 729 MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI, 730 const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs) { 731 732 if (CSI.empty()) 733 return; 734 735 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 736 MachineFrameInfo *MFI = MF.getFrameInfo(); 737 CallingConv::ID CC = MF.getFunction()->getCallingConv(); 738 unsigned Count = CSI.size(); 739 (void)CC; 740 // MachO's compact unwind format relies on all registers being stored in 741 // pairs. 742 assert((!produceCompactUnwindFrame(MF) || 743 CC == CallingConv::PreserveMost || 744 (Count & 1) == 0) && 745 "Odd number of callee-saved regs to spill!"); 746 unsigned Offset = AFI->getCalleeSavedStackSize(); 747 748 for (unsigned i = 0; i < Count; ++i) { 749 RegPairInfo RPI; 750 RPI.Reg1 = CSI[i].getReg(); 751 752 assert(AArch64::GPR64RegClass.contains(RPI.Reg1) || 753 AArch64::FPR64RegClass.contains(RPI.Reg1)); 754 RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1); 755 756 // Add the next reg to the pair if it is in the same register class. 
757 if (i + 1 < Count) { 758 unsigned NextReg = CSI[i + 1].getReg(); 759 if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) || 760 (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg))) 761 RPI.Reg2 = NextReg; 762 } 763 764 // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI 765 // list to come in sorted by frame index so that we can issue the store 766 // pair instructions directly. Assert if we see anything otherwise. 767 // 768 // The order of the registers in the list is controlled by 769 // getCalleeSavedRegs(), so they will always be in-order, as well. 770 assert((!RPI.isPaired() || 771 (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) && 772 "Out of order callee saved regs!"); 773 774 // MachO's compact unwind format relies on all registers being stored in 775 // adjacent register pairs. 776 assert((!produceCompactUnwindFrame(MF) || 777 CC == CallingConv::PreserveMost || 778 (RPI.isPaired() && 779 ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) || 780 RPI.Reg1 + 1 == RPI.Reg2))) && 781 "Callee-save registers not saved as adjacent register pair!"); 782 783 RPI.FrameIdx = CSI[i].getFrameIdx(); 784 785 if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) { 786 // Round up size of non-pair to pair size if we need to pad the 787 // callee-save area to ensure 16-byte alignment. 788 Offset -= 16; 789 assert(MFI->getObjectAlignment(RPI.FrameIdx) <= 16); 790 MFI->setObjectSize(RPI.FrameIdx, 16); 791 } else 792 Offset -= RPI.isPaired() ? 16 : 8; 793 assert(Offset % 8 == 0); 794 RPI.Offset = Offset / 8; 795 assert((RPI.Offset >= -64 && RPI.Offset <= 63) && 796 "Offset out of bounds for LDP/STP immediate"); 797 798 RegPairs.push_back(RPI); 799 if (RPI.isPaired()) 800 ++i; 801 } 802 803 // Align first offset to even 16-byte boundary to avoid additional SP 804 // adjustment instructions. 805 // Last pair offset is size of whole callee-save region for SP 806 // pre-dec/post-inc. 
807 RegPairInfo &LastPair = RegPairs.back(); 808 assert(AFI->getCalleeSavedStackSize() % 8 == 0); 809 LastPair.Offset = AFI->getCalleeSavedStackSize() / 8; 810 } 811 812 bool AArch64FrameLowering::spillCalleeSavedRegisters( 813 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 814 const std::vector<CalleeSavedInfo> &CSI, 815 const TargetRegisterInfo *TRI) const { 816 MachineFunction &MF = *MBB.getParent(); 817 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 818 DebugLoc DL; 819 SmallVector<RegPairInfo, 8> RegPairs; 820 821 computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs); 822 823 for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE; 824 ++RPII) { 825 RegPairInfo RPI = *RPII; 826 unsigned Reg1 = RPI.Reg1; 827 unsigned Reg2 = RPI.Reg2; 828 unsigned StrOpc; 829 830 // Issue sequence of non-sp increment and pi sp spills for cs regs. The 831 // first spill is a pre-increment that allocates the stack. 832 // For example: 833 // stp x22, x21, [sp, #-48]! // addImm(-6) 834 // stp x20, x19, [sp, #16] // addImm(+2) 835 // stp fp, lr, [sp, #32] // addImm(+4) 836 // Rationale: This sequence saves uop updates compared to a sequence of 837 // pre-increment spills like stp xi,xj,[sp,#-16]! 838 // Note: Similar rationale and sequence for restores in epilog. 839 bool BumpSP = RPII == RegPairs.rbegin(); 840 if (RPI.IsGPR) { 841 // For first spill use pre-increment store. 842 if (BumpSP) 843 StrOpc = RPI.isPaired() ? AArch64::STPXpre : AArch64::STRXpre; 844 else 845 StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui; 846 } else { 847 // For first spill use pre-increment store. 848 if (BumpSP) 849 StrOpc = RPI.isPaired() ? AArch64::STPDpre : AArch64::STRDpre; 850 else 851 StrOpc = RPI.isPaired() ? 
AArch64::STPDi : AArch64::STRDui; 852 } 853 DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1); 854 if (RPI.isPaired()) 855 dbgs() << ", " << TRI->getName(Reg2); 856 dbgs() << ") -> fi#(" << RPI.FrameIdx; 857 if (RPI.isPaired()) 858 dbgs() << ", " << RPI.FrameIdx+1; 859 dbgs() << ")\n"); 860 861 const int Offset = BumpSP ? -RPI.Offset : RPI.Offset; 862 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc)); 863 if (BumpSP) 864 MIB.addReg(AArch64::SP, RegState::Define); 865 866 if (RPI.isPaired()) { 867 MBB.addLiveIn(Reg1); 868 MBB.addLiveIn(Reg2); 869 MIB.addReg(Reg2, getPrologueDeath(MF, Reg2)) 870 .addReg(Reg1, getPrologueDeath(MF, Reg1)) 871 .addReg(AArch64::SP) 872 .addImm(Offset) // [sp, #offset * 8], where factor * 8 is implicit 873 .setMIFlag(MachineInstr::FrameSetup); 874 MIB.addMemOperand(MF.getMachineMemOperand( 875 MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1), 876 MachineMemOperand::MOStore, 8, 8)); 877 } else { 878 MBB.addLiveIn(Reg1); 879 MIB.addReg(Reg1, getPrologueDeath(MF, Reg1)) 880 .addReg(AArch64::SP) 881 .addImm(BumpSP ? 
Offset * 8 : Offset) // pre-inc version is unscaled 882 .setMIFlag(MachineInstr::FrameSetup); 883 } 884 MIB.addMemOperand(MF.getMachineMemOperand( 885 MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx), 886 MachineMemOperand::MOStore, 8, 8)); 887 } 888 return true; 889 } 890 891 bool AArch64FrameLowering::restoreCalleeSavedRegisters( 892 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 893 const std::vector<CalleeSavedInfo> &CSI, 894 const TargetRegisterInfo *TRI) const { 895 MachineFunction &MF = *MBB.getParent(); 896 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 897 DebugLoc DL; 898 SmallVector<RegPairInfo, 8> RegPairs; 899 900 if (MI != MBB.end()) 901 DL = MI->getDebugLoc(); 902 903 computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs); 904 905 for (auto RPII = RegPairs.begin(), RPIE = RegPairs.end(); RPII != RPIE; 906 ++RPII) { 907 RegPairInfo RPI = *RPII; 908 unsigned Reg1 = RPI.Reg1; 909 unsigned Reg2 = RPI.Reg2; 910 911 // Issue sequence of non-sp increment and sp-pi restores for cs regs. Only 912 // the last load is sp-pi post-increment and de-allocates the stack: 913 // For example: 914 // ldp fp, lr, [sp, #32] // addImm(+4) 915 // ldp x20, x19, [sp, #16] // addImm(+2) 916 // ldp x22, x21, [sp], #48 // addImm(+6) 917 // Note: see comment in spillCalleeSavedRegisters() 918 unsigned LdrOpc; 919 bool BumpSP = RPII == std::prev(RegPairs.end()); 920 if (RPI.IsGPR) { 921 if (BumpSP) 922 LdrOpc = RPI.isPaired() ? AArch64::LDPXpost : AArch64::LDRXpost; 923 else 924 LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui; 925 } else { 926 if (BumpSP) 927 LdrOpc = RPI.isPaired() ? AArch64::LDPDpost : AArch64::LDRDpost; 928 else 929 LdrOpc = RPI.isPaired() ? 
AArch64::LDPDi : AArch64::LDRDui; 930 } 931 DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1); 932 if (RPI.isPaired()) 933 dbgs() << ", " << TRI->getName(Reg2); 934 dbgs() << ") -> fi#(" << RPI.FrameIdx; 935 if (RPI.isPaired()) 936 dbgs() << ", " << RPI.FrameIdx+1; 937 dbgs() << ")\n"); 938 939 const int Offset = RPI.Offset; 940 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc)); 941 if (BumpSP) 942 MIB.addReg(AArch64::SP, RegState::Define); 943 944 if (RPI.isPaired()) { 945 MIB.addReg(Reg2, getDefRegState(true)) 946 .addReg(Reg1, getDefRegState(true)) 947 .addReg(AArch64::SP) 948 .addImm(Offset) // [sp], #offset * 8 or [sp, #offset * 8] 949 // where the factor * 8 is implicit 950 .setMIFlag(MachineInstr::FrameDestroy); 951 MIB.addMemOperand(MF.getMachineMemOperand( 952 MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1), 953 MachineMemOperand::MOLoad, 8, 8)); 954 } else { 955 MIB.addReg(Reg1, getDefRegState(true)) 956 .addReg(AArch64::SP) 957 .addImm(BumpSP ? Offset * 8 : Offset) // post-dec version is unscaled 958 .setMIFlag(MachineInstr::FrameDestroy); 959 } 960 MIB.addMemOperand(MF.getMachineMemOperand( 961 MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx), 962 MachineMemOperand::MOLoad, 8, 8)); 963 } 964 return true; 965 } 966 967 void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, 968 BitVector &SavedRegs, 969 RegScavenger *RS) const { 970 // All calls are tail calls in GHC calling conv, and functions have no 971 // prologue/epilogue. 
972 if (MF.getFunction()->getCallingConv() == CallingConv::GHC) 973 return; 974 975 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); 976 const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>( 977 MF.getSubtarget().getRegisterInfo()); 978 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 979 unsigned UnspilledCSGPR = AArch64::NoRegister; 980 unsigned UnspilledCSGPRPaired = AArch64::NoRegister; 981 982 // The frame record needs to be created by saving the appropriate registers 983 if (hasFP(MF)) { 984 SavedRegs.set(AArch64::FP); 985 SavedRegs.set(AArch64::LR); 986 } 987 988 unsigned BasePointerReg = AArch64::NoRegister; 989 if (RegInfo->hasBasePointer(MF)) 990 BasePointerReg = RegInfo->getBaseRegister(); 991 992 bool ExtraCSSpill = false; 993 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); 994 // Figure out which callee-saved registers to save/restore. 995 for (unsigned i = 0; CSRegs[i]; ++i) { 996 const unsigned Reg = CSRegs[i]; 997 998 // Add the base pointer register to SavedRegs if it is callee-save. 999 if (Reg == BasePointerReg) 1000 SavedRegs.set(Reg); 1001 1002 bool RegUsed = SavedRegs.test(Reg); 1003 unsigned PairedReg = CSRegs[i ^ 1]; 1004 if (!RegUsed) { 1005 if (AArch64::GPR64RegClass.contains(Reg) && 1006 !RegInfo->isReservedReg(MF, Reg)) { 1007 UnspilledCSGPR = Reg; 1008 UnspilledCSGPRPaired = PairedReg; 1009 } 1010 continue; 1011 } 1012 1013 // MachO's compact unwind format relies on all registers being stored in 1014 // pairs. 1015 // FIXME: the usual format is actually better if unwinding isn't needed. 
1016 if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg)) { 1017 SavedRegs.set(PairedReg); 1018 ExtraCSSpill = true; 1019 } 1020 } 1021 1022 DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:"; 1023 for (int Reg = SavedRegs.find_first(); Reg != -1; 1024 Reg = SavedRegs.find_next(Reg)) 1025 dbgs() << ' ' << PrintReg(Reg, RegInfo); 1026 dbgs() << "\n";); 1027 1028 // If any callee-saved registers are used, the frame cannot be eliminated. 1029 unsigned NumRegsSpilled = SavedRegs.count(); 1030 bool CanEliminateFrame = NumRegsSpilled == 0; 1031 1032 // FIXME: Set BigStack if any stack slot references may be out of range. 1033 // For now, just conservatively guestimate based on unscaled indexing 1034 // range. We'll end up allocating an unnecessary spill slot a lot, but 1035 // realistically that's not a big deal at this stage of the game. 1036 // The CSR spill slots have not been allocated yet, so estimateStackSize 1037 // won't include them. 1038 MachineFrameInfo *MFI = MF.getFrameInfo(); 1039 unsigned CFSize = MFI->estimateStackSize(MF) + 8 * NumRegsSpilled; 1040 DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n"); 1041 bool BigStack = (CFSize >= 256); 1042 if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) 1043 AFI->setHasStackFrame(true); 1044 1045 // Estimate if we might need to scavenge a register at some point in order 1046 // to materialize a stack offset. If so, either spill one additional 1047 // callee-saved register or reserve a special spill slot to facilitate 1048 // register scavenging. If we already spilled an extra callee-saved register 1049 // above to keep the number of spills even, we don't need to do anything else 1050 // here. 
1051 if (BigStack && !ExtraCSSpill) { 1052 if (UnspilledCSGPR != AArch64::NoRegister) { 1053 DEBUG(dbgs() << "Spilling " << PrintReg(UnspilledCSGPR, RegInfo) 1054 << " to get a scratch register.\n"); 1055 SavedRegs.set(UnspilledCSGPR); 1056 // MachO's compact unwind format relies on all registers being stored in 1057 // pairs, so if we need to spill one extra for BigStack, then we need to 1058 // store the pair. 1059 if (produceCompactUnwindFrame(MF)) 1060 SavedRegs.set(UnspilledCSGPRPaired); 1061 ExtraCSSpill = true; 1062 NumRegsSpilled = SavedRegs.count(); 1063 } 1064 1065 // If we didn't find an extra callee-saved register to spill, create 1066 // an emergency spill slot. 1067 if (!ExtraCSSpill) { 1068 const TargetRegisterClass *RC = &AArch64::GPR64RegClass; 1069 int FI = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false); 1070 RS->addScavengingFrameIndex(FI); 1071 DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI 1072 << " as the emergency spill slot.\n"); 1073 } 1074 } 1075 1076 // Round up to register pair alignment to avoid additional SP adjustment 1077 // instructions. 1078 AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16)); 1079 } 1080