1 //===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the AArch64 implementation of TargetFrameLowering class.
11 //
12 // On AArch64, stack frames are structured as follows:
13 //
14 // The stack grows downward.
15 //
16 // All of the individual frame areas on the frame below are optional, i.e. it's
17 // possible to create a function so that the particular area isn't present
18 // in the frame.
19 //
20 // At function entry, the "frame" looks as follows:
21 //
22 // |                                   | Higher address
23 // |-----------------------------------|
24 // |                                   |
25 // | arguments passed on the stack     |
26 // |                                   |
27 // |-----------------------------------| <- sp
28 // |                                   | Lower address
29 //
30 //
31 // After the prologue has run, the frame has the following general structure.
32 // Note that this doesn't depict the case where a red-zone is used. Also,
33 // technically the last frame area (VLAs) doesn't get created until in the
34 // main function body, after the prologue is run. However, it's depicted here
35 // for completeness.
36 //
37 // |                                   | Higher address
38 // |-----------------------------------|
39 // |                                   |
40 // | arguments passed on the stack     |
41 // |                                   |
42 // |-----------------------------------|
43 // |                                   |
44 // | prev_fp, prev_lr                  |
45 // | (a.k.a. "frame record")           |
46 // |-----------------------------------| <- fp(=x29)
47 // |                                   |
48 // | other callee-saved registers      |
49 // |                                   |
50 // |-----------------------------------|
51 // |.empty.space.to.make.part.below....|
52 // |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
53 // |.the.standard.16-byte.alignment....|  compile time; if present)
54 // |-----------------------------------|
55 // |                                   |
56 // | local variables of fixed size     |
57 // | including spill slots             |
58 // |-----------------------------------| <- bp(not defined by ABI,
59 // |.variable-sized.local.variables....|       LLVM chooses X19)
60 // |.(VLAs)............................| (size of this area is unknown at
61 // |...................................|  compile time)
62 // |-----------------------------------| <- sp
63 // |                                   | Lower address
64 //
65 //
// To access the data in a frame at compile time, a constant offset must be
67 // computable from one of the pointers (fp, bp, sp) to access it. The size
68 // of the areas with a dotted background cannot be computed at compile-time
69 // if they are present, making it required to have all three of fp, bp and
70 // sp to be set up to be able to access all contents in the frame areas,
71 // assuming all of the frame areas are non-empty.
72 //
73 // For most functions, some of the frame areas are empty. For those functions,
74 // it may not be necessary to set up fp or bp:
75 // * A base pointer is definitely needed when there are both VLAs and local
76 //   variables with more-than-default alignment requirements.
77 // * A frame pointer is definitely needed when there are local variables with
78 //   more-than-default alignment requirements.
79 //
80 // In some cases when a base pointer is not strictly needed, it is generated
81 // anyway when offsets from the frame pointer to access local variables become
82 // so large that the offset can't be encoded in the immediate fields of loads
83 // or stores.
84 //
85 // FIXME: also explain the redzone concept.
86 // FIXME: also explain the concept of reserved call frames.
87 //
88 //===----------------------------------------------------------------------===//
89 
90 #include "AArch64FrameLowering.h"
91 #include "AArch64InstrInfo.h"
92 #include "AArch64MachineFunctionInfo.h"
93 #include "AArch64Subtarget.h"
94 #include "AArch64TargetMachine.h"
95 #include "llvm/ADT/Statistic.h"
96 #include "llvm/CodeGen/MachineFrameInfo.h"
97 #include "llvm/CodeGen/MachineFunction.h"
98 #include "llvm/CodeGen/MachineInstrBuilder.h"
99 #include "llvm/CodeGen/MachineModuleInfo.h"
100 #include "llvm/CodeGen/MachineRegisterInfo.h"
101 #include "llvm/CodeGen/RegisterScavenging.h"
102 #include "llvm/IR/DataLayout.h"
103 #include "llvm/IR/Function.h"
104 #include "llvm/Support/CommandLine.h"
105 #include "llvm/Support/Debug.h"
106 #include "llvm/Support/raw_ostream.h"
107 
108 using namespace llvm;
109 
110 #define DEBUG_TYPE "frame-info"
111 
112 static cl::opt<bool> EnableRedZone("aarch64-redzone",
113                                    cl::desc("enable use of redzone on AArch64"),
114                                    cl::init(false), cl::Hidden);
115 
116 STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
117 
118 bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
119   if (!EnableRedZone)
120     return false;
121   // Don't use the red zone if the function explicitly asks us not to.
122   // This is typically used for kernel code.
123   if (MF.getFunction()->hasFnAttribute(Attribute::NoRedZone))
124     return false;
125 
126   const MachineFrameInfo *MFI = MF.getFrameInfo();
127   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
128   unsigned NumBytes = AFI->getLocalStackSize();
129 
130   return !(MFI->hasCalls() || hasFP(MF) || NumBytes > 128);
131 }
132 
133 /// hasFP - Return true if the specified function should have a dedicated frame
134 /// pointer register.
135 bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
136   const MachineFrameInfo *MFI = MF.getFrameInfo();
137   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
138   // Retain behavior of always omitting the FP for leaf functions when possible.
139   return (MFI->hasCalls() &&
140           MF.getTarget().Options.DisableFramePointerElim(MF)) ||
141          MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() ||
142          MFI->hasStackMap() || MFI->hasPatchPoint() ||
143          RegInfo->needsStackRealignment(MF);
144 }
145 
146 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
147 /// not required, we reserve argument space for call sites in the function
148 /// immediately on entry to the current function.  This eliminates the need for
149 /// add/sub sp brackets around call sites.  Returns true if the call frame is
150 /// included as part of the stack frame.
151 bool
152 AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
153   return !MF.getFrameInfo()->hasVarSizedObjects();
154 }
155 
156 MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
157     MachineFunction &MF, MachineBasicBlock &MBB,
158     MachineBasicBlock::iterator I) const {
159   const AArch64InstrInfo *TII =
160       static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
161   DebugLoc DL = I->getDebugLoc();
162   unsigned Opc = I->getOpcode();
163   bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
164   uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
165 
166   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
167   if (!TFI->hasReservedCallFrame(MF)) {
168     unsigned Align = getStackAlignment();
169 
170     int64_t Amount = I->getOperand(0).getImm();
171     Amount = alignTo(Amount, Align);
172     if (!IsDestroy)
173       Amount = -Amount;
174 
175     // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
176     // doesn't have to pop anything), then the first operand will be zero too so
177     // this adjustment is a no-op.
178     if (CalleePopAmount == 0) {
179       // FIXME: in-function stack adjustment for calls is limited to 24-bits
180       // because there's no guaranteed temporary register available.
181       //
182       // ADD/SUB (immediate) has only LSL #0 and LSL #12 available.
183       // 1) For offset <= 12-bit, we use LSL #0
184       // 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses
185       // LSL #0, and the other uses LSL #12.
186       //
187       // Most call frames will be allocated at the start of a function so
188       // this is OK, but it is a limitation that needs dealing with.
189       assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
190       emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, Amount, TII);
191     }
192   } else if (CalleePopAmount != 0) {
193     // If the calling convention demands that the callee pops arguments from the
194     // stack, we want to add it back if we have a reserved call frame.
195     assert(CalleePopAmount < 0xffffff && "call frame too large");
196     emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, -CalleePopAmount,
197                     TII);
198   }
199   return MBB.erase(I);
200 }
201 
202 void AArch64FrameLowering::emitCalleeSavedFrameMoves(
203     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
204   MachineFunction &MF = *MBB.getParent();
205   MachineFrameInfo *MFI = MF.getFrameInfo();
206   MachineModuleInfo &MMI = MF.getMMI();
207   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
208   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
209   DebugLoc DL = MBB.findDebugLoc(MBBI);
210 
211   // Add callee saved registers to move list.
212   const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
213   if (CSI.empty())
214     return;
215 
216   for (const auto &Info : CSI) {
217     unsigned Reg = Info.getReg();
218     int64_t Offset =
219         MFI->getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea();
220     unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
221     unsigned CFIIndex = MMI.addFrameInst(
222         MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
223     BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
224         .addCFIIndex(CFIIndex)
225         .setMIFlags(MachineInstr::FrameSetup);
226   }
227 }
228 
229 // Find a scratch register that we can use at the start of the prologue to
230 // re-align the stack pointer.  We avoid using callee-save registers since they
231 // may appear to be free when this is called from canUseAsPrologue (during
232 // shrink wrapping), but then no longer be free when this is called from
233 // emitPrologue.
234 //
235 // FIXME: This is a bit conservative, since in the above case we could use one
236 // of the callee-save registers as a scratch temp to re-align the stack pointer,
237 // but we would then have to make sure that we were in fact saving at least one
238 // callee-save register in the prologue, which is additional complexity that
239 // doesn't seem worth the benefit.
240 static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
241   MachineFunction *MF = MBB->getParent();
242 
243   // If MBB is an entry block, use X9 as the scratch register
244   if (&MF->front() == MBB)
245     return AArch64::X9;
246 
247   RegScavenger RS;
248   RS.enterBasicBlock(*MBB);
249 
250   // Prefer X9 since it was historically used for the prologue scratch reg.
251   if (!RS.isRegUsed(AArch64::X9))
252     return AArch64::X9;
253 
254   // Find a free non callee-save reg.
255   const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
256   const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
257   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MF);
258   BitVector CalleeSaveRegs(RegInfo->getNumRegs());
259   for (unsigned i = 0; CSRegs[i]; ++i)
260     CalleeSaveRegs.set(CSRegs[i]);
261 
262   BitVector Available = RS.getRegsAvailable(&AArch64::GPR64RegClass);
263   for (int AvailReg = Available.find_first(); AvailReg != -1;
264        AvailReg = Available.find_next(AvailReg))
265     if (!CalleeSaveRegs.test(AvailReg))
266       return AvailReg;
267 
268   return AArch64::NoRegister;
269 }
270 
271 bool AArch64FrameLowering::canUseAsPrologue(
272     const MachineBasicBlock &MBB) const {
273   const MachineFunction *MF = MBB.getParent();
274   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
275   const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
276   const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
277 
278   // Don't need a scratch register if we're not going to re-align the stack.
279   if (!RegInfo->needsStackRealignment(*MF))
280     return true;
281   // Otherwise, we can use any block as long as it has a scratch register
282   // available.
283   return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister;
284 }
285 
/// emitPrologue - Emit the function prologue: allocate the local area (or use
/// the red zone), set up the frame pointer and base pointer when needed,
/// re-align SP for over-aligned objects, and emit CFI for unwinding. The
/// callee-save spills themselves are FrameSetup instructions already present
/// at the top of the block; this function inserts after them.
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const Function *Fn = MF.getFunction();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
  bool HasFP = hasFP(MF);

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc DL;

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  // Total frame size, including the callee-save area.
  int NumBytes = (int)MFI->getStackSize();
  if (!AFI->hasStackFrame()) {
    assert(!HasFP && "unexpected function without stack frame but with FP");

    // All of the stack allocation is for locals.
    AFI->setLocalStackSize(NumBytes);

    // Nothing to allocate at all.
    if (!NumBytes)
      return;
    // REDZONE: If the stack size is less than 128 bytes, we don't need
    // to actually allocate.
    if (canUseRedZone(MF))
      ++NumRedZoneFunctions;
    else {
      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);

      // Label used to tie together the PROLOG_LABEL and the MachineMoves.
      MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }
    return;
  }

  // Exclude the callee-save area: the spill code already allocated it.
  NumBytes -= AFI->getCalleeSavedStackSize();
  assert(NumBytes >= 0 && "Negative stack allocation size!?");
  // All of the remaining stack allocations are for locals.
  AFI->setLocalStackSize(NumBytes);

  // Move past the saves of the callee-saved registers.
  MachineBasicBlock::iterator End = MBB.end();
  while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup))
    ++MBBI;
  if (HasFP) {
    // Only set up FP if we actually need to. Frame pointer is fp = sp - 16.
    int FPOffset = AFI->getCalleeSavedStackSize() - 16;

    // Issue    sub fp, sp, FPOffset or
    //          mov fp,sp          when FPOffset is zero.
    // Note: All stores of callee-saved registers are marked as "FrameSetup".
    // This code marks the instruction(s) that set the FP also.
    emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
                    MachineInstr::FrameSetup);
  }

  // Allocate space for the rest of the frame.
  if (NumBytes) {
    const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
    unsigned scratchSPReg = AArch64::SP;

    // When re-aligning, the allocation is done into a scratch register first
    // so the AND below can produce the aligned SP.
    if (NeedsRealignment) {
      scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
      assert(scratchSPReg != AArch64::NoRegister);
    }

    // If we're a leaf function, try using the red zone.
    if (!canUseRedZone(MF))
      // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
      // the correct value here, as NumBytes also includes padding bytes,
      // which shouldn't be counted here.
      emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);

    if (NeedsRealignment) {
      const unsigned Alignment = MFI->getMaxAlignment();
      const unsigned NrBitsToZero = countTrailingZeros(Alignment);
      assert(NrBitsToZero > 1);
      assert(scratchSPReg != AArch64::SP);

      // SUB X9, SP, NumBytes
      //   -- X9 is temporary register, so shouldn't contain any live data here,
      //   -- free to use. This is already produced by emitFrameOffset above.
      // AND SP, X9, 0b11111...0000
      // The logical immediates have a non-trivial encoding. The following
      // formula computes the encoded immediate with all ones but
      // NrBitsToZero zero bits as least significant bits.
      uint32_t andMaskEncoded = (1 << 12)                         // = N
                                | ((64 - NrBitsToZero) << 6)      // immr
                                | ((64 - NrBitsToZero - 1) << 0); // imms

      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
          .addReg(scratchSPReg, RegState::Kill)
          .addImm(andMaskEncoded);
      AFI->setStackRealigned(true);
    }
  }

  // If we need a base pointer, set it up here. It's whatever the value of the
  // stack pointer is at this point. Any variable size objects will be allocated
  // after this, so we can still use the base pointer to reference locals.
  //
  // FIXME: Clarify FrameSetup flags here.
  // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
  // needed.
  if (RegInfo->hasBasePointer(MF)) {
    TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
                     false);
  }

  // Emit CFI directives so debuggers/unwinders can reconstruct the frame.
  if (needsFrameMoves) {
    const DataLayout &TD = MF.getDataLayout();
    const int StackGrowth = -TD.getPointerSize(0);
    unsigned FramePtr = RegInfo->getFrameRegister(MF);
    // An example of the prologue:
    //
    //     .globl __foo
    //     .align 2
    //  __foo:
    // Ltmp0:
    //     .cfi_startproc
    //     .cfi_personality 155, ___gxx_personality_v0
    // Leh_func_begin:
    //     .cfi_lsda 16, Lexception33
    //
    //     stp  xa,bx, [sp, -#offset]!
    //     ...
    //     stp  x28, x27, [sp, #offset-32]
    //     stp  fp, lr, [sp, #offset-16]
    //     add  fp, sp, #offset - 16
    //     sub  sp, sp, #1360
    //
    // The Stack:
    //       +-------------------------------------------+
    // 10000 | ........ | ........ | ........ | ........ |
    // 10004 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10008 | ........ | ........ | ........ | ........ |
    // 1000c | ........ | ........ | ........ | ........ |
    //       +===========================================+
    // 10010 |                X28 Register               |
    // 10014 |                X28 Register               |
    //       +-------------------------------------------+
    // 10018 |                X27 Register               |
    // 1001c |                X27 Register               |
    //       +===========================================+
    // 10020 |                Frame Pointer              |
    // 10024 |                Frame Pointer              |
    //       +-------------------------------------------+
    // 10028 |                Link Register              |
    // 1002c |                Link Register              |
    //       +===========================================+
    // 10030 | ........ | ........ | ........ | ........ |
    // 10034 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10038 | ........ | ........ | ........ | ........ |
    // 1003c | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    //
    //     [sp] = 10030        ::    >>initial value<<
    //     sp = 10020          ::  stp fp, lr, [sp, #-16]!
    //     fp = sp == 10020    ::  mov fp, sp
    //     [sp] == 10020       ::  stp x28, x27, [sp, #-16]!
    //     sp == 10010         ::    >>final value<<
    //
    // The frame pointer (w29) points to address 10020. If we use an offset of
    // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
    // for w27, and -32 for w28:
    //
    //  Ltmp1:
    //     .cfi_def_cfa w29, 16
    //  Ltmp2:
    //     .cfi_offset w30, -8
    //  Ltmp3:
    //     .cfi_offset w29, -16
    //  Ltmp4:
    //     .cfi_offset w27, -24
    //  Ltmp5:
    //     .cfi_offset w28, -32

    if (HasFP) {
      // Define the current CFA rule to use the provided FP.
      unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    } else {
      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize()));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }

    // Now emit the moves for whatever callee saved regs we have (including FP,
    // LR if those are saved).
    emitCalleeSavedFrameMoves(MBB, MBBI);
  }
}
504 
/// emitEpilogue - Emit the function epilogue: undo the local-area allocation
/// (by SP adjustment, or from FP when SP is not reliable), then pop any
/// callee-popped argument area. The callee-save restores are FrameDestroy
/// instructions already present before the terminator; this function inserts
/// around them.
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL;
  bool IsTailCallReturn = false;
  if (MBB.end() != MBBI) {
    DL = MBBI->getDebugLoc();
    unsigned RetOpcode = MBBI->getOpcode();
    IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
      RetOpcode == AArch64::TCRETURNri;
  }
  // Total frame size, including the callee-save area.
  int NumBytes = MFI->getStackSize();
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  // Initial and residual are named for consistency with the prologue. Note that
  // in the epilogue, the residual adjustment is executed first.
  uint64_t ArgumentPopSize = 0;
  if (IsTailCallReturn) {
    MachineOperand &StackAdjust = MBBI->getOperand(1);

    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments, this is
    // calculated during LowerCall and consumed here...
    ArgumentPopSize = StackAdjust.getImm();
  } else {
    // ... otherwise the amount to pop is *all* of the argument space,
    // conveniently stored in the MachineFunctionInfo by
    // LowerFormalArguments. This will, of course, be zero for the C calling
    // convention.
    ArgumentPopSize = AFI->getArgumentStackToRestore();
  }

  // The stack frame should be like below,
  //
  //      ----------------------                     ---
  //      |                    |                      |
  //      | BytesInStackArgArea|              CalleeArgStackSize
  //      | (NumReusableBytes) |                (of tail call)
  //      |                    |                     ---
  //      |                    |                      |
  //      ---------------------|        ---           |
  //      |                    |         |            |
  //      |   CalleeSavedReg   |         |            |
  //      | (CalleeSavedStackSize)|      |            |
  //      |                    |         |            |
  //      ---------------------|         |         NumBytes
  //      |                    |     StackSize  (StackAdjustUp)
  //      |   LocalStackSize   |         |            |
  //      | (covering callee   |         |            |
  //      |       args)        |         |            |
  //      |                    |         |            |
  //      ----------------------        ---          ---
  //
  // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
  //             = StackSize + ArgumentPopSize
  //
  // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
  // it as the 2nd argument of AArch64ISD::TC_RETURN.

  // Move past the restores of the callee-saved registers.
  MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
  MachineBasicBlock::iterator Begin = MBB.begin();
  while (LastPopI != Begin) {
    --LastPopI;
    if (!LastPopI->getFlag(MachineInstr::FrameDestroy)) {
      ++LastPopI;
      break;
    }
  }
  // Exclude the callee-save area, restored by the FrameDestroy instructions
  // we just scanned past; only the local area remains to deallocate.
  NumBytes -= AFI->getCalleeSavedStackSize();
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  if (!hasFP(MF)) {
    bool RedZone = canUseRedZone(MF);
    // If this was a redzone leaf function, we don't need to restore the
    // stack pointer (but we may need to pop stack args for fastcc).
    if (RedZone && ArgumentPopSize == 0)
      return;

    bool NoCalleeSaveRestore = AFI->getCalleeSavedStackSize() == 0;
    int StackRestoreBytes = RedZone ? 0 : NumBytes;
    // With no callee-save restores in between, the argument pop can be folded
    // into the same SP adjustment.
    if (NoCalleeSaveRestore)
      StackRestoreBytes += ArgumentPopSize;
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
                    StackRestoreBytes, TII, MachineInstr::FrameDestroy);
    // If we were able to combine the local stack pop with the argument pop,
    // then we're done.
    if (NoCalleeSaveRestore || ArgumentPopSize == 0)
      return;
    NumBytes = 0;
  }

  // Restore the original stack pointer.
  // FIXME: Rather than doing the math here, we should instead just use
  // non-post-indexed loads for the restores if we aren't actually going to
  // be able to save any instructions.
  if (MFI->hasVarSizedObjects() || AFI->isStackRealigned())
    // SP is unreliable here; recompute it from FP (which sits 16 bytes below
    // the top of the callee-save area — see the prologue's FPOffset).
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
                    -AFI->getCalleeSavedStackSize() + 16, TII,
                    MachineInstr::FrameDestroy);
  else if (NumBytes)
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII,
                    MachineInstr::FrameDestroy);

  // This must be placed after the callee-save restore code because that code
  // assumes the SP is at the same location as it was after the callee-save save
  // code in the prologue.
  if (ArgumentPopSize)
    emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
                    ArgumentPopSize, TII, MachineInstr::FrameDestroy);
}
624 
/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
/// debug info.  It's the same as what we use for resolving the code-gen
/// references for now.  FIXME: This can go wrong when references are
/// SP-relative and simple call frames aren't used.
int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
                                                 int FI,
                                                 unsigned &FrameReg) const {
  // Forward to the common resolver with its default FP preference.
  return resolveFrameIndexReference(MF, FI, FrameReg);
}
634 
/// resolveFrameIndexReference - Resolve frame index FI to a base register
/// (written to FrameReg) plus the returned offset, choosing among FP, the
/// base pointer, and SP. PreferFP biases the choice toward the FP when both
/// would be legal.
int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
                                                     int FI, unsigned &FrameReg,
                                                     bool PreferFP) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  // FP-relative offset. The +16 accounts for the FP pointing 16 bytes below
  // the top of the callee-save area (it is set to SP + CalleeSavedStackSize
  // - 16 in the prologue).
  int FPOffset = MFI->getObjectOffset(FI) + 16;
  // SP-relative offset, assuming SP == incoming SP - StackSize (i.e. no
  // dynamic allocas or realignment have moved it).
  int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
  bool isFixed = MFI->isFixedObjectIndex(FI);

  // Use frame pointer to reference fixed objects. Use it for locals if
  // there are VLAs or a dynamically realigned SP (and thus the SP isn't
  // reliable as a base). Make sure useFPForScavengingIndex() does the
  // right thing for the emergency spill slot.
  bool UseFP = false;
  if (AFI->hasStackFrame()) {
    // Note: Keeping the following as multiple 'if' statements rather than
    // merging to a single expression for readability.
    //
    // Argument access should always use the FP.
    if (isFixed) {
      UseFP = hasFP(MF);
    } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) &&
               !RegInfo->needsStackRealignment(MF)) {
      // Use SP or FP, whichever gives us the best chance of the offset
      // being in range for direct access. If the FPOffset is positive,
      // that'll always be best, as the SP will be even further away.
      // If the FPOffset is negative, we have to keep in mind that the
      // available offset range for negative offsets is smaller than for
      // positive ones. If we have variable sized objects, we're stuck with
      // using the FP regardless, though, as the SP offset is unknown
      // and we don't have a base pointer available. If an offset is
      // available via the FP and the SP, use whichever is closest.
      if (PreferFP || MFI->hasVarSizedObjects() || FPOffset >= 0 ||
          (FPOffset >= -256 && Offset > -FPOffset))
        UseFP = true;
    }
  }

  assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
         "In the presence of dynamic stack pointer realignment, "
         "non-argument objects cannot be accessed through the frame pointer");

  if (UseFP) {
    FrameReg = RegInfo->getFrameRegister(MF);
    return FPOffset;
  }

  // Use the base pointer if we have one.
  if (RegInfo->hasBasePointer(MF))
    FrameReg = RegInfo->getBaseRegister();
  else {
    FrameReg = AArch64::SP;
    // If we're using the red zone for this function, the SP won't actually
    // be adjusted, so the offsets will be negative. They're also all
    // within range of the signed 9-bit immediate instructions.
    if (canUseRedZone(MF))
      Offset -= AFI->getLocalStackSize();
  }

  return Offset;
}
698 
699 static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
700   // Do not set a kill flag on values that are also marked as live-in. This
701   // happens with the @llvm-returnaddress intrinsic and with arguments passed in
702   // callee saved registers.
703   // Omitting the kill flags is conservatively correct even if the live-in
704   // is not used after all.
705   bool IsLiveIn = MF.getRegInfo().isLiveIn(Reg);
706   return getKillRegState(!IsLiveIn);
707 }
708 
709 static bool produceCompactUnwindFrame(MachineFunction &MF) {
710   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
711   AttributeSet Attrs = MF.getFunction()->getAttributes();
712   return Subtarget.isTargetMachO() &&
713          !(Subtarget.getTargetLowering()->supportSwiftError() &&
714            Attrs.hasAttrSomewhere(Attribute::SwiftError));
715 }
716 
717 
718 struct RegPairInfo {
719   RegPairInfo() : Reg1(AArch64::NoRegister), Reg2(AArch64::NoRegister) {}
720   unsigned Reg1;
721   unsigned Reg2;
722   int FrameIdx;
723   int Offset;
724   bool IsGPR;
725   bool isPaired() const { return Reg2 != AArch64::NoRegister; }
726 };
727 
// Build the list of callee-save register pairs for the registers in CSI.
// Each RegPairInfo records one STP/LDP (or single STR/LDR) worth of
// registers together with the frame index and the offset, in 8-byte units,
// of its slot within the callee-save area.
static void computeCalleeSaveRegisterPairs(
    MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs) {

  if (CSI.empty())
    return;

  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  CallingConv::ID CC = MF.getFunction()->getCallingConv();
  unsigned Count = CSI.size();
  (void)CC;
  // MachO's compact unwind format relies on all registers being stored in
  // pairs.
  assert((!produceCompactUnwindFrame(MF) ||
          CC == CallingConv::PreserveMost ||
          (Count & 1) == 0) &&
         "Odd number of callee-saved regs to spill!");
  // Slots are assigned top-down: start at the full callee-save area size
  // and walk the offset downward as each pair (or padded single) is placed.
  unsigned Offset = AFI->getCalleeSavedStackSize();

  for (unsigned i = 0; i < Count; ++i) {
    RegPairInfo RPI;
    RPI.Reg1 = CSI[i].getReg();

    // Only 64-bit GPRs and FPRs are expected here; pairing is only done
    // within a single register class.
    assert(AArch64::GPR64RegClass.contains(RPI.Reg1) ||
           AArch64::FPR64RegClass.contains(RPI.Reg1));
    RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1);

    // Add the next reg to the pair if it is in the same register class.
    if (i + 1 < Count) {
      unsigned NextReg = CSI[i + 1].getReg();
      if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) ||
          (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg)))
        RPI.Reg2 = NextReg;
    }

    // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
    // list to come in sorted by frame index so that we can issue the store
    // pair instructions directly. Assert if we see anything otherwise.
    //
    // The order of the registers in the list is controlled by
    // getCalleeSavedRegs(), so they will always be in-order, as well.
    assert((!RPI.isPaired() ||
            (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) &&
           "Out of order callee saved regs!");

    // MachO's compact unwind format relies on all registers being stored in
    // adjacent register pairs.
    assert((!produceCompactUnwindFrame(MF) ||
            CC == CallingConv::PreserveMost ||
            (RPI.isPaired() &&
             ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
              RPI.Reg1 + 1 == RPI.Reg2))) &&
           "Callee-save registers not saved as adjacent register pair!");

    RPI.FrameIdx = CSI[i].getFrameIdx();

    if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) {
      // Round up size of non-pair to pair size if we need to pad the
      // callee-save area to ensure 16-byte alignment.
      Offset -= 16;
      assert(MFI->getObjectAlignment(RPI.FrameIdx) <= 16);
      MFI->setObjectSize(RPI.FrameIdx, 16);
    } else
      Offset -= RPI.isPaired() ? 16 : 8;
    assert(Offset % 8 == 0);
    // Offsets are stored scaled by 8 to match the LDP/STP immediate
    // encoding used when the instructions are emitted.
    RPI.Offset = Offset / 8;
    assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
           "Offset out of bounds for LDP/STP immediate");

    RegPairs.push_back(RPI);
    if (RPI.isPaired())
      ++i;
  }

  // Align first offset to even 16-byte boundary to avoid additional SP
  // adjustment instructions.
  // Last pair offset is size of whole callee-save region for SP
  // pre-dec/post-inc.
  RegPairInfo &LastPair = RegPairs.back();
  assert(AFI->getCalleeSavedStackSize() % 8 == 0);
  LastPair.Offset = AFI->getCalleeSavedStackSize() / 8;
}
811 
// Emit prologue stores for the callee-saved registers described by CSI.
// RegPairs is walked in reverse so that the first store emitted is a
// pre-increment form that also allocates the entire callee-save area; the
// remaining stores use plain SP-relative addressing.
bool AArch64FrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  DebugLoc DL;
  SmallVector<RegPairInfo, 8> RegPairs;

  computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);

  for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
       ++RPII) {
    RegPairInfo RPI = *RPII;
    unsigned Reg1 = RPI.Reg1;
    unsigned Reg2 = RPI.Reg2;
    unsigned StrOpc;

    // Issue sequence of non-sp increment and pi sp spills for cs regs. The
    // first spill is a pre-increment that allocates the stack.
    // For example:
    //    stp     x22, x21, [sp, #-48]!   // addImm(-6)
    //    stp     x20, x19, [sp, #16]    // addImm(+2)
    //    stp     fp, lr, [sp, #32]      // addImm(+4)
    // Rationale: This sequence saves uop updates compared to a sequence of
    // pre-increment spills like stp xi,xj,[sp,#-16]!
    // Note: Similar rationale and sequence for restores in epilog.
    bool BumpSP = RPII == RegPairs.rbegin();
    if (RPI.IsGPR) {
      // For first spill use pre-increment store.
      if (BumpSP)
        StrOpc = RPI.isPaired() ? AArch64::STPXpre : AArch64::STRXpre;
      else
        StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
    } else {
      // For first spill use pre-increment store.
      if (BumpSP)
        StrOpc = RPI.isPaired() ? AArch64::STPDpre : AArch64::STRDpre;
      else
        StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
    }
    DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1);
          if (RPI.isPaired())
            dbgs() << ", " << TRI->getName(Reg2);
          dbgs() << ") -> fi#(" << RPI.FrameIdx;
          if (RPI.isPaired())
            dbgs() << ", " << RPI.FrameIdx+1;
          dbgs() << ")\n");

    // The pre-increment store gets a negative offset: it moves SP down by
    // the whole callee-save area size computed in
    // computeCalleeSaveRegisterPairs().
    const int Offset = BumpSP ? -RPI.Offset : RPI.Offset;
    MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
    if (BumpSP)
      // Pre-increment forms write SP back, so SP is a def operand here.
      MIB.addReg(AArch64::SP, RegState::Define);

    if (RPI.isPaired()) {
      MBB.addLiveIn(Reg1);
      MBB.addLiveIn(Reg2);
      MIB.addReg(Reg2, getPrologueDeath(MF, Reg2))
        .addReg(Reg1, getPrologueDeath(MF, Reg1))
        .addReg(AArch64::SP)
        .addImm(Offset) // [sp, #offset * 8], where factor * 8 is implicit
        .setMIFlag(MachineInstr::FrameSetup);
      // Attach a memory operand for the second slot of the pair.
      MIB.addMemOperand(MF.getMachineMemOperand(
          MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
          MachineMemOperand::MOStore, 8, 8));
    } else {
      MBB.addLiveIn(Reg1);
      MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
        .addReg(AArch64::SP)
        .addImm(BumpSP ? Offset * 8 : Offset) // pre-inc version is unscaled
        .setMIFlag(MachineInstr::FrameSetup);
    }
    // Attach a memory operand for the (first) slot stored to.
    MIB.addMemOperand(MF.getMachineMemOperand(
        MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
        MachineMemOperand::MOStore, 8, 8));
  }
  return true;
}
890 
// Emit epilogue loads that restore the callee-saved registers described by
// CSI. RegPairs is walked in forward order; only the final load uses a
// post-increment form, which also deallocates the whole callee-save area
// (mirroring the store sequence in spillCalleeSavedRegisters()).
bool AArch64FrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  DebugLoc DL;
  SmallVector<RegPairInfo, 8> RegPairs;

  if (MI != MBB.end())
    DL = MI->getDebugLoc();

  computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);

  for (auto RPII = RegPairs.begin(), RPIE = RegPairs.end(); RPII != RPIE;
       ++RPII) {
    RegPairInfo RPI = *RPII;
    unsigned Reg1 = RPI.Reg1;
    unsigned Reg2 = RPI.Reg2;

    // Issue sequence of non-sp increment and sp-pi restores for cs regs. Only
    // the last load is sp-pi post-increment and de-allocates the stack:
    // For example:
    //    ldp     fp, lr, [sp, #32]       // addImm(+4)
    //    ldp     x20, x19, [sp, #16]     // addImm(+2)
    //    ldp     x22, x21, [sp], #48     // addImm(+6)
    // Note: see comment in spillCalleeSavedRegisters()
    unsigned LdrOpc;
    bool BumpSP = RPII == std::prev(RegPairs.end());
    if (RPI.IsGPR) {
      if (BumpSP)
        LdrOpc = RPI.isPaired() ? AArch64::LDPXpost : AArch64::LDRXpost;
      else
        LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
    } else {
      if (BumpSP)
        LdrOpc = RPI.isPaired() ? AArch64::LDPDpost : AArch64::LDRDpost;
      else
        LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
    }
    DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1);
          if (RPI.isPaired())
            dbgs() << ", " << TRI->getName(Reg2);
          dbgs() << ") -> fi#(" << RPI.FrameIdx;
          if (RPI.isPaired())
            dbgs() << ", " << RPI.FrameIdx+1;
          dbgs() << ")\n");

    const int Offset = RPI.Offset;
    MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
    if (BumpSP)
      // Post-increment forms write SP back, so SP is a def operand here.
      MIB.addReg(AArch64::SP, RegState::Define);

    if (RPI.isPaired()) {
      MIB.addReg(Reg2, getDefRegState(true))
        .addReg(Reg1, getDefRegState(true))
        .addReg(AArch64::SP)
        .addImm(Offset) // [sp], #offset * 8  or [sp, #offset * 8]
                        // where the factor * 8 is implicit
        .setMIFlag(MachineInstr::FrameDestroy);
      // Attach a memory operand for the second slot of the pair.
      MIB.addMemOperand(MF.getMachineMemOperand(
          MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
          MachineMemOperand::MOLoad, 8, 8));
    } else {
      MIB.addReg(Reg1, getDefRegState(true))
        .addReg(AArch64::SP)
        .addImm(BumpSP ? Offset * 8 : Offset) // post-dec version is unscaled
        .setMIFlag(MachineInstr::FrameDestroy);
    }
    // Attach a memory operand for the (first) slot loaded from.
    MIB.addMemOperand(MF.getMachineMemOperand(
        MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
        MachineMemOperand::MOLoad, 8, 8));
  }
  return true;
}
966 
// Decide which callee-saved registers must be spilled for this function and
// record the resulting callee-save area size in AArch64FunctionInfo. Also
// arranges for either an extra spilled GPR or an emergency spill slot when
// the estimated frame is large enough that stack offsets may need to be
// materialized into a scratch register.
void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
                                                BitVector &SavedRegs,
                                                RegScavenger *RS) const {
  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  // Track one currently-unspilled callee-saved GPR (and its pair partner)
  // so it can be spilled later as a scratch register if the frame is big.
  unsigned UnspilledCSGPR = AArch64::NoRegister;
  unsigned UnspilledCSGPRPaired = AArch64::NoRegister;

  // The frame record needs to be created by saving the appropriate registers
  if (hasFP(MF)) {
    SavedRegs.set(AArch64::FP);
    SavedRegs.set(AArch64::LR);
  }

  unsigned BasePointerReg = AArch64::NoRegister;
  if (RegInfo->hasBasePointer(MF))
    BasePointerReg = RegInfo->getBaseRegister();

  bool ExtraCSSpill = false;
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
  // Figure out which callee-saved registers to save/restore.
  for (unsigned i = 0; CSRegs[i]; ++i) {
    const unsigned Reg = CSRegs[i];

    // Add the base pointer register to SavedRegs if it is callee-save.
    if (Reg == BasePointerReg)
      SavedRegs.set(Reg);

    bool RegUsed = SavedRegs.test(Reg);
    // CSRegs is assumed to list pair partners at adjacent even/odd indices,
    // so XOR-ing the index with 1 yields the partner register.
    unsigned PairedReg = CSRegs[i ^ 1];
    if (!RegUsed) {
      // Remember a non-reserved, unspilled GPR as a scratch candidate for
      // the BigStack handling below.
      if (AArch64::GPR64RegClass.contains(Reg) &&
          !RegInfo->isReservedReg(MF, Reg)) {
        UnspilledCSGPR = Reg;
        UnspilledCSGPRPaired = PairedReg;
      }
      continue;
    }

    // MachO's compact unwind format relies on all registers being stored in
    // pairs.
    // FIXME: the usual format is actually better if unwinding isn't needed.
    if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg)) {
      SavedRegs.set(PairedReg);
      ExtraCSSpill = true;
    }
  }

  DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:";
        for (int Reg = SavedRegs.find_first(); Reg != -1;
             Reg = SavedRegs.find_next(Reg))
          dbgs() << ' ' << PrintReg(Reg, RegInfo);
        dbgs() << "\n";);

  // If any callee-saved registers are used, the frame cannot be eliminated.
  unsigned NumRegsSpilled = SavedRegs.count();
  bool CanEliminateFrame = NumRegsSpilled == 0;

  // FIXME: Set BigStack if any stack slot references may be out of range.
  // For now, just conservatively guestimate based on unscaled indexing
  // range. We'll end up allocating an unnecessary spill slot a lot, but
  // realistically that's not a big deal at this stage of the game.
  // The CSR spill slots have not been allocated yet, so estimateStackSize
  // won't include them.
  MachineFrameInfo *MFI = MF.getFrameInfo();
  unsigned CFSize = MFI->estimateStackSize(MF) + 8 * NumRegsSpilled;
  DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
  bool BigStack = (CFSize >= 256);
  if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
    AFI->setHasStackFrame(true);

  // Estimate if we might need to scavenge a register at some point in order
  // to materialize a stack offset. If so, either spill one additional
  // callee-saved register or reserve a special spill slot to facilitate
  // register scavenging. If we already spilled an extra callee-saved register
  // above to keep the number of spills even, we don't need to do anything else
  // here.
  if (BigStack && !ExtraCSSpill) {
    if (UnspilledCSGPR != AArch64::NoRegister) {
      DEBUG(dbgs() << "Spilling " << PrintReg(UnspilledCSGPR, RegInfo)
            << " to get a scratch register.\n");
      SavedRegs.set(UnspilledCSGPR);
      // MachO's compact unwind format relies on all registers being stored in
      // pairs, so if we need to spill one extra for BigStack, then we need to
      // store the pair.
      if (produceCompactUnwindFrame(MF))
        SavedRegs.set(UnspilledCSGPRPaired);
      ExtraCSSpill = true;
      NumRegsSpilled = SavedRegs.count();
    }

    // If we didn't find an extra callee-saved register to spill, create
    // an emergency spill slot.
    if (!ExtraCSSpill) {
      const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
      int FI = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false);
      RS->addScavengingFrameIndex(FI);
      DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
                   << " as the emergency spill slot.\n");
    }
  }

  // Round up to register pair alignment to avoid additional SP adjustment
  // instructions.
  AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16));
}
1080