1 //===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the AArch64 implementation of TargetFrameLowering class.
10 //
11 // On AArch64, stack frames are structured as follows:
12 //
13 // The stack grows downward.
14 //
15 // All of the individual frame areas on the frame below are optional, i.e. it's
16 // possible to create a function so that the particular area isn't present
17 // in the frame.
18 //
19 // At function entry, the "frame" looks as follows:
20 //
21 // |                                   | Higher address
22 // |-----------------------------------|
23 // |                                   |
24 // | arguments passed on the stack     |
25 // |                                   |
26 // |-----------------------------------| <- sp
27 // |                                   | Lower address
28 //
29 //
30 // After the prologue has run, the frame has the following general structure.
31 // Note that this doesn't depict the case where a red-zone is used. Also,
32 // technically the last frame area (VLAs) doesn't get created until in the
33 // main function body, after the prologue is run. However, it's depicted here
34 // for completeness.
35 //
36 // |                                   | Higher address
37 // |-----------------------------------|
38 // |                                   |
39 // | arguments passed on the stack     |
40 // |                                   |
41 // |-----------------------------------|
42 // |                                   |
43 // | (Win64 only) varargs from reg     |
44 // |                                   |
45 // |-----------------------------------|
46 // |                                   |
47 // | prev_fp, prev_lr                  |
48 // | (a.k.a. "frame record")           |
49 // |-----------------------------------| <- fp(=x29)
50 // |                                   |
51 // | other callee-saved registers      |
52 // |                                   |
53 // |-----------------------------------|
54 // |.empty.space.to.make.part.below....|
55 // |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
56 // |.the.standard.16-byte.alignment....|  compile time; if present)
57 // |-----------------------------------|
58 // |                                   |
59 // | local variables of fixed size     |
60 // | including spill slots             |
61 // |-----------------------------------| <- bp(not defined by ABI,
62 // |.variable-sized.local.variables....|       LLVM chooses X19)
63 // |.(VLAs)............................| (size of this area is unknown at
64 // |...................................|  compile time)
65 // |-----------------------------------| <- sp
66 // |                                   | Lower address
67 //
68 //
// To access the data in a frame at compile time, a constant offset must be
// computable from one of the pointers (fp, bp, sp). The size
71 // of the areas with a dotted background cannot be computed at compile-time
72 // if they are present, making it required to have all three of fp, bp and
73 // sp to be set up to be able to access all contents in the frame areas,
74 // assuming all of the frame areas are non-empty.
75 //
76 // For most functions, some of the frame areas are empty. For those functions,
77 // it may not be necessary to set up fp or bp:
78 // * A base pointer is definitely needed when there are both VLAs and local
79 //   variables with more-than-default alignment requirements.
80 // * A frame pointer is definitely needed when there are local variables with
81 //   more-than-default alignment requirements.
82 //
83 // In some cases when a base pointer is not strictly needed, it is generated
84 // anyway when offsets from the frame pointer to access local variables become
85 // so large that the offset can't be encoded in the immediate fields of loads
86 // or stores.
87 //
88 // FIXME: also explain the redzone concept.
89 // FIXME: also explain the concept of reserved call frames.
90 //
91 //===----------------------------------------------------------------------===//
92 
93 #include "AArch64FrameLowering.h"
94 #include "AArch64InstrInfo.h"
95 #include "AArch64MachineFunctionInfo.h"
96 #include "AArch64RegisterInfo.h"
97 #include "AArch64Subtarget.h"
98 #include "AArch64TargetMachine.h"
99 #include "MCTargetDesc/AArch64AddressingModes.h"
100 #include "llvm/ADT/ScopeExit.h"
101 #include "llvm/ADT/SmallVector.h"
102 #include "llvm/ADT/Statistic.h"
103 #include "llvm/CodeGen/LivePhysRegs.h"
104 #include "llvm/CodeGen/MachineBasicBlock.h"
105 #include "llvm/CodeGen/MachineFrameInfo.h"
106 #include "llvm/CodeGen/MachineFunction.h"
107 #include "llvm/CodeGen/MachineInstr.h"
108 #include "llvm/CodeGen/MachineInstrBuilder.h"
109 #include "llvm/CodeGen/MachineMemOperand.h"
110 #include "llvm/CodeGen/MachineModuleInfo.h"
111 #include "llvm/CodeGen/MachineOperand.h"
112 #include "llvm/CodeGen/MachineRegisterInfo.h"
113 #include "llvm/CodeGen/RegisterScavenging.h"
114 #include "llvm/CodeGen/TargetInstrInfo.h"
115 #include "llvm/CodeGen/TargetRegisterInfo.h"
116 #include "llvm/CodeGen/TargetSubtargetInfo.h"
117 #include "llvm/CodeGen/WinEHFuncInfo.h"
118 #include "llvm/IR/Attributes.h"
119 #include "llvm/IR/CallingConv.h"
120 #include "llvm/IR/DataLayout.h"
121 #include "llvm/IR/DebugLoc.h"
122 #include "llvm/IR/Function.h"
123 #include "llvm/MC/MCAsmInfo.h"
124 #include "llvm/MC/MCDwarf.h"
125 #include "llvm/Support/CommandLine.h"
126 #include "llvm/Support/Debug.h"
127 #include "llvm/Support/ErrorHandling.h"
128 #include "llvm/Support/MathExtras.h"
129 #include "llvm/Support/raw_ostream.h"
130 #include "llvm/Target/TargetMachine.h"
131 #include "llvm/Target/TargetOptions.h"
132 #include <cassert>
133 #include <cstdint>
134 #include <iterator>
135 #include <vector>
136 
137 using namespace llvm;
138 
139 #define DEBUG_TYPE "frame-info"
140 
// Opt-in flag allowing leaf functions to keep locals in the 128-byte red
// zone below SP instead of adjusting SP (see canUseRedZone()).
static cl::opt<bool> EnableRedZone("aarch64-redzone",
                                   cl::desc("enable use of redzone on AArch64"),
                                   cl::init(false), cl::Hidden);

// When set, callee-saved registers are restored in the reverse of the save
// order so the load-store optimizer can fold the final SP increment into the
// last restore (see adaptForLdStOpt()).
static cl::opt<bool>
    ReverseCSRRestoreSeq("reverse-csr-restore-seq",
                         cl::desc("reverse the CSR restore sequence"),
                         cl::init(false), cl::Hidden);

// Counts functions whose locals were placed in the red zone.
STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");

/// This is the biggest offset to the stack pointer we can encode in aarch64
/// instructions (without using a separate calculation and a temp register).
/// Note that the exception here are vector stores/loads which cannot encode any
/// displacements (see estimateRSStackSizeLimit(), isAArch64FrameOffsetLegal()).
static const unsigned DefaultSafeSPDisplacement = 255;
157 
158 /// Look at each instruction that references stack frames and return the stack
159 /// size limit beyond which some of these instructions will require a scratch
160 /// register during their expansion later.
161 static unsigned estimateRSStackSizeLimit(MachineFunction &MF) {
162   // FIXME: For now, just conservatively guestimate based on unscaled indexing
163   // range. We'll end up allocating an unnecessary spill slot a lot, but
164   // realistically that's not a big deal at this stage of the game.
165   for (MachineBasicBlock &MBB : MF) {
166     for (MachineInstr &MI : MBB) {
167       if (MI.isDebugInstr() || MI.isPseudo() ||
168           MI.getOpcode() == AArch64::ADDXri ||
169           MI.getOpcode() == AArch64::ADDSXri)
170         continue;
171 
172       for (const MachineOperand &MO : MI.operands()) {
173         if (!MO.isFI())
174           continue;
175 
176         int Offset = 0;
177         if (isAArch64FrameOffsetLegal(MI, Offset, nullptr, nullptr, nullptr) ==
178             AArch64FrameOffsetCannotUpdate)
179           return 0;
180       }
181     }
182   }
183   return DefaultSafeSPDisplacement;
184 }
185 
186 bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
187   if (!EnableRedZone)
188     return false;
189   // Don't use the red zone if the function explicitly asks us not to.
190   // This is typically used for kernel code.
191   if (MF.getFunction().hasFnAttribute(Attribute::NoRedZone))
192     return false;
193 
194   const MachineFrameInfo &MFI = MF.getFrameInfo();
195   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
196   unsigned NumBytes = AFI->getLocalStackSize();
197 
198   return !(MFI.hasCalls() || hasFP(MF) || NumBytes > 128);
199 }
200 
201 /// hasFP - Return true if the specified function should have a dedicated frame
202 /// pointer register.
203 bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
204   const MachineFrameInfo &MFI = MF.getFrameInfo();
205   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
206   // Win64 EH requires a frame pointer if funclets are present, as the locals
207   // are accessed off the frame pointer in both the parent function and the
208   // funclets.
209   if (MF.hasEHFunclets())
210     return true;
211   // Retain behavior of always omitting the FP for leaf functions when possible.
212   if (MFI.hasCalls() && MF.getTarget().Options.DisableFramePointerElim(MF))
213     return true;
214   if (MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
215       MFI.hasStackMap() || MFI.hasPatchPoint() ||
216       RegInfo->needsStackRealignment(MF))
217     return true;
218   // With large callframes around we may need to use FP to access the scavenging
219   // emergency spillslot.
220   //
221   // Unfortunately some calls to hasFP() like machine verifier ->
222   // getReservedReg() -> hasFP in the middle of global isel are too early
223   // to know the max call frame size. Hopefully conservatively returning "true"
224   // in those cases is fine.
225   // DefaultSafeSPDisplacement is fine as we only emergency spill GP regs.
226   if (!MFI.isMaxCallFrameSizeComputed() ||
227       MFI.getMaxCallFrameSize() > DefaultSafeSPDisplacement)
228     return true;
229 
230   return false;
231 }
232 
233 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
234 /// not required, we reserve argument space for call sites in the function
235 /// immediately on entry to the current function.  This eliminates the need for
236 /// add/sub sp brackets around call sites.  Returns true if the call frame is
237 /// included as part of the stack frame.
238 bool
239 AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
240   return !MF.getFrameInfo().hasVarSizedObjects();
241 }
242 
// Lower a call frame setup/destroy pseudo-instruction: when the call frame is
// not reserved, adjust SP by the (aligned) argument area size; when it is
// reserved, re-apply any stack bytes the callee popped. The pseudo itself is
// erased and the iterator following it is returned.
MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  const AArch64InstrInfo *TII =
      static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  DebugLoc DL = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  // For a destroy pseudo, operand 1 carries the number of argument bytes the
  // callee pops itself.
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
  if (!TFI->hasReservedCallFrame(MF)) {
    unsigned Align = getStackAlignment();

    // Operand 0 is this call's argument area size; round it up to the stack
    // alignment and turn it into an SP decrement (setup) or increment
    // (destroy).
    int64_t Amount = I->getOperand(0).getImm();
    Amount = alignTo(Amount, Align);
    if (!IsDestroy)
      Amount = -Amount;

    // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
    // doesn't have to pop anything), then the first operand will be zero too so
    // this adjustment is a no-op.
    if (CalleePopAmount == 0) {
      // FIXME: in-function stack adjustment for calls is limited to 24-bits
      // because there's no guaranteed temporary register available.
      //
      // ADD/SUB (immediate) has only LSL #0 and LSL #12 available.
      // 1) For offset <= 12-bit, we use LSL #0
      // 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses
      // LSL #0, and the other uses LSL #12.
      //
      // Most call frames will be allocated at the start of a function so
      // this is OK, but it is a limitation that needs dealing with.
      assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
      emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, Amount, TII);
    }
  } else if (CalleePopAmount != 0) {
    // If the calling convention demands that the callee pops arguments from the
    // stack, we want to add it back if we have a reserved call frame.
    assert(CalleePopAmount < 0xffffff && "call frame too large");
    emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, -CalleePopAmount,
                    TII);
  }
  return MBB.erase(I);
}
288 
289 static bool ShouldSignReturnAddress(MachineFunction &MF) {
290   // The function should be signed in the following situations:
291   // - sign-return-address=all
292   // - sign-return-address=non-leaf and the functions spills the LR
293 
294   const Function &F = MF.getFunction();
295   if (!F.hasFnAttribute("sign-return-address"))
296     return false;
297 
298   StringRef Scope = F.getFnAttribute("sign-return-address").getValueAsString();
299   if (Scope.equals("none"))
300     return false;
301 
302   if (Scope.equals("all"))
303     return true;
304 
305   assert(Scope.equals("non-leaf") && "Expected all, none or non-leaf");
306 
307   for (const auto &Info : MF.getFrameInfo().getCalleeSavedInfo())
308     if (Info.getReg() == AArch64::LR)
309       return true;
310 
311   return false;
312 }
313 
314 void AArch64FrameLowering::emitCalleeSavedFrameMoves(
315     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
316   MachineFunction &MF = *MBB.getParent();
317   MachineFrameInfo &MFI = MF.getFrameInfo();
318   const TargetSubtargetInfo &STI = MF.getSubtarget();
319   const MCRegisterInfo *MRI = STI.getRegisterInfo();
320   const TargetInstrInfo *TII = STI.getInstrInfo();
321   DebugLoc DL = MBB.findDebugLoc(MBBI);
322 
323   // Add callee saved registers to move list.
324   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
325   if (CSI.empty())
326     return;
327 
328   for (const auto &Info : CSI) {
329     unsigned Reg = Info.getReg();
330     int64_t Offset =
331         MFI.getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea();
332     unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
333     unsigned CFIIndex = MF.addFrameInst(
334         MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
335     BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
336         .addCFIIndex(CFIIndex)
337         .setMIFlags(MachineInstr::FrameSetup);
338   }
339 }
340 
341 // Find a scratch register that we can use at the start of the prologue to
342 // re-align the stack pointer.  We avoid using callee-save registers since they
343 // may appear to be free when this is called from canUseAsPrologue (during
344 // shrink wrapping), but then no longer be free when this is called from
345 // emitPrologue.
346 //
347 // FIXME: This is a bit conservative, since in the above case we could use one
348 // of the callee-save registers as a scratch temp to re-align the stack pointer,
349 // but we would then have to make sure that we were in fact saving at least one
350 // callee-save register in the prologue, which is additional complexity that
351 // doesn't seem worth the benefit.
352 static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
353   MachineFunction *MF = MBB->getParent();
354 
355   // If MBB is an entry block, use X9 as the scratch register
356   if (&MF->front() == MBB)
357     return AArch64::X9;
358 
359   const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
360   const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
361   LivePhysRegs LiveRegs(TRI);
362   LiveRegs.addLiveIns(*MBB);
363 
364   // Mark callee saved registers as used so we will not choose them.
365   const MCPhysReg *CSRegs = MF->getRegInfo().getCalleeSavedRegs();
366   for (unsigned i = 0; CSRegs[i]; ++i)
367     LiveRegs.addReg(CSRegs[i]);
368 
369   // Prefer X9 since it was historically used for the prologue scratch reg.
370   const MachineRegisterInfo &MRI = MF->getRegInfo();
371   if (LiveRegs.available(MRI, AArch64::X9))
372     return AArch64::X9;
373 
374   for (unsigned Reg : AArch64::GPR64RegClass) {
375     if (LiveRegs.available(MRI, Reg))
376       return Reg;
377   }
378   return AArch64::NoRegister;
379 }
380 
381 bool AArch64FrameLowering::canUseAsPrologue(
382     const MachineBasicBlock &MBB) const {
383   const MachineFunction *MF = MBB.getParent();
384   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
385   const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
386   const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
387 
388   // Don't need a scratch register if we're not going to re-align the stack.
389   if (!RegInfo->needsStackRealignment(*MF))
390     return true;
391   // Otherwise, we can use any block as long as it has a scratch register
392   // available.
393   return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister;
394 }
395 
396 static bool windowsRequiresStackProbe(MachineFunction &MF,
397                                       unsigned StackSizeInBytes) {
398   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
399   if (!Subtarget.isTargetWindows())
400     return false;
401   const Function &F = MF.getFunction();
402   // TODO: When implementing stack protectors, take that into account
403   // for the probe threshold.
404   unsigned StackProbeSize = 4096;
405   if (F.hasFnAttribute("stack-probe-size"))
406     F.getFnAttribute("stack-probe-size")
407         .getValueAsString()
408         .getAsInteger(0, StackProbeSize);
409   return (StackSizeInBytes >= StackProbeSize) &&
410          !F.hasFnAttribute("no-stack-arg-probe");
411 }
412 
413 bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
414     MachineFunction &MF, unsigned StackBumpBytes) const {
415   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
416   const MachineFrameInfo &MFI = MF.getFrameInfo();
417   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
418   const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
419 
420   if (AFI->getLocalStackSize() == 0)
421     return false;
422 
423   // 512 is the maximum immediate for stp/ldp that will be used for
424   // callee-save save/restores
425   if (StackBumpBytes >= 512 || windowsRequiresStackProbe(MF, StackBumpBytes))
426     return false;
427 
428   if (MFI.hasVarSizedObjects())
429     return false;
430 
431   if (RegInfo->needsStackRealignment(MF))
432     return false;
433 
434   // This isn't strictly necessary, but it simplifies things a bit since the
435   // current RedZone handling code assumes the SP is adjusted by the
436   // callee-save save/restore code.
437   if (canUseRedZone(MF))
438     return false;
439 
440   return true;
441 }
442 
// Given a load or a store instruction, generate an appropriate unwinding SEH
// code on Windows.
//
// The SEH pseudo is inserted immediately after the save/restore it annotates.
// The epilogue (post-indexed load) cases flip the sign of the immediate and
// fall through to reuse the mapping of the matching prologue (pre-indexed
// store) case.
static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI,
                                             const TargetInstrInfo &TII,
                                             MachineInstr::MIFlag Flag) {
  unsigned Opc = MBBI->getOpcode();
  MachineBasicBlock *MBB = MBBI->getParent();
  MachineFunction &MF = *MBB->getParent();
  DebugLoc DL = MBBI->getDebugLoc();
  // The immediate (stack offset) is always the last operand.
  unsigned ImmIdx = MBBI->getNumOperands() - 1;
  int Imm = MBBI->getOperand(ImmIdx).getImm();
  MachineInstrBuilder MIB;
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  switch (Opc) {
  default:
    llvm_unreachable("No SEH Opcode for this instruction");
  case AArch64::LDPDpost:
    Imm = -Imm;
    LLVM_FALLTHROUGH;
  case AArch64::STPDpre: {
    // Writeback pair of D registers (operands 1/2); the pair immediate is
    // scaled by 8 bytes when encoded into the SEH code.
    unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
    unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP_X))
              .addImm(Reg0)
              .addImm(Reg1)
              .addImm(Imm * 8)
              .setMIFlag(Flag);
    break;
  }
  case AArch64::LDPXpost:
    Imm = -Imm;
    LLVM_FALLTHROUGH;
  case AArch64::STPXpre: {
    unsigned Reg0 = MBBI->getOperand(1).getReg();
    unsigned Reg1 = MBBI->getOperand(2).getReg();
    // An fp/lr pair forms the frame record and has a dedicated SEH opcode.
    if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR_X))
                .addImm(Imm * 8)
                .setMIFlag(Flag);
    else
      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP_X))
                .addImm(RegInfo->getSEHRegNum(Reg0))
                .addImm(RegInfo->getSEHRegNum(Reg1))
                .addImm(Imm * 8)
                .setMIFlag(Flag);
    break;
  }
  case AArch64::LDRDpost:
    Imm = -Imm;
    LLVM_FALLTHROUGH;
  case AArch64::STRDpre: {
    // Single D register with writeback; unlike the paired forms, the
    // immediate is passed through unscaled.
    unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg_X))
              .addImm(Reg)
              .addImm(Imm)
              .setMIFlag(Flag);
    break;
  }
  case AArch64::LDRXpost:
    Imm = -Imm;
    LLVM_FALLTHROUGH;
  case AArch64::STRXpre: {
    unsigned Reg =  RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg_X))
              .addImm(Reg)
              .addImm(Imm)
              .setMIFlag(Flag);
    break;
  }
  case AArch64::STPDi:
  case AArch64::LDPDi: {
    // Offset-form pair (no writeback); here the registers are operands 0/1.
    unsigned Reg0 =  RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
    unsigned Reg1 =  RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP))
              .addImm(Reg0)
              .addImm(Reg1)
              .addImm(Imm * 8)
              .setMIFlag(Flag);
    break;
  }
  case AArch64::STPXi:
  case AArch64::LDPXi: {
    unsigned Reg0 = MBBI->getOperand(0).getReg();
    unsigned Reg1 = MBBI->getOperand(1).getReg();
    // As above, fp/lr pairs get the dedicated frame-record SEH opcode.
    if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR))
                .addImm(Imm * 8)
                .setMIFlag(Flag);
    else
      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP))
                .addImm(RegInfo->getSEHRegNum(Reg0))
                .addImm(RegInfo->getSEHRegNum(Reg1))
                .addImm(Imm * 8)
                .setMIFlag(Flag);
    break;
  }
  case AArch64::STRXui:
  case AArch64::LDRXui: {
    int Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg))
              .addImm(Reg)
              .addImm(Imm * 8)
              .setMIFlag(Flag);
    break;
  }
  case AArch64::STRDui:
  case AArch64::LDRDui: {
    unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg))
              .addImm(Reg)
              .addImm(Imm * 8)
              .setMIFlag(Flag);
    break;
  }
  }
  // Place the SEH code right after the instruction it describes and return
  // its iterator.
  auto I = MBB->insertAfter(MBBI, MIB);
  return I;
}
563 
564 // Fix up the SEH opcode associated with the save/restore instruction.
565 static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
566                            unsigned LocalStackSize) {
567   MachineOperand *ImmOpnd = nullptr;
568   unsigned ImmIdx = MBBI->getNumOperands() - 1;
569   switch (MBBI->getOpcode()) {
570   default:
571     llvm_unreachable("Fix the offset in the SEH instruction");
572   case AArch64::SEH_SaveFPLR:
573   case AArch64::SEH_SaveRegP:
574   case AArch64::SEH_SaveReg:
575   case AArch64::SEH_SaveFRegP:
576   case AArch64::SEH_SaveFReg:
577     ImmOpnd = &MBBI->getOperand(ImmIdx);
578     break;
579   }
580   if (ImmOpnd)
581     ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
582 }
583 
// Convert callee-save register save/restore instruction to do stack pointer
// decrement/increment to allocate/deallocate the callee-save stack area by
// converting store/load to use pre/post increment version.
//
// \p CSStackSizeInc is the SP adjustment in bytes to fold into the converted
// instruction; it must be a multiple of the new opcode's implicit immediate
// scale (asserted below). Returns an iterator to the replacement instruction.
static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc,
    bool NeedsWinCFI, bool *HasWinCFI, bool InProlog = true) {
  // Ignore instructions that do not operate on SP, i.e. shadow call stack
  // instructions and associated CFI instruction.
  while (MBBI->getOpcode() == AArch64::STRXpost ||
         MBBI->getOpcode() == AArch64::LDRXpre ||
         MBBI->getOpcode() == AArch64::CFI_INSTRUCTION) {
    if (MBBI->getOpcode() != AArch64::CFI_INSTRUCTION)
      assert(MBBI->getOperand(0).getReg() != AArch64::SP);
    ++MBBI;
  }
  unsigned NewOpc;
  // Scale is the unit of the new opcode's immediate operand: the paired
  // forms encode their offset in register-size units, the single-register
  // forms in bytes.
  int Scale = 1;
  switch (MBBI->getOpcode()) {
  default:
    llvm_unreachable("Unexpected callee-save save/restore opcode!");
  case AArch64::STPXi:
    NewOpc = AArch64::STPXpre;
    Scale = 8;
    break;
  case AArch64::STPDi:
    NewOpc = AArch64::STPDpre;
    Scale = 8;
    break;
  case AArch64::STPQi:
    NewOpc = AArch64::STPQpre;
    Scale = 16;
    break;
  case AArch64::STRXui:
    NewOpc = AArch64::STRXpre;
    break;
  case AArch64::STRDui:
    NewOpc = AArch64::STRDpre;
    break;
  case AArch64::STRQui:
    NewOpc = AArch64::STRQpre;
    break;
  case AArch64::LDPXi:
    NewOpc = AArch64::LDPXpost;
    Scale = 8;
    break;
  case AArch64::LDPDi:
    NewOpc = AArch64::LDPDpost;
    Scale = 8;
    break;
  case AArch64::LDPQi:
    NewOpc = AArch64::LDPQpost;
    Scale = 16;
    break;
  case AArch64::LDRXui:
    NewOpc = AArch64::LDRXpost;
    break;
  case AArch64::LDRDui:
    NewOpc = AArch64::LDRDpost;
    break;
  case AArch64::LDRQui:
    NewOpc = AArch64::LDRQpost;
    break;
  }
  // Get rid of the SEH code associated with the old instruction.
  if (NeedsWinCFI) {
    auto SEH = std::next(MBBI);
    if (AArch64InstrInfo::isSEHInstruction(*SEH))
      SEH->eraseFromParent();
  }

  // Build the replacement. The pre/post-indexed form additionally defines SP
  // (the writeback result) as an extra first operand.
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  MIB.addReg(AArch64::SP, RegState::Define);

  // Copy all operands other than the immediate offset.
  unsigned OpndIdx = 0;
  for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
       ++OpndIdx)
    MIB.add(MBBI->getOperand(OpndIdx));

  assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
         "Unexpected immediate offset in first/last callee-save save/restore "
         "instruction!");
  assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");
  // The SP adjustment must be representable in the new opcode's scaled
  // immediate.
  assert(CSStackSizeInc % Scale == 0);
  MIB.addImm(CSStackSizeInc / Scale);

  MIB.setMIFlags(MBBI->getFlags());
  MIB.setMemRefs(MBBI->memoperands());

  // Generate a new SEH code that corresponds to the new instruction.
  if (NeedsWinCFI) {
    *HasWinCFI = true;
    InsertSEH(*MIB, *TII,
              InProlog ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy);
  }

  // Erase the original and return an iterator to the replacement, which was
  // inserted immediately before it.
  return std::prev(MBB.erase(MBBI));
}
684 
// Fixup callee-save register save/restore instructions to take into account
// combined SP bump by adding the local stack size to the stack offsets.
static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
                                              unsigned LocalStackSize,
                                              bool NeedsWinCFI,
                                              bool *HasWinCFI) {
  // SEH pseudos are adjusted through fixupSEHOpcode (below) when their
  // paired load/store is visited, not directly.
  if (AArch64InstrInfo::isSEHInstruction(MI))
    return;

  unsigned Opc = MI.getOpcode();

  // Ignore instructions that do not operate on SP, i.e. shadow call stack
  // instructions and associated CFI instruction.
  if (Opc == AArch64::STRXpost || Opc == AArch64::LDRXpre ||
      Opc == AArch64::CFI_INSTRUCTION) {
    if (Opc != AArch64::CFI_INSTRUCTION)
      assert(MI.getOperand(0).getReg() != AArch64::SP);
    return;
  }

  // Unit of the instruction's immediate: X/D-register ops are scaled by
  // 8 bytes, Q-register ops by 16.
  unsigned Scale;
  switch (Opc) {
  case AArch64::STPXi:
  case AArch64::STRXui:
  case AArch64::STPDi:
  case AArch64::STRDui:
  case AArch64::LDPXi:
  case AArch64::LDRXui:
  case AArch64::LDPDi:
  case AArch64::LDRDui:
    Scale = 8;
    break;
  case AArch64::STPQi:
  case AArch64::STRQui:
  case AArch64::LDPQi:
  case AArch64::LDRQui:
    Scale = 16;
    break;
  default:
    llvm_unreachable("Unexpected callee-save save/restore opcode!");
  }

  unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
  assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");
  // Last operand is immediate offset that needs fixing.
  MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
  // All generated opcodes have scaled offsets.
  assert(LocalStackSize % Scale == 0);
  OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);

  if (NeedsWinCFI) {
    *HasWinCFI = true;
    // The SEH pseudo directly follows the instruction it describes; keep its
    // (byte) offset in sync with the adjustment just made.
    auto MBBI = std::next(MachineBasicBlock::iterator(MI));
    assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
    assert(AArch64InstrInfo::isSEHInstruction(*MBBI) &&
           "Expecting a SEH instruction");
    fixupSEHOpcode(MBBI, LocalStackSize);
  }
}
745 
746 static void adaptForLdStOpt(MachineBasicBlock &MBB,
747                             MachineBasicBlock::iterator FirstSPPopI,
748                             MachineBasicBlock::iterator LastPopI) {
749   // Sometimes (when we restore in the same order as we save), we can end up
750   // with code like this:
751   //
752   // ldp      x26, x25, [sp]
753   // ldp      x24, x23, [sp, #16]
754   // ldp      x22, x21, [sp, #32]
755   // ldp      x20, x19, [sp, #48]
756   // add      sp, sp, #64
757   //
758   // In this case, it is always better to put the first ldp at the end, so
759   // that the load-store optimizer can run and merge the ldp and the add into
760   // a post-index ldp.
761   // If we managed to grab the first pop instruction, move it to the end.
762   if (ReverseCSRRestoreSeq)
763     MBB.splice(FirstSPPopI, &MBB, LastPopI);
764   // We should end up with something like this now:
765   //
766   // ldp      x24, x23, [sp, #16]
767   // ldp      x22, x21, [sp, #32]
768   // ldp      x20, x19, [sp, #48]
769   // ldp      x26, x25, [sp]
770   // add      sp, sp, #64
771   //
772   // and the load-store optimizer can merge the last two instructions into:
773   //
774   // ldp      x26, x25, [sp], #64
775   //
776 }
777 
778 static bool ShouldSignWithAKey(MachineFunction &MF) {
779   const Function &F = MF.getFunction();
780   if (!F.hasFnAttribute("sign-return-address-key"))
781     return true;
782 
783   const StringRef Key =
784       F.getFnAttribute("sign-return-address-key").getValueAsString();
785   assert(Key.equals_lower("a_key") || Key.equals_lower("b_key"));
786   return Key.equals_lower("a_key");
787 }
788 
789 static bool needsWinCFI(const MachineFunction &MF) {
790   const Function &F = MF.getFunction();
791   return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
792          F.needsUnwindTableEntry();
793 }
794 
/// Emit the function prologue.
///
/// Signs the return address when requested, allocates stack for locals and
/// callee saves (fixing up the already-emitted callee-save spill offsets),
/// sets up the frame pointer and base pointer when needed, performs Windows
/// stack probing via __chkstk and dynamic stack realignment, and emits the
/// matching DWARF CFI or Windows SEH unwind directives.
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const Function &F = MF.getFunction();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  // DWARF frame moves are only emitted when not using Windows CFI.
  bool needsFrameMoves = (MMI.hasDebugInfo() || F.needsUnwindTableEntry()) &&
                         !MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
  bool HasFP = hasFP(MF);
  bool NeedsWinCFI = needsWinCFI(MF);
  bool HasWinCFI = false;
  // Publish on every exit path whether any Windows CFI opcode was emitted.
  auto Cleanup = make_scope_exit([&]() { MF.setHasWinCFI(HasWinCFI); });

  bool IsFunclet = MBB.isEHFuncletEntry();

  // At this point, we're going to decide whether or not the function uses a
  // redzone. In most cases, the function doesn't have a redzone so let's
  // assume that's false and set it to true in the case that there's a redzone.
  AFI->setHasRedZone(false);

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc DL;

  // Sign the return address at the very top of the prologue, and emit the
  // negate_ra_state CFI so unwinders know LR is signed from here on.
  if (ShouldSignReturnAddress(MF)) {
    if (ShouldSignWithAKey(MF))
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIASP))
          .setMIFlag(MachineInstr::FrameSetup);
    else {
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITBKEY))
          .setMIFlag(MachineInstr::FrameSetup);
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIBSP))
          .setMIFlag(MachineInstr::FrameSetup);
    }

    unsigned CFIIndex =
        MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlags(MachineInstr::FrameSetup);
  }

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  // getStackSize() includes all the locals in its size calculation. We don't
  // include these locals when computing the stack size of a funclet, as they
  // are allocated in the parent's stack frame and accessed via the frame
  // pointer from the funclet.  We only save the callee saved registers in the
  // funclet, which are really the callee saved registers of the parent
  // function, including the funclet.
  int NumBytes = IsFunclet ? (int)getWinEHFuncletFrameSize(MF)
                           : (int)MFI.getStackSize();
  if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
    assert(!HasFP && "unexpected function without stack frame but with FP");
    // All of the stack allocation is for locals.
    AFI->setLocalStackSize(NumBytes);
    if (!NumBytes)
      return;
    // REDZONE: If the stack size is less than 128 bytes, we don't need
    // to actually allocate.
    if (canUseRedZone(MF)) {
      AFI->setHasRedZone(true);
      ++NumRedZoneFunctions;
    } else {
      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
      if (!NeedsWinCFI) {
        // Label used to tie together the PROLOG_LABEL and the MachineMoves.
        MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
        // Encode the stack size of the leaf function.
        unsigned CFIIndex = MF.addFrameInst(
            MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
        BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex)
            .setMIFlags(MachineInstr::FrameSetup);
      }
    }

    if (NeedsWinCFI) {
      HasWinCFI = true;
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
          .setMIFlag(MachineInstr::FrameSetup);
    }

    return;
  }

  bool IsWin64 =
      Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
  // Var args are accounted for in the containing function, so don't
  // include them for funclets.
  unsigned FixedObject = (IsWin64 && !IsFunclet) ?
                         alignTo(AFI->getVarArgsGPRSize(), 16) : 0;

  auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
  // All of the remaining stack allocations are for locals.
  AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
  bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
  if (CombineSPBump) {
    emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                    MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
    NumBytes = 0;
  } else if (PrologueSaveSize != 0) {
    // Fold the callee-save allocation into the first save as a pre-decrement.
    MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
        MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI);
    NumBytes -= PrologueSaveSize;
  }
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  // Move past the saves of the callee-saved registers, fixing up the offsets
  // and pre-inc if we decided to combine the callee-save and local stack
  // pointer bump above.
  MachineBasicBlock::iterator End = MBB.end();
  while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup)) {
    if (CombineSPBump)
      fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
                                        NeedsWinCFI, &HasWinCFI);
    ++MBBI;
  }

  // The code below is not applicable to funclets. We have emitted all the SEH
  // opcodes that we needed to emit.  The FP and BP belong to the containing
  // function.
  if (IsFunclet) {
    if (NeedsWinCFI) {
      HasWinCFI = true;
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
          .setMIFlag(MachineInstr::FrameSetup);
    }

    // SEH funclets are passed the frame pointer in X1.  If the parent
    // function uses the base register, then the base register is used
    // directly, and is not retrieved from X1.
    if (F.hasPersonalityFn()) {
      EHPersonality Per = classifyEHPersonality(F.getPersonalityFn());
      if (isAsynchronousEHPersonality(Per)) {
        BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), AArch64::FP)
            .addReg(AArch64::X1).setMIFlag(MachineInstr::FrameSetup);
        MBB.addLiveIn(AArch64::X1);
      }
    }

    return;
  }

  if (HasFP) {
    // Only set up FP if we actually need to. Frame pointer is fp =
    // sp - fixedobject - 16.
    int FPOffset = AFI->getCalleeSavedStackSize() - 16;
    if (CombineSPBump)
      FPOffset += AFI->getLocalStackSize();

    // Issue    sub fp, sp, FPOffset or
    //          mov fp,sp          when FPOffset is zero.
    // Note: All stores of callee-saved registers are marked as "FrameSetup".
    // This code marks the instruction(s) that set the FP also.
    emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
                    MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
  }

  // Windows: large allocations must touch each page in order. __chkstk takes
  // the allocation size in 16-byte units in X15; afterwards SP is dropped by
  // X15 * 16 (the SUBXrx64 with a UXTX #4 extend below).
  if (windowsRequiresStackProbe(MF, NumBytes)) {
    uint32_t NumWords = NumBytes >> 4;
    if (NeedsWinCFI) {
      HasWinCFI = true;
      // alloc_l can hold at most 256MB, so assume that NumBytes doesn't
      // exceed this amount.  We need to move at most 2^24 - 1 into x15.
      // This is at most two instructions, MOVZ followed by MOVK.
      // TODO: Fix to use multiple stack alloc unwind codes for stacks
      // exceeding 256MB in size.
      if (NumBytes >= (1 << 28))
        report_fatal_error("Stack size cannot exceed 256MB for stack "
                            "unwinding purposes");

      uint32_t LowNumWords = NumWords & 0xFFFF;
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15)
            .addImm(LowNumWords)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
            .setMIFlag(MachineInstr::FrameSetup);
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
            .setMIFlag(MachineInstr::FrameSetup);
      if ((NumWords & 0xFFFF0000) != 0) {
          BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15)
              .addReg(AArch64::X15)
              .addImm((NumWords & 0xFFFF0000) >> 16) // High half
              .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16))
              .setMIFlag(MachineInstr::FrameSetup);
          BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
            .setMIFlag(MachineInstr::FrameSetup);
      }
    } else {
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
          .addImm(NumWords)
          .setMIFlags(MachineInstr::FrameSetup);
    }

    switch (MF.getTarget().getCodeModel()) {
    case CodeModel::Tiny:
    case CodeModel::Small:
    case CodeModel::Medium:
    case CodeModel::Kernel:
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
          .addExternalSymbol("__chkstk")
          .addReg(AArch64::X15, RegState::Implicit)
          .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
          .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
          .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
          .setMIFlags(MachineInstr::FrameSetup);
      if (NeedsWinCFI) {
        HasWinCFI = true;
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
            .setMIFlag(MachineInstr::FrameSetup);
      }
      break;
    case CodeModel::Large:
      // In the large code model the callee address may not be reachable by
      // BL, so materialize it in X16 and branch through the register.
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
          .addReg(AArch64::X16, RegState::Define)
          .addExternalSymbol("__chkstk")
          .addExternalSymbol("__chkstk")
          .setMIFlags(MachineInstr::FrameSetup);
      if (NeedsWinCFI) {
        HasWinCFI = true;
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
            .setMIFlag(MachineInstr::FrameSetup);
      }

      BuildMI(MBB, MBBI, DL, TII->get(AArch64::BLR))
          .addReg(AArch64::X16, RegState::Kill)
          .addReg(AArch64::X15, RegState::Implicit | RegState::Define)
          .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
          .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
          .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
          .setMIFlags(MachineInstr::FrameSetup);
      if (NeedsWinCFI) {
        HasWinCFI = true;
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
            .setMIFlag(MachineInstr::FrameSetup);
      }
      break;
    }

    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
        .addReg(AArch64::SP, RegState::Kill)
        .addReg(AArch64::X15, RegState::Kill)
        .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
        .setMIFlags(MachineInstr::FrameSetup);
    if (NeedsWinCFI) {
      HasWinCFI = true;
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
          .addImm(NumBytes)
          .setMIFlag(MachineInstr::FrameSetup);
    }
    NumBytes = 0;
  }

  // Allocate space for the rest of the frame.
  if (NumBytes) {
    const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
    unsigned scratchSPReg = AArch64::SP;

    if (NeedsRealignment) {
      scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
      assert(scratchSPReg != AArch64::NoRegister);
    }

    // If we're a leaf function, try using the red zone.
    if (!canUseRedZone(MF))
      // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
      // the correct value here, as NumBytes also includes padding bytes,
      // which shouldn't be counted here.
      emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);

    if (NeedsRealignment) {
      const unsigned Alignment = MFI.getMaxAlignment();
      const unsigned NrBitsToZero = countTrailingZeros(Alignment);
      assert(NrBitsToZero > 1);
      assert(scratchSPReg != AArch64::SP);

      // SUB X9, SP, NumBytes
      //   -- X9 is temporary register, so shouldn't contain any live data here,
      //   -- free to use. This is already produced by emitFrameOffset above.
      // AND SP, X9, 0b11111...0000
      // The logical immediates have a non-trivial encoding. The following
      // formula computes the encoded immediate with all ones but
      // NrBitsToZero zero bits as least significant bits.
      uint32_t andMaskEncoded = (1 << 12)                         // = N
                                | ((64 - NrBitsToZero) << 6)      // immr
                                | ((64 - NrBitsToZero - 1) << 0); // imms

      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
          .addReg(scratchSPReg, RegState::Kill)
          .addImm(andMaskEncoded);
      AFI->setStackRealigned(true);
      if (NeedsWinCFI) {
        HasWinCFI = true;
        // NOTE(review): masking the byte count with the *encoded* logical
        // immediate looks suspicious as a SEH allocation size — confirm the
        // intended operand for SEH_StackAlloc here.
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
            .addImm(NumBytes & andMaskEncoded)
            .setMIFlag(MachineInstr::FrameSetup);
      }
    }
  }

  // If we need a base pointer, set it up here. It's whatever the value of the
  // stack pointer is at this point. Any variable size objects will be allocated
  // after this, so we can still use the base pointer to reference locals.
  //
  // FIXME: Clarify FrameSetup flags here.
  // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
  // needed.
  if (RegInfo->hasBasePointer(MF)) {
    TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
                     false);
    if (NeedsWinCFI) {
      HasWinCFI = true;
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
          .setMIFlag(MachineInstr::FrameSetup);
    }
  }

  // The very last FrameSetup instruction indicates the end of prologue. Emit a
  // SEH opcode indicating the prologue end.
  if (NeedsWinCFI && HasWinCFI) {
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
        .setMIFlag(MachineInstr::FrameSetup);
  }

  if (needsFrameMoves) {
    const DataLayout &TD = MF.getDataLayout();
    const int StackGrowth = -TD.getPointerSize(0);
    unsigned FramePtr = RegInfo->getFrameRegister(MF);
    // An example of the prologue:
    //
    //     .globl __foo
    //     .align 2
    //  __foo:
    // Ltmp0:
    //     .cfi_startproc
    //     .cfi_personality 155, ___gxx_personality_v0
    // Leh_func_begin:
    //     .cfi_lsda 16, Lexception33
    //
    //     stp  xa,bx, [sp, -#offset]!
    //     ...
    //     stp  x28, x27, [sp, #offset-32]
    //     stp  fp, lr, [sp, #offset-16]
    //     add  fp, sp, #offset - 16
    //     sub  sp, sp, #1360
    //
    // The Stack:
    //       +-------------------------------------------+
    // 10000 | ........ | ........ | ........ | ........ |
    // 10004 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10008 | ........ | ........ | ........ | ........ |
    // 1000c | ........ | ........ | ........ | ........ |
    //       +===========================================+
    // 10010 |                X28 Register               |
    // 10014 |                X28 Register               |
    //       +-------------------------------------------+
    // 10018 |                X27 Register               |
    // 1001c |                X27 Register               |
    //       +===========================================+
    // 10020 |                Frame Pointer              |
    // 10024 |                Frame Pointer              |
    //       +-------------------------------------------+
    // 10028 |                Link Register              |
    // 1002c |                Link Register              |
    //       +===========================================+
    // 10030 | ........ | ........ | ........ | ........ |
    // 10034 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10038 | ........ | ........ | ........ | ........ |
    // 1003c | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    //
    //     [sp] = 10030        ::    >>initial value<<
    //     sp = 10020          ::  stp fp, lr, [sp, #-16]!
    //     fp = sp == 10020    ::  mov fp, sp
    //     [sp] == 10020       ::  stp x28, x27, [sp, #-16]!
    //     sp == 10010         ::    >>final value<<
    //
    // The frame pointer (w29) points to address 10020. If we use an offset of
    // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
    // for w27, and -32 for w28:
    //
    //  Ltmp1:
    //     .cfi_def_cfa w29, 16
    //  Ltmp2:
    //     .cfi_offset w30, -8
    //  Ltmp3:
    //     .cfi_offset w29, -16
    //  Ltmp4:
    //     .cfi_offset w27, -24
    //  Ltmp5:
    //     .cfi_offset w28, -32

    if (HasFP) {
      // Define the current CFA rule to use the provided FP.
      unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
      unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
          nullptr, Reg, 2 * StackGrowth - FixedObject));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    } else {
      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, -MFI.getStackSize()));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }

    // Now emit the moves for whatever callee saved regs we have (including FP,
    // LR if those are saved).
    emitCalleeSavedFrameMoves(MBB, MBBI);
  }
}
1220 
1221 static void InsertReturnAddressAuth(MachineFunction &MF,
1222                                     MachineBasicBlock &MBB) {
1223   if (!ShouldSignReturnAddress(MF))
1224     return;
1225   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1226   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1227 
1228   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1229   DebugLoc DL;
1230   if (MBBI != MBB.end())
1231     DL = MBBI->getDebugLoc();
1232 
1233   // The AUTIASP instruction assembles to a hint instruction before v8.3a so
1234   // this instruction can safely used for any v8a architecture.
1235   // From v8.3a onwards there are optimised authenticate LR and return
1236   // instructions, namely RETA{A,B}, that can be used instead.
1237   if (Subtarget.hasV8_3aOps() && MBBI != MBB.end() &&
1238       MBBI->getOpcode() == AArch64::RET_ReallyLR) {
1239     BuildMI(MBB, MBBI, DL,
1240             TII->get(ShouldSignWithAKey(MF) ? AArch64::RETAA : AArch64::RETAB))
1241         .copyImplicitOps(*MBBI);
1242     MBB.erase(MBBI);
1243   } else {
1244     BuildMI(
1245         MBB, MBBI, DL,
1246         TII->get(ShouldSignWithAKey(MF) ? AArch64::AUTIASP : AArch64::AUTIBSP))
1247         .setMIFlag(MachineInstr::FrameDestroy);
1248   }
1249 }
1250 
1251 static bool isFuncletReturnInstr(const MachineInstr &MI) {
1252   switch (MI.getOpcode()) {
1253   default:
1254     return false;
1255   case AArch64::CATCHRET:
1256   case AArch64::CLEANUPRET:
1257     return true;
1258   }
1259 }
1260 
/// Emit the function epilogue.
///
/// Undoes the stack adjustments performed by the prologue: restores SP
/// (from FP when the frame has variable-sized objects or was realigned),
/// converts or supplements the callee-save pops with the matching SP bump,
/// pops any tail-call argument space, and emits Windows SEH epilogue
/// markers. Return-address authentication is inserted on every exit path
/// via the scope_exit below.
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL;
  bool IsTailCallReturn = false;
  bool NeedsWinCFI = needsWinCFI(MF);
  bool HasWinCFI = false;
  bool IsFunclet = false;
  // On exit, record whether any SEH opcode was emitted (without clearing a
  // flag already set by the prologue).
  auto WinCFI = make_scope_exit([&]() {
    if (!MF.hasWinCFI())
      MF.setHasWinCFI(HasWinCFI);
  });

  if (MBB.end() != MBBI) {
    DL = MBBI->getDebugLoc();
    unsigned RetOpcode = MBBI->getOpcode();
    IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
                       RetOpcode == AArch64::TCRETURNri ||
                       RetOpcode == AArch64::TCRETURNriBTI;
    IsFunclet = isFuncletReturnInstr(*MBBI);
  }

  int NumBytes = IsFunclet ? (int)getWinEHFuncletFrameSize(MF)
                           : MFI.getStackSize();
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  // Initial and residual are named for consistency with the prologue. Note that
  // in the epilogue, the residual adjustment is executed first.
  uint64_t ArgumentPopSize = 0;
  if (IsTailCallReturn) {
    MachineOperand &StackAdjust = MBBI->getOperand(1);

    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments, this is
    // calculated during LowerCall and consumed here...
    ArgumentPopSize = StackAdjust.getImm();
  } else {
    // ... otherwise the amount to pop is *all* of the argument space,
    // conveniently stored in the MachineFunctionInfo by
    // LowerFormalArguments. This will, of course, be zero for the C calling
    // convention.
    ArgumentPopSize = AFI->getArgumentStackToRestore();
  }

  // The stack frame should be like below,
  //
  //      ----------------------                     ---
  //      |                    |                      |
  //      | BytesInStackArgArea|              CalleeArgStackSize
  //      | (NumReusableBytes) |                (of tail call)
  //      |                    |                     ---
  //      |                    |                      |
  //      ---------------------|        ---           |
  //      |                    |         |            |
  //      |   CalleeSavedReg   |         |            |
  //      | (CalleeSavedStackSize)|      |            |
  //      |                    |         |            |
  //      ---------------------|         |         NumBytes
  //      |                    |     StackSize  (StackAdjustUp)
  //      |   LocalStackSize   |         |            |
  //      | (covering callee   |         |            |
  //      |       args)        |         |            |
  //      |                    |         |            |
  //      ----------------------        ---          ---
  //
  // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
  //             = StackSize + ArgumentPopSize
  //
  // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
  // it as the 2nd argument of AArch64ISD::TC_RETURN.

  // Authenticate the return address on every exit path if it was signed.
  auto Cleanup = make_scope_exit([&] { InsertReturnAddressAuth(MF, MBB); });

  bool IsWin64 =
      Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
  // Var args are accounted for in the containing function, so don't
  // include them for funclets.
  unsigned FixedObject =
      (IsWin64 && !IsFunclet) ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;

  uint64_t AfterCSRPopSize = ArgumentPopSize;
  auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
  // We cannot rely on the local stack size set in emitPrologue if the function
  // has funclets, as funclets have different local stack size requirements, and
  // the current value set in emitPrologue may be that of the containing
  // function.
  if (MF.hasEHFunclets())
    AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
  bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
  // Assume we can't combine the last pop with the sp restore.

  if (!CombineSPBump && PrologueSaveSize != 0) {
    // Step back over any SEH opcodes to find the actual last CSR pop.
    MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
    while (AArch64InstrInfo::isSEHInstruction(*Pop))
      Pop = std::prev(Pop);
    // Converting the last ldp to a post-index ldp is valid only if the last
    // ldp's offset is 0.
    const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
    // If the offset is 0, convert it to a post-index ldp.
    if (OffsetOp.getImm() == 0)
      convertCalleeSaveRestoreToSPPrePostIncDec(
          MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, false);
    else {
      // If not, make sure to emit an add after the last ldp.
      // We're doing this by transferring the size to be restored from the
      // adjustment *before* the CSR pops to the adjustment *after* the CSR
      // pops.
      AfterCSRPopSize += PrologueSaveSize;
    }
  }

  // Move past the restores of the callee-saved registers.
  // If we plan on combining the sp bump of the local stack size and the callee
  // save stack size, we might need to adjust the CSR save and restore offsets.
  MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
  MachineBasicBlock::iterator Begin = MBB.begin();
  while (LastPopI != Begin) {
    --LastPopI;
    if (!LastPopI->getFlag(MachineInstr::FrameDestroy)) {
      ++LastPopI;
      break;
    } else if (CombineSPBump)
      fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(),
                                        NeedsWinCFI, &HasWinCFI);
  }

  if (NeedsWinCFI) {
    HasWinCFI = true;
    BuildMI(MBB, LastPopI, DL, TII->get(AArch64::SEH_EpilogStart))
        .setMIFlag(MachineInstr::FrameDestroy);
  }

  // If there is a single SP update, insert it before the ret and we're done.
  if (CombineSPBump) {
    emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
                    NumBytes + AfterCSRPopSize, TII, MachineInstr::FrameDestroy,
                    false, NeedsWinCFI, &HasWinCFI);
    if (NeedsWinCFI && HasWinCFI)
      BuildMI(MBB, MBB.getFirstTerminator(), DL,
              TII->get(AArch64::SEH_EpilogEnd))
          .setMIFlag(MachineInstr::FrameDestroy);
    return;
  }

  NumBytes -= PrologueSaveSize;
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  if (!hasFP(MF)) {
    bool RedZone = canUseRedZone(MF);
    // If this was a redzone leaf function, we don't need to restore the
    // stack pointer (but we may need to pop stack args for fastcc).
    if (RedZone && AfterCSRPopSize == 0)
      return;

    bool NoCalleeSaveRestore = PrologueSaveSize == 0;
    int StackRestoreBytes = RedZone ? 0 : NumBytes;
    if (NoCalleeSaveRestore)
      StackRestoreBytes += AfterCSRPopSize;

    // If we were able to combine the local stack pop with the argument pop,
    // then we're done.
    bool Done = NoCalleeSaveRestore || AfterCSRPopSize == 0;

    // If we're done after this, make sure to help the load store optimizer.
    if (Done)
      adaptForLdStOpt(MBB, MBB.getFirstTerminator(), LastPopI);

    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
                    StackRestoreBytes, TII, MachineInstr::FrameDestroy, false,
                    NeedsWinCFI, &HasWinCFI);
    if (Done) {
      if (NeedsWinCFI) {
        HasWinCFI = true;
        BuildMI(MBB, MBB.getFirstTerminator(), DL,
                TII->get(AArch64::SEH_EpilogEnd))
            .setMIFlag(MachineInstr::FrameDestroy);
      }
      return;
    }

    NumBytes = 0;
  }

  // Restore the original stack pointer.
  // FIXME: Rather than doing the math here, we should instead just use
  // non-post-indexed loads for the restores if we aren't actually going to
  // be able to save any instructions.
  if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned()))
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
                    -AFI->getCalleeSavedStackSize() + 16, TII,
                    MachineInstr::FrameDestroy, false, NeedsWinCFI);
  else if (NumBytes)
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII,
                    MachineInstr::FrameDestroy, false, NeedsWinCFI);

  // This must be placed after the callee-save restore code because that code
  // assumes the SP is at the same location as it was after the callee-save save
  // code in the prologue.
  if (AfterCSRPopSize) {
    // Find an insertion point for the first ldp so that it goes before the
    // shadow call stack epilog instruction. This ensures that the restore of
    // lr from x18 is placed after the restore from sp.
    auto FirstSPPopI = MBB.getFirstTerminator();
    while (FirstSPPopI != Begin) {
      auto Prev = std::prev(FirstSPPopI);
      if (Prev->getOpcode() != AArch64::LDRXpre ||
          Prev->getOperand(0).getReg() == AArch64::SP)
        break;
      FirstSPPopI = Prev;
    }

    adaptForLdStOpt(MBB, FirstSPPopI, LastPopI);

    emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP,
                    AfterCSRPopSize, TII, MachineInstr::FrameDestroy, false,
                    NeedsWinCFI, &HasWinCFI);
  }
  if (NeedsWinCFI && HasWinCFI)
    BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd))
        .setMIFlag(MachineInstr::FrameDestroy);

  MF.setHasWinCFI(HasWinCFI);
}
1492 
1493 /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
1494 /// debug info.  It's the same as what we use for resolving the code-gen
1495 /// references for now.  FIXME: This can go wrong when references are
1496 /// SP-relative and simple call frames aren't used.
1497 int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
1498                                                  int FI,
1499                                                  unsigned &FrameReg) const {
1500   return resolveFrameIndexReference(MF, FI, FrameReg);
1501 }
1502 
1503 int AArch64FrameLowering::getNonLocalFrameIndexReference(
1504   const MachineFunction &MF, int FI) const {
1505   return getSEHFrameIndexOffset(MF, FI);
1506 }
1507 
1508 static int getFPOffset(const MachineFunction &MF, int FI) {
1509   const auto &MFI = MF.getFrameInfo();
1510   const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
1511   const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1512   bool IsWin64 =
1513       Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
1514   unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
1515   return MFI.getObjectOffset(FI) + FixedObject + 16;
1516 }
1517 
1518 static int getStackOffset(const MachineFunction &MF, int FI) {
1519   const auto &MFI = MF.getFrameInfo();
1520   return MFI.getObjectOffset(FI) + MFI.getStackSize();
1521 }
1522 
1523 int AArch64FrameLowering::getSEHFrameIndexOffset(const MachineFunction &MF,
1524                                                  int FI) const {
1525   const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
1526       MF.getSubtarget().getRegisterInfo());
1527   return RegInfo->getLocalAddressRegister(MF) == AArch64::FP ?
1528          getFPOffset(MF, FI) : getStackOffset(MF, FI);
1529 }
1530 
/// Resolve frame index FI to a base register (returned via FrameReg) plus an
/// immediate offset (the return value). The base is chosen among FP, the base
/// pointer (if present), and SP; PreferFP biases the choice toward FP when
/// either would be legal.
int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
                                                     int FI, unsigned &FrameReg,
                                                     bool PreferFP) const {
  const auto &MFI = MF.getFrameInfo();
  const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
  const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  // Candidate offsets for the two possible bases.
  int FPOffset = getFPOffset(MF, FI);
  int Offset = getStackOffset(MF, FI);
  bool isFixed = MFI.isFixedObjectIndex(FI);
  // Callee-save spill slots occupy offsets >= -CalleeSavedStackSize.
  bool isCSR = !isFixed && MFI.getObjectOffset(FI) >=
                               -((int)AFI->getCalleeSavedStackSize());

  // Use frame pointer to reference fixed objects. Use it for locals if
  // there are VLAs or a dynamically realigned SP (and thus the SP isn't
  // reliable as a base). Make sure useFPForScavengingIndex() does the
  // right thing for the emergency spill slot.
  bool UseFP = false;
  if (AFI->hasStackFrame()) {
    // Note: Keeping the following as multiple 'if' statements rather than
    // merging to a single expression for readability.
    //
    // Argument access should always use the FP.
    if (isFixed) {
      UseFP = hasFP(MF);
    } else if (isCSR && RegInfo->needsStackRealignment(MF)) {
      // References to the CSR area must use FP if we're re-aligning the stack
      // since the dynamically-sized alignment padding is between the SP/BP and
      // the CSR area.
      assert(hasFP(MF) && "Re-aligned stack must have frame pointer");
      UseFP = true;
    } else if (hasFP(MF) && !RegInfo->needsStackRealignment(MF)) {
      // If the FPOffset is negative, we have to keep in mind that the
      // available offset range for negative offsets is smaller than for
      // positive ones. If an offset is
      // available via the FP and the SP, use whichever is closest.
      bool FPOffsetFits = FPOffset >= -256; // signed 9-bit unscaled range
      // Prefer FP when the FP-relative displacement is the shorter one.
      PreferFP |= Offset > -FPOffset;

      if (MFI.hasVarSizedObjects()) {
        // If we have variable sized objects, we can use either FP or BP, as the
        // SP offset is unknown. We can use the base pointer if we have one and
        // FP is not preferred. If not, we're stuck with using FP.
        bool CanUseBP = RegInfo->hasBasePointer(MF);
        if (FPOffsetFits && CanUseBP) // Both are ok. Pick the best.
          UseFP = PreferFP;
        else if (!CanUseBP) // Can't use BP. Forced to use FP.
          UseFP = true;
        // else we can use BP and FP, but the offset from FP won't fit.
        // That will make us scavenge registers which we can probably avoid by
        // using BP. If it won't fit for BP either, we'll scavenge anyway.
      } else if (FPOffset >= 0) {
        // Use SP or FP, whichever gives us the best chance of the offset
        // being in range for direct access. If the FPOffset is positive,
        // that'll always be best, as the SP will be even further away.
        UseFP = true;
      } else if (MF.hasEHFunclets() && !RegInfo->hasBasePointer(MF)) {
        // Funclets access the locals contained in the parent's stack frame
        // via the frame pointer, so we have to use the FP in the parent
        // function.
        (void) Subtarget;
        assert(
            Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()) &&
            "Funclets should only be present on Win64");
        UseFP = true;
      } else {
        // We have the choice between FP and (SP or BP).
        if (FPOffsetFits && PreferFP) // If FP is the best fit, use it.
          UseFP = true;
      }
    }
  }

  assert(((isFixed || isCSR) || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
         "In the presence of dynamic stack pointer realignment, "
         "non-argument/CSR objects cannot be accessed through the frame pointer");

  if (UseFP) {
    FrameReg = RegInfo->getFrameRegister(MF);
    return FPOffset;
  }

  // Use the base pointer if we have one.
  if (RegInfo->hasBasePointer(MF))
    FrameReg = RegInfo->getBaseRegister();
  else {
    assert(!MFI.hasVarSizedObjects() &&
           "Can't use SP when we have var sized objects.");
    FrameReg = AArch64::SP;
    // If we're using the red zone for this function, the SP won't actually
    // be adjusted, so the offsets will be negative. They're also all
    // within range of the signed 9-bit immediate instructions.
    if (canUseRedZone(MF))
      Offset -= AFI->getLocalStackSize();
  }

  return Offset;
}
1630 
1631 static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
1632   // Do not set a kill flag on values that are also marked as live-in. This
1633   // happens with the @llvm-returnaddress intrinsic and with arguments passed in
1634   // callee saved registers.
1635   // Omitting the kill flags is conservatively correct even if the live-in
1636   // is not used after all.
1637   bool IsLiveIn = MF.getRegInfo().isLiveIn(Reg);
1638   return getKillRegState(!IsLiveIn);
1639 }
1640 
1641 static bool produceCompactUnwindFrame(MachineFunction &MF) {
1642   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1643   AttributeList Attrs = MF.getFunction().getAttributes();
1644   return Subtarget.isTargetMachO() &&
1645          !(Subtarget.getTargetLowering()->supportSwiftError() &&
1646            Attrs.hasAttrSomewhere(Attribute::SwiftError));
1647 }
1648 
// Decide whether the (Reg1, Reg2) pairing must be rejected for Windows CFI.
//
// The Windows EH unwind opcodes (save_regp, save_regp_x, save_fregp,
// save_fregp_x) can only describe saves/restores of *consecutive* register
// pairs, so when emitting Windows CFI any non-consecutive pairing is invalid.
// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
//
// TODO: LR can be paired with any register.  We don't support this yet in
// the MCLayer.  We need to add support for the save_lrpair unwind code.
static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
                                             bool NeedsWinCFI) {
  return NeedsWinCFI && Reg2 != Reg1 + 1;
}
1665 
namespace {

/// Describes one callee-save slot: either a single register or an adjacent
/// pair that can be saved/restored with a single STP/LDP.
struct RegPairInfo {
  unsigned Reg1 = AArch64::NoRegister;
  unsigned Reg2 = AArch64::NoRegister; // NoRegister when unpaired
  int FrameIdx;                        // frame index of the (first) slot
  int Offset;                          // scaled immediate for the STP/LDP
  enum RegType { GPR, FPR64, FPR128 } Type;

  RegPairInfo() = default;

  /// True when this record covers two registers.
  bool isPaired() const { return Reg2 != AArch64::NoRegister; }
};

} // end anonymous namespace
1681 
/// Build the list of callee-save register pairs (RegPairs) from CSI and
/// assign each entry its scaled STP/LDP offset within the callee-save area.
/// Sets NeedShadowCallStackProlog when LR is saved and the function carries
/// the shadowcallstack attribute.
static void computeCalleeSaveRegisterPairs(
    MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs,
    bool &NeedShadowCallStackProlog) {

  if (CSI.empty())
    return;

  bool NeedsWinCFI = needsWinCFI(MF);
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  CallingConv::ID CC = MF.getFunction().getCallingConv();
  unsigned Count = CSI.size();
  (void)CC;
  // MachO's compact unwind format relies on all registers being stored in
  // pairs.
  assert((!produceCompactUnwindFrame(MF) ||
          CC == CallingConv::PreserveMost ||
          (Count & 1) == 0) &&
         "Odd number of callee-saved regs to spill!");
  // Offsets are walked downward from the top of the callee-save area.
  int Offset = AFI->getCalleeSavedStackSize();
  // On Linux, we will have either one or zero non-paired register.  On Windows
  // with CFI, we can have multiple unpaired registers in order to utilize the
  // available unwind codes.  This flag assures that the alignment fixup is done
  // only once, as intended.
  bool FixupDone = false;
  for (unsigned i = 0; i < Count; ++i) {
    RegPairInfo RPI;
    RPI.Reg1 = CSI[i].getReg();

    // Classify the register so the spill/restore code can pick an opcode.
    if (AArch64::GPR64RegClass.contains(RPI.Reg1))
      RPI.Type = RegPairInfo::GPR;
    else if (AArch64::FPR64RegClass.contains(RPI.Reg1))
      RPI.Type = RegPairInfo::FPR64;
    else if (AArch64::FPR128RegClass.contains(RPI.Reg1))
      RPI.Type = RegPairInfo::FPR128;
    else
      llvm_unreachable("Unsupported register class.");

    // Add the next reg to the pair if it is in the same register class.
    if (i + 1 < Count) {
      unsigned NextReg = CSI[i + 1].getReg();
      switch (RPI.Type) {
      case RegPairInfo::GPR:
        if (AArch64::GPR64RegClass.contains(NextReg) &&
            !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI))
          RPI.Reg2 = NextReg;
        break;
      case RegPairInfo::FPR64:
        if (AArch64::FPR64RegClass.contains(NextReg) &&
            !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI))
          RPI.Reg2 = NextReg;
        break;
      case RegPairInfo::FPR128:
        if (AArch64::FPR128RegClass.contains(NextReg))
          RPI.Reg2 = NextReg;
        break;
      }
    }

    // If either of the registers to be saved is the lr register, it means that
    // we also need to save lr in the shadow call stack.
    if ((RPI.Reg1 == AArch64::LR || RPI.Reg2 == AArch64::LR) &&
        MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack)) {
      // The shadow call stack pointer lives in x18, which must be reserved.
      if (!MF.getSubtarget<AArch64Subtarget>().isXRegisterReserved(18))
        report_fatal_error("Must reserve x18 to use shadow call stack");
      NeedShadowCallStackProlog = true;
    }

    // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
    // list to come in sorted by frame index so that we can issue the store
    // pair instructions directly. Assert if we see anything otherwise.
    //
    // The order of the registers in the list is controlled by
    // getCalleeSavedRegs(), so they will always be in-order, as well.
    assert((!RPI.isPaired() ||
            (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) &&
           "Out of order callee saved regs!");

    // MachO's compact unwind format relies on all registers being stored in
    // adjacent register pairs.
    assert((!produceCompactUnwindFrame(MF) ||
            CC == CallingConv::PreserveMost ||
            (RPI.isPaired() &&
             ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
              RPI.Reg1 + 1 == RPI.Reg2))) &&
           "Callee-save registers not saved as adjacent register pair!");

    RPI.FrameIdx = CSI[i].getFrameIdx();

    // FPR128 slots are 16 bytes wide; GPR64/FPR64 slots are 8.
    int Scale = RPI.Type == RegPairInfo::FPR128 ? 16 : 8;
    Offset -= RPI.isPaired() ? 2 * Scale : Scale;

    // Round up size of non-pair to pair size if we need to pad the
    // callee-save area to ensure 16-byte alignment.
    if (AFI->hasCalleeSaveStackFreeSpace() && !FixupDone &&
        RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired()) {
      FixupDone = true;
      Offset -= 8;
      assert(Offset % 16 == 0);
      assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16);
      MFI.setObjectAlignment(RPI.FrameIdx, 16);
    }

    // STP/LDP immediates are scaled by the access size and must fit the
    // signed 7-bit field.
    assert(Offset % Scale == 0);
    RPI.Offset = Offset / Scale;
    assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
           "Offset out of bounds for LDP/STP immediate");

    RegPairs.push_back(RPI);
    if (RPI.isPaired())
      ++i;
  }
}
1796 
1797 bool AArch64FrameLowering::spillCalleeSavedRegisters(
1798     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1799     const std::vector<CalleeSavedInfo> &CSI,
1800     const TargetRegisterInfo *TRI) const {
1801   MachineFunction &MF = *MBB.getParent();
1802   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1803   bool NeedsWinCFI = needsWinCFI(MF);
1804   DebugLoc DL;
1805   SmallVector<RegPairInfo, 8> RegPairs;
1806 
1807   bool NeedShadowCallStackProlog = false;
1808   computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs,
1809                                  NeedShadowCallStackProlog);
1810   const MachineRegisterInfo &MRI = MF.getRegInfo();
1811 
1812   if (NeedShadowCallStackProlog) {
1813     // Shadow call stack prolog: str x30, [x18], #8
1814     BuildMI(MBB, MI, DL, TII.get(AArch64::STRXpost))
1815         .addReg(AArch64::X18, RegState::Define)
1816         .addReg(AArch64::LR)
1817         .addReg(AArch64::X18)
1818         .addImm(8)
1819         .setMIFlag(MachineInstr::FrameSetup);
1820 
1821     if (NeedsWinCFI)
1822       BuildMI(MBB, MI, DL, TII.get(AArch64::SEH_Nop))
1823           .setMIFlag(MachineInstr::FrameSetup);
1824 
1825     if (!MF.getFunction().hasFnAttribute(Attribute::NoUnwind)) {
1826       // Emit a CFI instruction that causes 8 to be subtracted from the value of
1827       // x18 when unwinding past this frame.
1828       static const char CFIInst[] = {
1829           dwarf::DW_CFA_val_expression,
1830           18, // register
1831           2,  // length
1832           static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
1833           static_cast<char>(-8) & 0x7f, // addend (sleb128)
1834       };
1835       unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createEscape(
1836           nullptr, StringRef(CFIInst, sizeof(CFIInst))));
1837       BuildMI(MBB, MI, DL, TII.get(AArch64::CFI_INSTRUCTION))
1838           .addCFIIndex(CFIIndex)
1839           .setMIFlag(MachineInstr::FrameSetup);
1840     }
1841 
1842     // This instruction also makes x18 live-in to the entry block.
1843     MBB.addLiveIn(AArch64::X18);
1844   }
1845 
1846   for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
1847        ++RPII) {
1848     RegPairInfo RPI = *RPII;
1849     unsigned Reg1 = RPI.Reg1;
1850     unsigned Reg2 = RPI.Reg2;
1851     unsigned StrOpc;
1852 
1853     // Issue sequence of spills for cs regs.  The first spill may be converted
1854     // to a pre-decrement store later by emitPrologue if the callee-save stack
1855     // area allocation can't be combined with the local stack area allocation.
1856     // For example:
1857     //    stp     x22, x21, [sp, #0]     // addImm(+0)
1858     //    stp     x20, x19, [sp, #16]    // addImm(+2)
1859     //    stp     fp, lr, [sp, #32]      // addImm(+4)
1860     // Rationale: This sequence saves uop updates compared to a sequence of
1861     // pre-increment spills like stp xi,xj,[sp,#-16]!
1862     // Note: Similar rationale and sequence for restores in epilog.
1863     unsigned Size, Align;
1864     switch (RPI.Type) {
1865     case RegPairInfo::GPR:
1866        StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
1867        Size = 8;
1868        Align = 8;
1869        break;
1870     case RegPairInfo::FPR64:
1871        StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
1872        Size = 8;
1873        Align = 8;
1874        break;
1875     case RegPairInfo::FPR128:
1876        StrOpc = RPI.isPaired() ? AArch64::STPQi : AArch64::STRQui;
1877        Size = 16;
1878        Align = 16;
1879        break;
1880     }
1881     LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
1882                if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
1883                dbgs() << ") -> fi#(" << RPI.FrameIdx;
1884                if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
1885                dbgs() << ")\n");
1886 
1887     assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) &&
1888            "Windows unwdinding requires a consecutive (FP,LR) pair");
1889     // Windows unwind codes require consecutive registers if registers are
1890     // paired.  Make the switch here, so that the code below will save (x,x+1)
1891     // and not (x+1,x).
1892     unsigned FrameIdxReg1 = RPI.FrameIdx;
1893     unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
1894     if (NeedsWinCFI && RPI.isPaired()) {
1895       std::swap(Reg1, Reg2);
1896       std::swap(FrameIdxReg1, FrameIdxReg2);
1897     }
1898     MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
1899     if (!MRI.isReserved(Reg1))
1900       MBB.addLiveIn(Reg1);
1901     if (RPI.isPaired()) {
1902       if (!MRI.isReserved(Reg2))
1903         MBB.addLiveIn(Reg2);
1904       MIB.addReg(Reg2, getPrologueDeath(MF, Reg2));
1905       MIB.addMemOperand(MF.getMachineMemOperand(
1906           MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
1907           MachineMemOperand::MOStore, Size, Align));
1908     }
1909     MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
1910         .addReg(AArch64::SP)
1911         .addImm(RPI.Offset) // [sp, #offset*scale],
1912                             // where factor*scale is implicit
1913         .setMIFlag(MachineInstr::FrameSetup);
1914     MIB.addMemOperand(MF.getMachineMemOperand(
1915         MachinePointerInfo::getFixedStack(MF,FrameIdxReg1),
1916         MachineMemOperand::MOStore, Size, Align));
1917     if (NeedsWinCFI)
1918       InsertSEH(MIB, TII, MachineInstr::FrameSetup);
1919 
1920   }
1921   return true;
1922 }
1923 
/// Emit the callee-save restore sequence in the epilogue: one LDP/LDR per
/// RegPairInfo, followed by the shadow call stack epilog when needed.
bool AArch64FrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  DebugLoc DL;
  SmallVector<RegPairInfo, 8> RegPairs;
  bool NeedsWinCFI = needsWinCFI(MF);

  if (MI != MBB.end())
    DL = MI->getDebugLoc();

  bool NeedShadowCallStackProlog = false;
  computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs,
                                 NeedShadowCallStackProlog);

  // Emits a single LDP/LDR restoring the registers described by RPI.
  auto EmitMI = [&](const RegPairInfo &RPI) {
    unsigned Reg1 = RPI.Reg1;
    unsigned Reg2 = RPI.Reg2;

    // Issue sequence of restores for cs regs. The last restore may be converted
    // to a post-increment load later by emitEpilogue if the callee-save stack
    // area allocation can't be combined with the local stack area allocation.
    // For example:
    //    ldp     fp, lr, [sp, #32]       // addImm(+4)
    //    ldp     x20, x19, [sp, #16]     // addImm(+2)
    //    ldp     x22, x21, [sp, #0]      // addImm(+0)
    // Note: see comment in spillCalleeSavedRegisters()
    unsigned LdrOpc;
    unsigned Size, Align;
    switch (RPI.Type) {
    case RegPairInfo::GPR:
       LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
       Size = 8;
       Align = 8;
       break;
    case RegPairInfo::FPR64:
       LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
       Size = 8;
       Align = 8;
       break;
    case RegPairInfo::FPR128:
       LdrOpc = RPI.isPaired() ? AArch64::LDPQi : AArch64::LDRQui;
       Size = 16;
       Align = 16;
       break;
    }
    LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
               if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
               dbgs() << ") -> fi#(" << RPI.FrameIdx;
               if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
               dbgs() << ")\n");

    // Windows unwind codes require consecutive registers if registers are
    // paired.  Make the switch here, so that the code below will restore
    // (x,x+1) and not (x+1,x).
    unsigned FrameIdxReg1 = RPI.FrameIdx;
    unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
    if (NeedsWinCFI && RPI.isPaired()) {
      std::swap(Reg1, Reg2);
      std::swap(FrameIdxReg1, FrameIdxReg2);
    }
    MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
    if (RPI.isPaired()) {
      MIB.addReg(Reg2, getDefRegState(true));
      MIB.addMemOperand(MF.getMachineMemOperand(
          MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
          MachineMemOperand::MOLoad, Size, Align));
    }
    MIB.addReg(Reg1, getDefRegState(true))
        .addReg(AArch64::SP)
        .addImm(RPI.Offset) // [sp, #offset*scale]
                            // where factor*scale is implicit
        .setMIFlag(MachineInstr::FrameDestroy);
    MIB.addMemOperand(MF.getMachineMemOperand(
        MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
        MachineMemOperand::MOLoad, Size, Align));
    if (NeedsWinCFI)
      InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
  };
  if (ReverseCSRRestoreSeq)
    for (const RegPairInfo &RPI : reverse(RegPairs))
      EmitMI(RPI);
  else
    for (const RegPairInfo &RPI : RegPairs)
      EmitMI(RPI);

  if (NeedShadowCallStackProlog) {
    // Shadow call stack epilog: ldr x30, [x18, #-8]!
    BuildMI(MBB, MI, DL, TII.get(AArch64::LDRXpre))
        .addReg(AArch64::X18, RegState::Define)
        .addReg(AArch64::LR, RegState::Define)
        .addReg(AArch64::X18)
        .addImm(-8)
        .setMIFlag(MachineInstr::FrameDestroy);
  }

  return true;
}
2024 
/// Decide which callee-saved registers to spill, record the callee-save area
/// size in AArch64FunctionInfo, and arrange for a scratch register or an
/// emergency spill slot when large stack offsets may need materializing.
void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
                                                BitVector &SavedRegs,
                                                RegScavenger *RS) const {
  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  // Remember one unspilled callee-save GPR (and its pair) that we could spill
  // later to obtain a scratch register for big frames.
  unsigned UnspilledCSGPR = AArch64::NoRegister;
  unsigned UnspilledCSGPRPaired = AArch64::NoRegister;

  MachineFrameInfo &MFI = MF.getFrameInfo();
  const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();

  unsigned BasePointerReg = RegInfo->hasBasePointer(MF)
                                ? RegInfo->getBaseRegister()
                                : (unsigned)AArch64::NoRegister;

  unsigned ExtraCSSpill = 0;
  // Figure out which callee-saved registers to save/restore.
  for (unsigned i = 0; CSRegs[i]; ++i) {
    const unsigned Reg = CSRegs[i];

    // Add the base pointer register to SavedRegs if it is callee-save.
    if (Reg == BasePointerReg)
      SavedRegs.set(Reg);

    bool RegUsed = SavedRegs.test(Reg);
    // i^1 selects the other element of an even/odd pair in CSRegs; this
    // presumes pairs are laid out adjacently and pair-aligned in the CSR
    // list — see getCalleeSavedRegs(). TODO(review): confirm for all CSR
    // lists this target returns.
    unsigned PairedReg = CSRegs[i ^ 1];
    if (!RegUsed) {
      if (AArch64::GPR64RegClass.contains(Reg) &&
          !RegInfo->isReservedReg(MF, Reg)) {
        UnspilledCSGPR = Reg;
        UnspilledCSGPRPaired = PairedReg;
      }
      continue;
    }

    // MachO's compact unwind format relies on all registers being stored in
    // pairs.
    // FIXME: the usual format is actually better if unwinding isn't needed.
    if (produceCompactUnwindFrame(MF) && PairedReg != AArch64::NoRegister &&
        !SavedRegs.test(PairedReg)) {
      SavedRegs.set(PairedReg);
      if (AArch64::GPR64RegClass.contains(PairedReg) &&
          !RegInfo->isReservedReg(MF, PairedReg))
        ExtraCSSpill = PairedReg;
    }
  }

  // Calculates the callee saved stack size.
  unsigned CSStackSize = 0;
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  for (unsigned Reg : SavedRegs.set_bits())
    CSStackSize += TRI->getRegSizeInBits(Reg, MRI) / 8;

  // Save number of saved regs, so we can easily update CSStackSize later.
  unsigned NumSavedRegs = SavedRegs.count();

  // The frame record needs to be created by saving the appropriate registers
  unsigned EstimatedStackSize = MFI.estimateStackSize(MF);
  if (hasFP(MF) ||
      windowsRequiresStackProbe(MF, EstimatedStackSize + CSStackSize + 16)) {
    SavedRegs.set(AArch64::FP);
    SavedRegs.set(AArch64::LR);
  }

  LLVM_DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:";
             for (unsigned Reg
                  : SavedRegs.set_bits()) dbgs()
             << ' ' << printReg(Reg, RegInfo);
             dbgs() << "\n";);

  // If any callee-saved registers are used, the frame cannot be eliminated.
  bool CanEliminateFrame = SavedRegs.count() == 0;

  // The CSR spill slots have not been allocated yet, so estimateStackSize
  // won't include them.
  unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
  bool BigStack = (EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit;
  if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
    AFI->setHasStackFrame(true);

  // Estimate if we might need to scavenge a register at some point in order
  // to materialize a stack offset. If so, either spill one additional
  // callee-saved register or reserve a special spill slot to facilitate
  // register scavenging. If we already spilled an extra callee-saved register
  // above to keep the number of spills even, we don't need to do anything else
  // here.
  if (BigStack) {
    if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) {
      LLVM_DEBUG(dbgs() << "Spilling " << printReg(UnspilledCSGPR, RegInfo)
                        << " to get a scratch register.\n");
      SavedRegs.set(UnspilledCSGPR);
      // MachO's compact unwind format relies on all registers being stored in
      // pairs, so if we need to spill one extra for BigStack, then we need to
      // store the pair.
      if (produceCompactUnwindFrame(MF))
        SavedRegs.set(UnspilledCSGPRPaired);
      ExtraCSSpill = UnspilledCSGPRPaired;
    }

    // If we didn't find an extra callee-saved register to spill, create
    // an emergency spill slot.
    if (!ExtraCSSpill || MF.getRegInfo().isPhysRegUsed(ExtraCSSpill)) {
      const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
      const TargetRegisterClass &RC = AArch64::GPR64RegClass;
      unsigned Size = TRI->getSpillSize(RC);
      unsigned Align = TRI->getSpillAlignment(RC);
      int FI = MFI.CreateStackObject(Size, Align, false);
      // The scavenger will spill/reload through this slot when no free
      // register is available for an offset computation.
      RS->addScavengingFrameIndex(FI);
      LLVM_DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
                        << " as the emergency spill slot.\n");
    }
  }

  // Adding the size of additional 64bit GPR saves.
  CSStackSize += 8 * (SavedRegs.count() - NumSavedRegs);
  unsigned AlignedCSStackSize = alignTo(CSStackSize, 16);
  LLVM_DEBUG(dbgs() << "Estimated stack frame size: "
               << EstimatedStackSize + AlignedCSStackSize
               << " bytes.\n");

  // Round up to register pair alignment to avoid additional SP adjustment
  // instructions.
  AFI->setCalleeSavedStackSize(AlignedCSStackSize);
  AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
}
2158 
2159 bool AArch64FrameLowering::enableStackSlotScavenging(
2160     const MachineFunction &MF) const {
2161   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
2162   return AFI->hasCalleeSaveStackFreeSpace();
2163 }
2164 
/// For Win64-style C++ EH, allocate the UnwindHelp stack object and
/// initialize it to -2 immediately after the prologue.
void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF, RegScavenger *RS) const {
  // If this function isn't doing Win64-style C++ EH, we don't need to do
  // anything.
  if (!MF.hasEHFunclets())
    return;
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();

  // Skip past the FrameSetup instructions so the store lands right after
  // the prologue.
  MachineBasicBlock &MBB = MF.front();
  auto MBBI = MBB.begin();
  while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
    ++MBBI;

  // Create an UnwindHelp object.
  int UnwindHelpFI =
      MFI.CreateStackObject(/*size*/8, /*alignment*/16, false);
  EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
  // We need to store -2 into the UnwindHelp object at the start of the
  // function.
  DebugLoc DL;
  // Scavenge a register that is unused at the insertion point to hold the
  // immediate.
  RS->enterBasicBlockEnd(MBB);
  RS->backward(std::prev(MBBI));
  unsigned DstReg = RS->FindUnusedReg(&AArch64::GPR64commonRegClass);
  assert(DstReg && "There must be a free register after frame setup");
  BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), DstReg).addImm(-2);
  BuildMI(MBB, MBBI, DL, TII.get(AArch64::STURXi))
      .addReg(DstReg, getKillRegState(true))
      .addFrameIndex(UnwindHelpFI)
      .addImm(0);
}
2197 
2198 /// For Win64 AArch64 EH, the offset to the Unwind object is from the SP before
2199 /// the update.  This is easily retrieved as it is exactly the offset that is set
2200 /// in processFunctionBeforeFrameFinalized.
2201 int AArch64FrameLowering::getFrameIndexReferencePreferSP(
2202     const MachineFunction &MF, int FI, unsigned &FrameReg,
2203     bool IgnoreSPUpdates) const {
2204   const MachineFrameInfo &MFI = MF.getFrameInfo();
2205   LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is "
2206                     << MFI.getObjectOffset(FI) << "\n");
2207   FrameReg = AArch64::SP;
2208   return MFI.getObjectOffset(FI);
2209 }
2210 
2211 /// The parent frame offset (aka dispFrame) is only used on X86_64 to retrieve
2212 /// the parent's frame pointer
2213 unsigned AArch64FrameLowering::getWinEHParentFrameOffset(
2214     const MachineFunction &MF) const {
2215   return 0;
2216 }
2217 
2218 /// Funclets only need to account for space for the callee saved registers,
2219 /// as the locals are accounted for in the parent's stack frame.
2220 unsigned AArch64FrameLowering::getWinEHFuncletFrameSize(
2221     const MachineFunction &MF) const {
2222   // This is the size of the pushed CSRs.
2223   unsigned CSSize =
2224       MF.getInfo<AArch64FunctionInfo>()->getCalleeSavedStackSize();
2225   // This is the amount of stack a funclet needs to allocate.
2226   return alignTo(CSSize + MF.getFrameInfo().getMaxCallFrameSize(),
2227                  getStackAlignment());
2228 }
2229