1 //===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the AArch64 implementation of TargetFrameLowering class.
10 //
11 // On AArch64, stack frames are structured as follows:
12 //
13 // The stack grows downward.
14 //
15 // All of the individual frame areas on the frame below are optional, i.e. it's
16 // possible to create a function so that the particular area isn't present
17 // in the frame.
18 //
19 // At function entry, the "frame" looks as follows:
20 //
21 // |                                   | Higher address
22 // |-----------------------------------|
23 // |                                   |
24 // | arguments passed on the stack     |
25 // |                                   |
26 // |-----------------------------------| <- sp
27 // |                                   | Lower address
28 //
29 //
// After the prologue has run, the frame has the following general structure.
// Note that this doesn't depict the case where a red-zone is used. Also,
// technically the last frame area (VLAs) isn't created until the main
// function body, after the prologue has run. However, it's depicted here
// for completeness.
35 //
36 // |                                   | Higher address
37 // |-----------------------------------|
38 // |                                   |
39 // | arguments passed on the stack     |
40 // |                                   |
41 // |-----------------------------------|
42 // |                                   |
43 // | (Win64 only) varargs from reg     |
44 // |                                   |
45 // |-----------------------------------|
46 // |                                   |
47 // | callee-saved gpr registers        | <--.
48 // |                                   |    | On Darwin platforms these
49 // |- - - - - - - - - - - - - - - - - -|    | callee saves are swapped,
50 // |                                   |    | (frame record first)
51 // | prev_fp, prev_lr                  | <--'
52 // | (a.k.a. "frame record")           |
53 // |-----------------------------------| <- fp(=x29)
54 // |                                   |
55 // | callee-saved fp/simd/SVE regs     |
56 // |                                   |
57 // |-----------------------------------|
58 // |.empty.space.to.make.part.below....|
59 // |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
60 // |.the.standard.16-byte.alignment....|  compile time; if present)
61 // |-----------------------------------|
62 // |                                   |
63 // | local variables of fixed size     |
64 // | including spill slots             |
65 // |-----------------------------------| <- bp(not defined by ABI,
66 // |.variable-sized.local.variables....|       LLVM chooses X19)
67 // |.(VLAs)............................| (size of this area is unknown at
68 // |...................................|  compile time)
69 // |-----------------------------------| <- sp
70 // |                                   | Lower address
71 //
72 //
// To access data in a frame, a constant offset from one of the pointers
// (fp, bp, sp) must be computable at compile time. The size of the areas
// with a dotted background cannot be computed at compile time if they are
// present, so all three of fp, bp and sp must be set up in order to access
// all contents of the frame areas, assuming all of the frame areas are
// non-empty.
79 //
80 // For most functions, some of the frame areas are empty. For those functions,
81 // it may not be necessary to set up fp or bp:
82 // * A base pointer is definitely needed when there are both VLAs and local
83 //   variables with more-than-default alignment requirements.
84 // * A frame pointer is definitely needed when there are local variables with
85 //   more-than-default alignment requirements.
86 //
87 // For Darwin platforms the frame-record (fp, lr) is stored at the top of the
88 // callee-saved area, since the unwind encoding does not allow for encoding
89 // this dynamically and existing tools depend on this layout. For other
90 // platforms, the frame-record is stored at the bottom of the (gpr) callee-saved
91 // area to allow SVE stack objects (allocated directly below the callee-saves,
92 // if available) to be accessed directly from the framepointer.
93 // The SVE spill/fill instructions have VL-scaled addressing modes such
94 // as:
95 //    ldr z8, [fp, #-7 mul vl]
96 // For SVE the size of the vector length (VL) is not known at compile-time, so
97 // '#-7 mul vl' is an offset that can only be evaluated at runtime. With this
98 // layout, we don't need to add an unscaled offset to the framepointer before
99 // accessing the SVE object in the frame.
100 //
101 // In some cases when a base pointer is not strictly needed, it is generated
102 // anyway when offsets from the frame pointer to access local variables become
103 // so large that the offset can't be encoded in the immediate fields of loads
104 // or stores.
105 //
106 // FIXME: also explain the redzone concept.
107 // FIXME: also explain the concept of reserved call frames.
108 //
109 //===----------------------------------------------------------------------===//
110 
111 #include "AArch64FrameLowering.h"
112 #include "AArch64InstrInfo.h"
113 #include "AArch64MachineFunctionInfo.h"
114 #include "AArch64RegisterInfo.h"
115 #include "AArch64StackOffset.h"
116 #include "AArch64Subtarget.h"
117 #include "AArch64TargetMachine.h"
118 #include "MCTargetDesc/AArch64AddressingModes.h"
119 #include "llvm/ADT/ScopeExit.h"
120 #include "llvm/ADT/SmallVector.h"
121 #include "llvm/ADT/Statistic.h"
122 #include "llvm/CodeGen/LivePhysRegs.h"
123 #include "llvm/CodeGen/MachineBasicBlock.h"
124 #include "llvm/CodeGen/MachineFrameInfo.h"
125 #include "llvm/CodeGen/MachineFunction.h"
126 #include "llvm/CodeGen/MachineInstr.h"
127 #include "llvm/CodeGen/MachineInstrBuilder.h"
128 #include "llvm/CodeGen/MachineMemOperand.h"
129 #include "llvm/CodeGen/MachineModuleInfo.h"
130 #include "llvm/CodeGen/MachineOperand.h"
131 #include "llvm/CodeGen/MachineRegisterInfo.h"
132 #include "llvm/CodeGen/RegisterScavenging.h"
133 #include "llvm/CodeGen/TargetInstrInfo.h"
134 #include "llvm/CodeGen/TargetRegisterInfo.h"
135 #include "llvm/CodeGen/TargetSubtargetInfo.h"
136 #include "llvm/CodeGen/WinEHFuncInfo.h"
137 #include "llvm/IR/Attributes.h"
138 #include "llvm/IR/CallingConv.h"
139 #include "llvm/IR/DataLayout.h"
140 #include "llvm/IR/DebugLoc.h"
141 #include "llvm/IR/Function.h"
142 #include "llvm/MC/MCAsmInfo.h"
143 #include "llvm/MC/MCDwarf.h"
144 #include "llvm/Support/CommandLine.h"
145 #include "llvm/Support/Debug.h"
146 #include "llvm/Support/ErrorHandling.h"
147 #include "llvm/Support/MathExtras.h"
148 #include "llvm/Support/raw_ostream.h"
149 #include "llvm/Target/TargetMachine.h"
150 #include "llvm/Target/TargetOptions.h"
151 #include <cassert>
152 #include <cstdint>
153 #include <iterator>
154 #include <vector>
155 
156 using namespace llvm;
157 
158 #define DEBUG_TYPE "frame-info"
159 
160 static cl::opt<bool> EnableRedZone("aarch64-redzone",
161                                    cl::desc("enable use of redzone on AArch64"),
162                                    cl::init(false), cl::Hidden);
163 
164 static cl::opt<bool>
165     ReverseCSRRestoreSeq("reverse-csr-restore-seq",
166                          cl::desc("reverse the CSR restore sequence"),
167                          cl::init(false), cl::Hidden);
168 
169 STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
170 
171 /// This is the biggest offset to the stack pointer we can encode in aarch64
172 /// instructions (without using a separate calculation and a temp register).
173 /// Note that the exception here are vector stores/loads which cannot encode any
174 /// displacements (see estimateRSStackSizeLimit(), isAArch64FrameOffsetLegal()).
175 static const unsigned DefaultSafeSPDisplacement = 255;
176 
177 /// Look at each instruction that references stack frames and return the stack
178 /// size limit beyond which some of these instructions will require a scratch
179 /// register during their expansion later.
180 static unsigned estimateRSStackSizeLimit(MachineFunction &MF) {
181   // FIXME: For now, just conservatively guestimate based on unscaled indexing
182   // range. We'll end up allocating an unnecessary spill slot a lot, but
183   // realistically that's not a big deal at this stage of the game.
184   for (MachineBasicBlock &MBB : MF) {
185     for (MachineInstr &MI : MBB) {
186       if (MI.isDebugInstr() || MI.isPseudo() ||
187           MI.getOpcode() == AArch64::ADDXri ||
188           MI.getOpcode() == AArch64::ADDSXri)
189         continue;
190 
191       for (const MachineOperand &MO : MI.operands()) {
192         if (!MO.isFI())
193           continue;
194 
195         StackOffset Offset;
196         if (isAArch64FrameOffsetLegal(MI, Offset, nullptr, nullptr, nullptr) ==
197             AArch64FrameOffsetCannotUpdate)
198           return 0;
199       }
200     }
201   }
202   return DefaultSafeSPDisplacement;
203 }
204 
205 bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
206   if (!EnableRedZone)
207     return false;
208   // Don't use the red zone if the function explicitly asks us not to.
209   // This is typically used for kernel code.
210   if (MF.getFunction().hasFnAttribute(Attribute::NoRedZone))
211     return false;
212 
213   const MachineFrameInfo &MFI = MF.getFrameInfo();
214   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
215   unsigned NumBytes = AFI->getLocalStackSize();
216 
217   return !(MFI.hasCalls() || hasFP(MF) || NumBytes > 128);
218 }
219 
220 /// hasFP - Return true if the specified function should have a dedicated frame
221 /// pointer register.
222 bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
223   const MachineFrameInfo &MFI = MF.getFrameInfo();
224   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
225   // Win64 EH requires a frame pointer if funclets are present, as the locals
226   // are accessed off the frame pointer in both the parent function and the
227   // funclets.
228   if (MF.hasEHFunclets())
229     return true;
230   // Retain behavior of always omitting the FP for leaf functions when possible.
231   if (MFI.hasCalls() && MF.getTarget().Options.DisableFramePointerElim(MF))
232     return true;
233   if (MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
234       MFI.hasStackMap() || MFI.hasPatchPoint() ||
235       RegInfo->needsStackRealignment(MF))
236     return true;
237   // With large callframes around we may need to use FP to access the scavenging
238   // emergency spillslot.
239   //
240   // Unfortunately some calls to hasFP() like machine verifier ->
241   // getReservedReg() -> hasFP in the middle of global isel are too early
242   // to know the max call frame size. Hopefully conservatively returning "true"
243   // in those cases is fine.
244   // DefaultSafeSPDisplacement is fine as we only emergency spill GP regs.
245   if (!MFI.isMaxCallFrameSizeComputed() ||
246       MFI.getMaxCallFrameSize() > DefaultSafeSPDisplacement)
247     return true;
248 
249   return false;
250 }
251 
252 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
253 /// not required, we reserve argument space for call sites in the function
254 /// immediately on entry to the current function.  This eliminates the need for
255 /// add/sub sp brackets around call sites.  Returns true if the call frame is
256 /// included as part of the stack frame.
257 bool
258 AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
259   return !MF.getFrameInfo().hasVarSizedObjects();
260 }
261 
262 MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
263     MachineFunction &MF, MachineBasicBlock &MBB,
264     MachineBasicBlock::iterator I) const {
265   const AArch64InstrInfo *TII =
266       static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
267   DebugLoc DL = I->getDebugLoc();
268   unsigned Opc = I->getOpcode();
269   bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
270   uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
271 
272   if (!hasReservedCallFrame(MF)) {
273     unsigned Align = getStackAlignment();
274 
275     int64_t Amount = I->getOperand(0).getImm();
276     Amount = alignTo(Amount, Align);
277     if (!IsDestroy)
278       Amount = -Amount;
279 
280     // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
281     // doesn't have to pop anything), then the first operand will be zero too so
282     // this adjustment is a no-op.
283     if (CalleePopAmount == 0) {
284       // FIXME: in-function stack adjustment for calls is limited to 24-bits
285       // because there's no guaranteed temporary register available.
286       //
287       // ADD/SUB (immediate) has only LSL #0 and LSL #12 available.
288       // 1) For offset <= 12-bit, we use LSL #0
289       // 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses
290       // LSL #0, and the other uses LSL #12.
291       //
292       // Most call frames will be allocated at the start of a function so
293       // this is OK, but it is a limitation that needs dealing with.
294       assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
295       emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, {Amount, MVT::i8},
296                       TII);
297     }
298   } else if (CalleePopAmount != 0) {
299     // If the calling convention demands that the callee pops arguments from the
300     // stack, we want to add it back if we have a reserved call frame.
301     assert(CalleePopAmount < 0xffffff && "call frame too large");
302     emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP,
303                     {-(int64_t)CalleePopAmount, MVT::i8}, TII);
304   }
305   return MBB.erase(I);
306 }
307 
308 static bool ShouldSignReturnAddress(MachineFunction &MF) {
309   // The function should be signed in the following situations:
310   // - sign-return-address=all
311   // - sign-return-address=non-leaf and the functions spills the LR
312 
313   const Function &F = MF.getFunction();
314   if (!F.hasFnAttribute("sign-return-address"))
315     return false;
316 
317   StringRef Scope = F.getFnAttribute("sign-return-address").getValueAsString();
318   if (Scope.equals("none"))
319     return false;
320 
321   if (Scope.equals("all"))
322     return true;
323 
324   assert(Scope.equals("non-leaf") && "Expected all, none or non-leaf");
325 
326   for (const auto &Info : MF.getFrameInfo().getCalleeSavedInfo())
327     if (Info.getReg() == AArch64::LR)
328       return true;
329 
330   return false;
331 }
332 
333 void AArch64FrameLowering::emitCalleeSavedFrameMoves(
334     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
335   MachineFunction &MF = *MBB.getParent();
336   MachineFrameInfo &MFI = MF.getFrameInfo();
337   const TargetSubtargetInfo &STI = MF.getSubtarget();
338   const MCRegisterInfo *MRI = STI.getRegisterInfo();
339   const TargetInstrInfo *TII = STI.getInstrInfo();
340   DebugLoc DL = MBB.findDebugLoc(MBBI);
341 
342   // Add callee saved registers to move list.
343   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
344   if (CSI.empty())
345     return;
346 
347   for (const auto &Info : CSI) {
348     unsigned Reg = Info.getReg();
349     int64_t Offset =
350         MFI.getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea();
351     unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
352     unsigned CFIIndex = MF.addFrameInst(
353         MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
354     BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
355         .addCFIIndex(CFIIndex)
356         .setMIFlags(MachineInstr::FrameSetup);
357   }
358 }
359 
360 // Find a scratch register that we can use at the start of the prologue to
361 // re-align the stack pointer.  We avoid using callee-save registers since they
362 // may appear to be free when this is called from canUseAsPrologue (during
363 // shrink wrapping), but then no longer be free when this is called from
364 // emitPrologue.
365 //
366 // FIXME: This is a bit conservative, since in the above case we could use one
367 // of the callee-save registers as a scratch temp to re-align the stack pointer,
368 // but we would then have to make sure that we were in fact saving at least one
369 // callee-save register in the prologue, which is additional complexity that
370 // doesn't seem worth the benefit.
371 static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
372   MachineFunction *MF = MBB->getParent();
373 
374   // If MBB is an entry block, use X9 as the scratch register
375   if (&MF->front() == MBB)
376     return AArch64::X9;
377 
378   const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
379   const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
380   LivePhysRegs LiveRegs(TRI);
381   LiveRegs.addLiveIns(*MBB);
382 
383   // Mark callee saved registers as used so we will not choose them.
384   const MCPhysReg *CSRegs = MF->getRegInfo().getCalleeSavedRegs();
385   for (unsigned i = 0; CSRegs[i]; ++i)
386     LiveRegs.addReg(CSRegs[i]);
387 
388   // Prefer X9 since it was historically used for the prologue scratch reg.
389   const MachineRegisterInfo &MRI = MF->getRegInfo();
390   if (LiveRegs.available(MRI, AArch64::X9))
391     return AArch64::X9;
392 
393   for (unsigned Reg : AArch64::GPR64RegClass) {
394     if (LiveRegs.available(MRI, Reg))
395       return Reg;
396   }
397   return AArch64::NoRegister;
398 }
399 
400 bool AArch64FrameLowering::canUseAsPrologue(
401     const MachineBasicBlock &MBB) const {
402   const MachineFunction *MF = MBB.getParent();
403   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
404   const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
405   const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
406 
407   // Don't need a scratch register if we're not going to re-align the stack.
408   if (!RegInfo->needsStackRealignment(*MF))
409     return true;
410   // Otherwise, we can use any block as long as it has a scratch register
411   // available.
412   return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister;
413 }
414 
415 static bool windowsRequiresStackProbe(MachineFunction &MF,
416                                       unsigned StackSizeInBytes) {
417   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
418   if (!Subtarget.isTargetWindows())
419     return false;
420   const Function &F = MF.getFunction();
421   // TODO: When implementing stack protectors, take that into account
422   // for the probe threshold.
423   unsigned StackProbeSize = 4096;
424   if (F.hasFnAttribute("stack-probe-size"))
425     F.getFnAttribute("stack-probe-size")
426         .getValueAsString()
427         .getAsInteger(0, StackProbeSize);
428   return (StackSizeInBytes >= StackProbeSize) &&
429          !F.hasFnAttribute("no-stack-arg-probe");
430 }
431 
432 bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
433     MachineFunction &MF, unsigned StackBumpBytes) const {
434   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
435   const MachineFrameInfo &MFI = MF.getFrameInfo();
436   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
437   const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
438 
439   if (AFI->getLocalStackSize() == 0)
440     return false;
441 
442   // 512 is the maximum immediate for stp/ldp that will be used for
443   // callee-save save/restores
444   if (StackBumpBytes >= 512 || windowsRequiresStackProbe(MF, StackBumpBytes))
445     return false;
446 
447   if (MFI.hasVarSizedObjects())
448     return false;
449 
450   if (RegInfo->needsStackRealignment(MF))
451     return false;
452 
453   // This isn't strictly necessary, but it simplifies things a bit since the
454   // current RedZone handling code assumes the SP is adjusted by the
455   // callee-save save/restore code.
456   if (canUseRedZone(MF))
457     return false;
458 
459   return true;
460 }
461 
462 // Given a load or a store instruction, generate an appropriate unwinding SEH
463 // code on Windows.
464 static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI,
465                                              const TargetInstrInfo &TII,
466                                              MachineInstr::MIFlag Flag) {
467   unsigned Opc = MBBI->getOpcode();
468   MachineBasicBlock *MBB = MBBI->getParent();
469   MachineFunction &MF = *MBB->getParent();
470   DebugLoc DL = MBBI->getDebugLoc();
471   unsigned ImmIdx = MBBI->getNumOperands() - 1;
472   int Imm = MBBI->getOperand(ImmIdx).getImm();
473   MachineInstrBuilder MIB;
474   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
475   const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
476 
477   switch (Opc) {
478   default:
479     llvm_unreachable("No SEH Opcode for this instruction");
480   case AArch64::LDPDpost:
481     Imm = -Imm;
482     LLVM_FALLTHROUGH;
483   case AArch64::STPDpre: {
484     unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
485     unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg());
486     MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP_X))
487               .addImm(Reg0)
488               .addImm(Reg1)
489               .addImm(Imm * 8)
490               .setMIFlag(Flag);
491     break;
492   }
493   case AArch64::LDPXpost:
494     Imm = -Imm;
495     LLVM_FALLTHROUGH;
496   case AArch64::STPXpre: {
497     Register Reg0 = MBBI->getOperand(1).getReg();
498     Register Reg1 = MBBI->getOperand(2).getReg();
499     if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
500       MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR_X))
501                 .addImm(Imm * 8)
502                 .setMIFlag(Flag);
503     else
504       MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP_X))
505                 .addImm(RegInfo->getSEHRegNum(Reg0))
506                 .addImm(RegInfo->getSEHRegNum(Reg1))
507                 .addImm(Imm * 8)
508                 .setMIFlag(Flag);
509     break;
510   }
511   case AArch64::LDRDpost:
512     Imm = -Imm;
513     LLVM_FALLTHROUGH;
514   case AArch64::STRDpre: {
515     unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
516     MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg_X))
517               .addImm(Reg)
518               .addImm(Imm)
519               .setMIFlag(Flag);
520     break;
521   }
522   case AArch64::LDRXpost:
523     Imm = -Imm;
524     LLVM_FALLTHROUGH;
525   case AArch64::STRXpre: {
526     unsigned Reg =  RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
527     MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg_X))
528               .addImm(Reg)
529               .addImm(Imm)
530               .setMIFlag(Flag);
531     break;
532   }
533   case AArch64::STPDi:
534   case AArch64::LDPDi: {
535     unsigned Reg0 =  RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
536     unsigned Reg1 =  RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
537     MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP))
538               .addImm(Reg0)
539               .addImm(Reg1)
540               .addImm(Imm * 8)
541               .setMIFlag(Flag);
542     break;
543   }
544   case AArch64::STPXi:
545   case AArch64::LDPXi: {
546     Register Reg0 = MBBI->getOperand(0).getReg();
547     Register Reg1 = MBBI->getOperand(1).getReg();
548     if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
549       MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR))
550                 .addImm(Imm * 8)
551                 .setMIFlag(Flag);
552     else
553       MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP))
554                 .addImm(RegInfo->getSEHRegNum(Reg0))
555                 .addImm(RegInfo->getSEHRegNum(Reg1))
556                 .addImm(Imm * 8)
557                 .setMIFlag(Flag);
558     break;
559   }
560   case AArch64::STRXui:
561   case AArch64::LDRXui: {
562     int Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
563     MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg))
564               .addImm(Reg)
565               .addImm(Imm * 8)
566               .setMIFlag(Flag);
567     break;
568   }
569   case AArch64::STRDui:
570   case AArch64::LDRDui: {
571     unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
572     MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg))
573               .addImm(Reg)
574               .addImm(Imm * 8)
575               .setMIFlag(Flag);
576     break;
577   }
578   }
579   auto I = MBB->insertAfter(MBBI, MIB);
580   return I;
581 }
582 
583 // Fix up the SEH opcode associated with the save/restore instruction.
584 static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
585                            unsigned LocalStackSize) {
586   MachineOperand *ImmOpnd = nullptr;
587   unsigned ImmIdx = MBBI->getNumOperands() - 1;
588   switch (MBBI->getOpcode()) {
589   default:
590     llvm_unreachable("Fix the offset in the SEH instruction");
591   case AArch64::SEH_SaveFPLR:
592   case AArch64::SEH_SaveRegP:
593   case AArch64::SEH_SaveReg:
594   case AArch64::SEH_SaveFRegP:
595   case AArch64::SEH_SaveFReg:
596     ImmOpnd = &MBBI->getOperand(ImmIdx);
597     break;
598   }
599   if (ImmOpnd)
600     ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
601 }
602 
603 // Convert callee-save register save/restore instruction to do stack pointer
604 // decrement/increment to allocate/deallocate the callee-save stack area by
605 // converting store/load to use pre/post increment version.
606 static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
607     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
608     const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc,
609     bool NeedsWinCFI, bool *HasWinCFI, bool InProlog = true) {
610   // Ignore instructions that do not operate on SP, i.e. shadow call stack
611   // instructions and associated CFI instruction.
612   while (MBBI->getOpcode() == AArch64::STRXpost ||
613          MBBI->getOpcode() == AArch64::LDRXpre ||
614          MBBI->getOpcode() == AArch64::CFI_INSTRUCTION) {
615     if (MBBI->getOpcode() != AArch64::CFI_INSTRUCTION)
616       assert(MBBI->getOperand(0).getReg() != AArch64::SP);
617     ++MBBI;
618   }
619   unsigned NewOpc;
620   int Scale = 1;
621   switch (MBBI->getOpcode()) {
622   default:
623     llvm_unreachable("Unexpected callee-save save/restore opcode!");
624   case AArch64::STPXi:
625     NewOpc = AArch64::STPXpre;
626     Scale = 8;
627     break;
628   case AArch64::STPDi:
629     NewOpc = AArch64::STPDpre;
630     Scale = 8;
631     break;
632   case AArch64::STPQi:
633     NewOpc = AArch64::STPQpre;
634     Scale = 16;
635     break;
636   case AArch64::STRXui:
637     NewOpc = AArch64::STRXpre;
638     break;
639   case AArch64::STRDui:
640     NewOpc = AArch64::STRDpre;
641     break;
642   case AArch64::STRQui:
643     NewOpc = AArch64::STRQpre;
644     break;
645   case AArch64::LDPXi:
646     NewOpc = AArch64::LDPXpost;
647     Scale = 8;
648     break;
649   case AArch64::LDPDi:
650     NewOpc = AArch64::LDPDpost;
651     Scale = 8;
652     break;
653   case AArch64::LDPQi:
654     NewOpc = AArch64::LDPQpost;
655     Scale = 16;
656     break;
657   case AArch64::LDRXui:
658     NewOpc = AArch64::LDRXpost;
659     break;
660   case AArch64::LDRDui:
661     NewOpc = AArch64::LDRDpost;
662     break;
663   case AArch64::LDRQui:
664     NewOpc = AArch64::LDRQpost;
665     break;
666   }
667   // Get rid of the SEH code associated with the old instruction.
668   if (NeedsWinCFI) {
669     auto SEH = std::next(MBBI);
670     if (AArch64InstrInfo::isSEHInstruction(*SEH))
671       SEH->eraseFromParent();
672   }
673 
674   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
675   MIB.addReg(AArch64::SP, RegState::Define);
676 
677   // Copy all operands other than the immediate offset.
678   unsigned OpndIdx = 0;
679   for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
680        ++OpndIdx)
681     MIB.add(MBBI->getOperand(OpndIdx));
682 
683   assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
684          "Unexpected immediate offset in first/last callee-save save/restore "
685          "instruction!");
686   assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
687          "Unexpected base register in callee-save save/restore instruction!");
688   assert(CSStackSizeInc % Scale == 0);
689   MIB.addImm(CSStackSizeInc / Scale);
690 
691   MIB.setMIFlags(MBBI->getFlags());
692   MIB.setMemRefs(MBBI->memoperands());
693 
694   // Generate a new SEH code that corresponds to the new instruction.
695   if (NeedsWinCFI) {
696     *HasWinCFI = true;
697     InsertSEH(*MIB, *TII,
698               InProlog ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy);
699   }
700 
701   return std::prev(MBB.erase(MBBI));
702 }
703 
704 // Fixup callee-save register save/restore instructions to take into account
705 // combined SP bump by adding the local stack size to the stack offsets.
706 static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
707                                               unsigned LocalStackSize,
708                                               bool NeedsWinCFI,
709                                               bool *HasWinCFI) {
710   if (AArch64InstrInfo::isSEHInstruction(MI))
711     return;
712 
713   unsigned Opc = MI.getOpcode();
714 
715   // Ignore instructions that do not operate on SP, i.e. shadow call stack
716   // instructions and associated CFI instruction.
717   if (Opc == AArch64::STRXpost || Opc == AArch64::LDRXpre ||
718       Opc == AArch64::CFI_INSTRUCTION) {
719     if (Opc != AArch64::CFI_INSTRUCTION)
720       assert(MI.getOperand(0).getReg() != AArch64::SP);
721     return;
722   }
723 
724   unsigned Scale;
725   switch (Opc) {
726   case AArch64::STPXi:
727   case AArch64::STRXui:
728   case AArch64::STPDi:
729   case AArch64::STRDui:
730   case AArch64::LDPXi:
731   case AArch64::LDRXui:
732   case AArch64::LDPDi:
733   case AArch64::LDRDui:
734     Scale = 8;
735     break;
736   case AArch64::STPQi:
737   case AArch64::STRQui:
738   case AArch64::LDPQi:
739   case AArch64::LDRQui:
740     Scale = 16;
741     break;
742   default:
743     llvm_unreachable("Unexpected callee-save save/restore opcode!");
744   }
745 
746   unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
747   assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
748          "Unexpected base register in callee-save save/restore instruction!");
749   // Last operand is immediate offset that needs fixing.
750   MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
751   // All generated opcodes have scaled offsets.
752   assert(LocalStackSize % Scale == 0);
753   OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);
754 
755   if (NeedsWinCFI) {
756     *HasWinCFI = true;
757     auto MBBI = std::next(MachineBasicBlock::iterator(MI));
758     assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
759     assert(AArch64InstrInfo::isSEHInstruction(*MBBI) &&
760            "Expecting a SEH instruction");
761     fixupSEHOpcode(MBBI, LocalStackSize);
762   }
763 }
764 
765 static void adaptForLdStOpt(MachineBasicBlock &MBB,
766                             MachineBasicBlock::iterator FirstSPPopI,
767                             MachineBasicBlock::iterator LastPopI) {
768   // Sometimes (when we restore in the same order as we save), we can end up
769   // with code like this:
770   //
771   // ldp      x26, x25, [sp]
772   // ldp      x24, x23, [sp, #16]
773   // ldp      x22, x21, [sp, #32]
774   // ldp      x20, x19, [sp, #48]
775   // add      sp, sp, #64
776   //
777   // In this case, it is always better to put the first ldp at the end, so
778   // that the load-store optimizer can run and merge the ldp and the add into
779   // a post-index ldp.
780   // If we managed to grab the first pop instruction, move it to the end.
781   if (ReverseCSRRestoreSeq)
782     MBB.splice(FirstSPPopI, &MBB, LastPopI);
783   // We should end up with something like this now:
784   //
785   // ldp      x24, x23, [sp, #16]
786   // ldp      x22, x21, [sp, #32]
787   // ldp      x20, x19, [sp, #48]
788   // ldp      x26, x25, [sp]
789   // add      sp, sp, #64
790   //
791   // and the load-store optimizer can merge the last two instructions into:
792   //
793   // ldp      x26, x25, [sp], #64
794   //
795 }
796 
797 static bool ShouldSignWithAKey(MachineFunction &MF) {
798   const Function &F = MF.getFunction();
799   if (!F.hasFnAttribute("sign-return-address-key"))
800     return true;
801 
802   const StringRef Key =
803       F.getFnAttribute("sign-return-address-key").getValueAsString();
804   assert(Key.equals_lower("a_key") || Key.equals_lower("b_key"));
805   return Key.equals_lower("a_key");
806 }
807 
808 static bool needsWinCFI(const MachineFunction &MF) {
809   const Function &F = MF.getFunction();
810   return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
811          F.needsUnwindTableEntry();
812 }
813 
814 static bool isTargetDarwin(const MachineFunction &MF) {
815   return MF.getSubtarget<AArch64Subtarget>().isTargetDarwin();
816 }
817 
// Emit the function prologue at the start of MBB.
//
// The steps, in the order they appear below, are:
//   1. Optionally sign the return address (PACIASP / PACIBSP) and record the
//      matching CFI.
//   2. Return immediately for the GHC calling convention.
//   3. For functions without a stack frame (and no stack probe), either use
//      the red zone or emit a single SP decrement, then return.
//   4. Otherwise allocate the callee-save area (combined with the locals when
//      shouldCombineCSRLocalStackBump() allows it) and walk past the
//      FrameSetup instructions, fixing up their offsets if the bump was
//      combined.
//   5. For EH funclet entry blocks, finish here (FP and BP belong to the
//      containing function).
//   6. Set up FP, probe the stack via __chkstk when required, allocate the
//      remaining locals (realigning SP if needed) and set up the base pointer.
//   7. Emit the SEH prologue-end marker and/or the DWARF CFI describing the
//      frame.
//
// MF is the function being lowered; MBB is the block receiving the prologue.
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const Function &F = MF.getFunction();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  // DWARF frame moves are only emitted when debug info or an unwind table is
  // needed and the target does not use Windows CFI instead.
  bool needsFrameMoves = (MMI.hasDebugInfo() || F.needsUnwindTableEntry()) &&
                         !MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
  bool HasFP = hasFP(MF);
  bool NeedsWinCFI = needsWinCFI(MF);
  bool HasWinCFI = false;
  // Record on the function whether any Windows CFI was emitted, whichever
  // return path below is taken.
  auto Cleanup = make_scope_exit([&]() { MF.setHasWinCFI(HasWinCFI); });

  bool IsFunclet = MBB.isEHFuncletEntry();

  // At this point, we're going to decide whether or not the function uses a
  // redzone. In most cases, the function doesn't have a redzone so let's
  // assume that's false and set it to true in the case that there's a redzone.
  AFI->setHasRedZone(false);

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc DL;

  // Sign the return address first, using the A key or (preceded by EMITBKEY)
  // the B key, and record the change of return-address state in the CFI.
  if (ShouldSignReturnAddress(MF)) {
    if (ShouldSignWithAKey(MF))
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIASP))
          .setMIFlag(MachineInstr::FrameSetup);
    else {
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITBKEY))
          .setMIFlag(MachineInstr::FrameSetup);
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIBSP))
          .setMIFlag(MachineInstr::FrameSetup);
    }

    unsigned CFIIndex =
        MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlags(MachineInstr::FrameSetup);
  }

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  // Set tagged base pointer to the bottom of the stack frame.
  // Ideally it should match SP value after prologue.
  AFI->setTaggedBasePointerOffset(MFI.getStackSize());

  // getStackSize() includes all the locals in its size calculation. We don't
  // include these locals when computing the stack size of a funclet, as they
  // are allocated in the parent's stack frame and accessed via the frame
  // pointer from the funclet.  We only save the callee saved registers in the
  // funclet, which are really the callee saved registers of the parent
  // function, including the funclet.
  //
  // From here on NumBytes tracks the number of bytes still to be allocated; it
  // is reduced (or zeroed) as each part of the frame is emitted.
  int NumBytes = IsFunclet ? (int)getWinEHFuncletFrameSize(MF)
                           : (int)MFI.getStackSize();
  if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
    assert(!HasFP && "unexpected function without stack frame but with FP");
    // All of the stack allocation is for locals.
    AFI->setLocalStackSize(NumBytes);
    if (!NumBytes)
      return;
    // REDZONE: If the stack size is less than 128 bytes, we don't need
    // to actually allocate.
    if (canUseRedZone(MF)) {
      AFI->setHasRedZone(true);
      ++NumRedZoneFunctions;
    } else {
      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                      {-NumBytes, MVT::i8}, TII, MachineInstr::FrameSetup,
                      false, NeedsWinCFI, &HasWinCFI);
      if (!NeedsWinCFI) {
        // Label used to tie together the PROLOG_LABEL and the MachineMoves.
        MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
        // Encode the stack size of the leaf function.
        unsigned CFIIndex = MF.addFrameInst(
            MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
        BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex)
            .setMIFlags(MachineInstr::FrameSetup);
      }
    }

    if (NeedsWinCFI) {
      HasWinCFI = true;
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
          .setMIFlag(MachineInstr::FrameSetup);
    }

    return;
  }

  bool IsWin64 =
      Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
  // Var args are accounted for in the containing function, so don't
  // include them for funclets.
  unsigned FixedObject = (IsWin64 && !IsFunclet) ?
                         alignTo(AFI->getVarArgsGPRSize(), 16) : 0;

  auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
  // All of the remaining stack allocations are for locals.
  AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
  bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
  if (CombineSPBump) {
    // One SP decrement covers both the callee-save area and the locals, so
    // nothing is left to allocate afterwards.
    emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                    {-NumBytes, MVT::i8}, TII, MachineInstr::FrameSetup, false,
                    NeedsWinCFI, &HasWinCFI);
    NumBytes = 0;
  } else if (PrologueSaveSize != 0) {
    // Allocate only the callee-save area here; the locals are allocated
    // separately further down.
    MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
        MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI);
    NumBytes -= PrologueSaveSize;
  }
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  // Move past the saves of the callee-saved registers, fixing up the offsets
  // and pre-inc if we decided to combine the callee-save and local stack
  // pointer bump above.
  MachineBasicBlock::iterator End = MBB.end();
  while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup)) {
    if (CombineSPBump)
      fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
                                        NeedsWinCFI, &HasWinCFI);
    ++MBBI;
  }

  // The code below is not applicable to funclets. We have emitted all the SEH
  // opcodes that we needed to emit.  The FP and BP belong to the containing
  // function.
  if (IsFunclet) {
    if (NeedsWinCFI) {
      HasWinCFI = true;
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
          .setMIFlag(MachineInstr::FrameSetup);
    }

    // SEH funclets are passed the frame pointer in X1.  If the parent
    // function uses the base register, then the base register is used
    // directly, and is not retrieved from X1.
    if (F.hasPersonalityFn()) {
      EHPersonality Per = classifyEHPersonality(F.getPersonalityFn());
      if (isAsynchronousEHPersonality(Per)) {
        BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), AArch64::FP)
            .addReg(AArch64::X1).setMIFlag(MachineInstr::FrameSetup);
        MBB.addLiveIn(AArch64::X1);
      }
    }

    return;
  }

  if (HasFP) {
    // Only set up FP if we actually need to.
    int FPOffset = isTargetDarwin(MF) ? (AFI->getCalleeSavedStackSize() - 16) : 0;

    // With a combined SP bump, SP is already below the locals, so FP has to
    // skip over them as well.
    if (CombineSPBump)
      FPOffset += AFI->getLocalStackSize();

    // Issue    add fp, sp, FPOffset or
    //          mov fp, sp         when FPOffset is zero.
    // Note: All stores of callee-saved registers are marked as "FrameSetup".
    // This code marks the instruction(s) that set the FP also.
    emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
                    {FPOffset, MVT::i8}, TII, MachineInstr::FrameSetup, false,
                    NeedsWinCFI, &HasWinCFI);
  }

  if (windowsRequiresStackProbe(MF, NumBytes)) {
    // X15 carries the allocation size in units of 16 bytes; the SUB emitted
    // after the __chkstk call scales it back up with UXTX #4.
    uint32_t NumWords = NumBytes >> 4;
    if (NeedsWinCFI) {
      HasWinCFI = true;
      // alloc_l can hold at most 256MB, so assume that NumBytes doesn't
      // exceed this amount.  We need to move at most 2^24 - 1 into x15.
      // This is at most two instructions, MOVZ followed by MOVK.
      // TODO: Fix to use multiple stack alloc unwind codes for stacks
      // exceeding 256MB in size.
      if (NumBytes >= (1 << 28))
        report_fatal_error("Stack size cannot exceed 256MB for stack "
                            "unwinding purposes");

      // Each materialising instruction is followed by its own SEH_Nop.
      uint32_t LowNumWords = NumWords & 0xFFFF;
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15)
            .addImm(LowNumWords)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
            .setMIFlag(MachineInstr::FrameSetup);
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
            .setMIFlag(MachineInstr::FrameSetup);
      if ((NumWords & 0xFFFF0000) != 0) {
          BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15)
              .addReg(AArch64::X15)
              .addImm((NumWords & 0xFFFF0000) >> 16) // High half
              .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16))
              .setMIFlag(MachineInstr::FrameSetup);
          BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
            .setMIFlag(MachineInstr::FrameSetup);
      }
    } else {
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
          .addImm(NumWords)
          .setMIFlags(MachineInstr::FrameSetup);
    }

    // Call __chkstk: directly with BL for the smaller code models, or through
    // X16 (address materialised with MOVaddrEXT) for the large code model.
    switch (MF.getTarget().getCodeModel()) {
    case CodeModel::Tiny:
    case CodeModel::Small:
    case CodeModel::Medium:
    case CodeModel::Kernel:
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
          .addExternalSymbol("__chkstk")
          .addReg(AArch64::X15, RegState::Implicit)
          .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
          .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
          .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
          .setMIFlags(MachineInstr::FrameSetup);
      if (NeedsWinCFI) {
        HasWinCFI = true;
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
            .setMIFlag(MachineInstr::FrameSetup);
      }
      break;
    case CodeModel::Large:
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
          .addReg(AArch64::X16, RegState::Define)
          .addExternalSymbol("__chkstk")
          .addExternalSymbol("__chkstk")
          .setMIFlags(MachineInstr::FrameSetup);
      if (NeedsWinCFI) {
        HasWinCFI = true;
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
            .setMIFlag(MachineInstr::FrameSetup);
      }

      // NOTE(review): X15 is marked Implicit | Define here, whereas the BL
      // used for the other code models marks it as a plain implicit use --
      // confirm this difference is intended.
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::BLR))
          .addReg(AArch64::X16, RegState::Kill)
          .addReg(AArch64::X15, RegState::Implicit | RegState::Define)
          .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
          .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
          .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
          .setMIFlags(MachineInstr::FrameSetup);
      if (NeedsWinCFI) {
        HasWinCFI = true;
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
            .setMIFlag(MachineInstr::FrameSetup);
      }
      break;
    }

    // sub sp, sp, x15, uxtx #4  -- performs the actual allocation.
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
        .addReg(AArch64::SP, RegState::Kill)
        .addReg(AArch64::X15, RegState::Kill)
        .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
        .setMIFlags(MachineInstr::FrameSetup);
    if (NeedsWinCFI) {
      HasWinCFI = true;
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
          .addImm(NumBytes)
          .setMIFlag(MachineInstr::FrameSetup);
    }
    // The probe sequence allocated everything that was left.
    NumBytes = 0;
  }

  // Allocate space for the rest of the frame.
  if (NumBytes) {
    const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
    // When realigning, the decremented SP is first computed into a scratch
    // register and only then masked into SP.
    unsigned scratchSPReg = AArch64::SP;

    if (NeedsRealignment) {
      scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
      assert(scratchSPReg != AArch64::NoRegister);
    }

    // If we're a leaf function, try using the red zone.
    if (!canUseRedZone(MF))
      // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
      // the correct value here, as NumBytes also includes padding bytes,
      // which shouldn't be counted here.
      emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP,
                      {-NumBytes, MVT::i8}, TII, MachineInstr::FrameSetup,
                      false, NeedsWinCFI, &HasWinCFI);

    if (NeedsRealignment) {
      const unsigned Alignment = MFI.getMaxAlignment();
      const unsigned NrBitsToZero = countTrailingZeros(Alignment);
      assert(NrBitsToZero > 1);
      assert(scratchSPReg != AArch64::SP);

      // SUB X9, SP, NumBytes
      //   -- X9 is temporary register, so shouldn't contain any live data here,
      //   -- free to use. This is already produced by emitFrameOffset above.
      // AND SP, X9, 0b11111...0000
      // The logical immediates have a non-trivial encoding. The following
      // formula computes the encoded immediate with all ones but
      // NrBitsToZero zero bits as least significant bits.
      uint32_t andMaskEncoded = (1 << 12)                         // = N
                                | ((64 - NrBitsToZero) << 6)      // immr
                                | ((64 - NrBitsToZero - 1) << 0); // imms

      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
          .addReg(scratchSPReg, RegState::Kill)
          .addImm(andMaskEncoded);
      AFI->setStackRealigned(true);
      if (NeedsWinCFI) {
        HasWinCFI = true;
        // NOTE(review): andMaskEncoded is the encoded logical immediate built
        // above, not the alignment mask itself, so AND-ing it with NumBytes
        // looks suspicious -- confirm the intended SEH_StackAlloc size.
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
            .addImm(NumBytes & andMaskEncoded)
            .setMIFlag(MachineInstr::FrameSetup);
      }
    }
  }

  // If we need a base pointer, set it up here. It's whatever the value of the
  // stack pointer is at this point. Any variable size objects will be allocated
  // after this, so we can still use the base pointer to reference locals.
  //
  // FIXME: Clarify FrameSetup flags here.
  // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
  // needed.
  if (RegInfo->hasBasePointer(MF)) {
    TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
                     false);
    if (NeedsWinCFI) {
      HasWinCFI = true;
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
          .setMIFlag(MachineInstr::FrameSetup);
    }
  }

  // The very last FrameSetup instruction indicates the end of prologue. Emit a
  // SEH opcode indicating the prologue end.
  if (NeedsWinCFI && HasWinCFI) {
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
        .setMIFlag(MachineInstr::FrameSetup);
  }

  if (needsFrameMoves) {
    const DataLayout &TD = MF.getDataLayout();
    // Offset used for the CFA rule below: two pointers' worth on Darwin, the
    // whole callee-save area elsewhere (negated in both cases).
    const int StackGrowth = isTargetDarwin(MF)
                                ? (2 * -TD.getPointerSize(0))
                                : -AFI->getCalleeSavedStackSize();
    Register FramePtr = RegInfo->getFrameRegister(MF);
    // An example of the prologue:
    //
    //     .globl __foo
    //     .align 2
    //  __foo:
    // Ltmp0:
    //     .cfi_startproc
    //     .cfi_personality 155, ___gxx_personality_v0
    // Leh_func_begin:
    //     .cfi_lsda 16, Lexception33
    //
    //     stp  xa,bx, [sp, -#offset]!
    //     ...
    //     stp  x28, x27, [sp, #offset-32]
    //     stp  fp, lr, [sp, #offset-16]
    //     add  fp, sp, #offset - 16
    //     sub  sp, sp, #1360
    //
    // The Stack:
    //       +-------------------------------------------+
    // 10000 | ........ | ........ | ........ | ........ |
    // 10004 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10008 | ........ | ........ | ........ | ........ |
    // 1000c | ........ | ........ | ........ | ........ |
    //       +===========================================+
    // 10010 |                X28 Register               |
    // 10014 |                X28 Register               |
    //       +-------------------------------------------+
    // 10018 |                X27 Register               |
    // 1001c |                X27 Register               |
    //       +===========================================+
    // 10020 |                Frame Pointer              |
    // 10024 |                Frame Pointer              |
    //       +-------------------------------------------+
    // 10028 |                Link Register              |
    // 1002c |                Link Register              |
    //       +===========================================+
    // 10030 | ........ | ........ | ........ | ........ |
    // 10034 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10038 | ........ | ........ | ........ | ........ |
    // 1003c | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    //
    //     [sp] = 10030        ::    >>initial value<<
    //     sp = 10020          ::  stp fp, lr, [sp, #-16]!
    //     fp = sp == 10020    ::  mov fp, sp
    //     [sp] == 10020       ::  stp x28, x27, [sp, #-16]!
    //     sp == 10010         ::    >>final value<<
    //
    // The frame pointer (w29) points to address 10020. If we use an offset of
    // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
    // for w27, and -32 for w28:
    //
    //  Ltmp1:
    //     .cfi_def_cfa w29, 16
    //  Ltmp2:
    //     .cfi_offset w30, -8
    //  Ltmp3:
    //     .cfi_offset w29, -16
    //  Ltmp4:
    //     .cfi_offset w27, -24
    //  Ltmp5:
    //     .cfi_offset w28, -32

    if (HasFP) {
      // Define the current CFA rule to use the provided FP.
      unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
      unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
          nullptr, Reg, StackGrowth - FixedObject));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    } else {
      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, -MFI.getStackSize()));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }

    // Now emit the moves for whatever callee saved regs we have (including FP,
    // LR if those are saved).
    emitCalleeSavedFrameMoves(MBB, MBBI);
  }
}
1253 
1254 static void InsertReturnAddressAuth(MachineFunction &MF,
1255                                     MachineBasicBlock &MBB) {
1256   if (!ShouldSignReturnAddress(MF))
1257     return;
1258   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1259   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1260 
1261   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1262   DebugLoc DL;
1263   if (MBBI != MBB.end())
1264     DL = MBBI->getDebugLoc();
1265 
1266   // The AUTIASP instruction assembles to a hint instruction before v8.3a so
1267   // this instruction can safely used for any v8a architecture.
1268   // From v8.3a onwards there are optimised authenticate LR and return
1269   // instructions, namely RETA{A,B}, that can be used instead.
1270   if (Subtarget.hasV8_3aOps() && MBBI != MBB.end() &&
1271       MBBI->getOpcode() == AArch64::RET_ReallyLR) {
1272     BuildMI(MBB, MBBI, DL,
1273             TII->get(ShouldSignWithAKey(MF) ? AArch64::RETAA : AArch64::RETAB))
1274         .copyImplicitOps(*MBBI);
1275     MBB.erase(MBBI);
1276   } else {
1277     BuildMI(
1278         MBB, MBBI, DL,
1279         TII->get(ShouldSignWithAKey(MF) ? AArch64::AUTIASP : AArch64::AUTIBSP))
1280         .setMIFlag(MachineInstr::FrameDestroy);
1281   }
1282 }
1283 
1284 static bool isFuncletReturnInstr(const MachineInstr &MI) {
1285   switch (MI.getOpcode()) {
1286   default:
1287     return false;
1288   case AArch64::CATCHRET:
1289   case AArch64::CLEANUPRET:
1290     return true;
1291   }
1292 }
1293 
1294 void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
1295                                         MachineBasicBlock &MBB) const {
1296   MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1297   MachineFrameInfo &MFI = MF.getFrameInfo();
1298   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1299   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1300   DebugLoc DL;
1301   bool IsTailCallReturn = false;
1302   bool NeedsWinCFI = needsWinCFI(MF);
1303   bool HasWinCFI = false;
1304   bool IsFunclet = false;
1305   auto WinCFI = make_scope_exit([&]() {
1306     if (!MF.hasWinCFI())
1307       MF.setHasWinCFI(HasWinCFI);
1308   });
1309 
1310   if (MBB.end() != MBBI) {
1311     DL = MBBI->getDebugLoc();
1312     unsigned RetOpcode = MBBI->getOpcode();
1313     IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
1314                        RetOpcode == AArch64::TCRETURNri ||
1315                        RetOpcode == AArch64::TCRETURNriBTI;
1316     IsFunclet = isFuncletReturnInstr(*MBBI);
1317   }
1318 
1319   int NumBytes = IsFunclet ? (int)getWinEHFuncletFrameSize(MF)
1320                            : MFI.getStackSize();
1321   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1322 
1323   // All calls are tail calls in GHC calling conv, and functions have no
1324   // prologue/epilogue.
1325   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
1326     return;
1327 
1328   // Initial and residual are named for consistency with the prologue. Note that
1329   // in the epilogue, the residual adjustment is executed first.
1330   uint64_t ArgumentPopSize = 0;
1331   if (IsTailCallReturn) {
1332     MachineOperand &StackAdjust = MBBI->getOperand(1);
1333 
1334     // For a tail-call in a callee-pops-arguments environment, some or all of
1335     // the stack may actually be in use for the call's arguments, this is
1336     // calculated during LowerCall and consumed here...
1337     ArgumentPopSize = StackAdjust.getImm();
1338   } else {
1339     // ... otherwise the amount to pop is *all* of the argument space,
1340     // conveniently stored in the MachineFunctionInfo by
1341     // LowerFormalArguments. This will, of course, be zero for the C calling
1342     // convention.
1343     ArgumentPopSize = AFI->getArgumentStackToRestore();
1344   }
1345 
1346   // The stack frame should be like below,
1347   //
1348   //      ----------------------                     ---
1349   //      |                    |                      |
1350   //      | BytesInStackArgArea|              CalleeArgStackSize
1351   //      | (NumReusableBytes) |                (of tail call)
1352   //      |                    |                     ---
1353   //      |                    |                      |
1354   //      ---------------------|        ---           |
1355   //      |                    |         |            |
1356   //      |   CalleeSavedReg   |         |            |
1357   //      | (CalleeSavedStackSize)|      |            |
1358   //      |                    |         |            |
1359   //      ---------------------|         |         NumBytes
1360   //      |                    |     StackSize  (StackAdjustUp)
1361   //      |   LocalStackSize   |         |            |
1362   //      | (covering callee   |         |            |
1363   //      |       args)        |         |            |
1364   //      |                    |         |            |
1365   //      ----------------------        ---          ---
1366   //
1367   // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
1368   //             = StackSize + ArgumentPopSize
1369   //
1370   // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
1371   // it as the 2nd argument of AArch64ISD::TC_RETURN.
1372 
1373   auto Cleanup = make_scope_exit([&] { InsertReturnAddressAuth(MF, MBB); });
1374 
1375   bool IsWin64 =
1376       Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
1377   // Var args are accounted for in the containing function, so don't
1378   // include them for funclets.
1379   unsigned FixedObject =
1380       (IsWin64 && !IsFunclet) ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
1381 
1382   uint64_t AfterCSRPopSize = ArgumentPopSize;
1383   auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
1384   // We cannot rely on the local stack size set in emitPrologue if the function
1385   // has funclets, as funclets have different local stack size requirements, and
1386   // the current value set in emitPrologue may be that of the containing
1387   // function.
1388   if (MF.hasEHFunclets())
1389     AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
1390   bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
1391   // Assume we can't combine the last pop with the sp restore.
1392 
1393   if (!CombineSPBump && PrologueSaveSize != 0) {
1394     MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
1395     while (AArch64InstrInfo::isSEHInstruction(*Pop))
1396       Pop = std::prev(Pop);
1397     // Converting the last ldp to a post-index ldp is valid only if the last
1398     // ldp's offset is 0.
1399     const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
1400     // If the offset is 0, convert it to a post-index ldp.
1401     if (OffsetOp.getImm() == 0)
1402       convertCalleeSaveRestoreToSPPrePostIncDec(
1403           MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, false);
1404     else {
1405       // If not, make sure to emit an add after the last ldp.
      // We're doing this by transferring the size to be restored from the
1407       // adjustment *before* the CSR pops to the adjustment *after* the CSR
1408       // pops.
1409       AfterCSRPopSize += PrologueSaveSize;
1410     }
1411   }
1412 
1413   // Move past the restores of the callee-saved registers.
1414   // If we plan on combining the sp bump of the local stack size and the callee
1415   // save stack size, we might need to adjust the CSR save and restore offsets.
1416   MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
1417   MachineBasicBlock::iterator Begin = MBB.begin();
1418   while (LastPopI != Begin) {
1419     --LastPopI;
1420     if (!LastPopI->getFlag(MachineInstr::FrameDestroy)) {
1421       ++LastPopI;
1422       break;
1423     } else if (CombineSPBump)
1424       fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(),
1425                                         NeedsWinCFI, &HasWinCFI);
1426   }
1427 
1428   if (NeedsWinCFI) {
1429     HasWinCFI = true;
1430     BuildMI(MBB, LastPopI, DL, TII->get(AArch64::SEH_EpilogStart))
1431         .setMIFlag(MachineInstr::FrameDestroy);
1432   }
1433 
1434   // If there is a single SP update, insert it before the ret and we're done.
1435   if (CombineSPBump) {
1436     emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
1437                     {NumBytes + (int64_t)AfterCSRPopSize, MVT::i8}, TII,
1438                     MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
1439     if (NeedsWinCFI && HasWinCFI)
1440       BuildMI(MBB, MBB.getFirstTerminator(), DL,
1441               TII->get(AArch64::SEH_EpilogEnd))
1442           .setMIFlag(MachineInstr::FrameDestroy);
1443     return;
1444   }
1445 
1446   NumBytes -= PrologueSaveSize;
1447   assert(NumBytes >= 0 && "Negative stack allocation size!?");
1448 
1449   if (!hasFP(MF)) {
1450     bool RedZone = canUseRedZone(MF);
1451     // If this was a redzone leaf function, we don't need to restore the
1452     // stack pointer (but we may need to pop stack args for fastcc).
1453     if (RedZone && AfterCSRPopSize == 0)
1454       return;
1455 
1456     bool NoCalleeSaveRestore = PrologueSaveSize == 0;
1457     int StackRestoreBytes = RedZone ? 0 : NumBytes;
1458     if (NoCalleeSaveRestore)
1459       StackRestoreBytes += AfterCSRPopSize;
1460 
1461     // If we were able to combine the local stack pop with the argument pop,
1462     // then we're done.
1463     bool Done = NoCalleeSaveRestore || AfterCSRPopSize == 0;
1464 
1465     // If we're done after this, make sure to help the load store optimizer.
1466     if (Done)
1467       adaptForLdStOpt(MBB, MBB.getFirstTerminator(), LastPopI);
1468 
1469     emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
1470                     {StackRestoreBytes, MVT::i8}, TII,
1471                     MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
1472     if (Done) {
1473       if (NeedsWinCFI) {
1474         HasWinCFI = true;
1475         BuildMI(MBB, MBB.getFirstTerminator(), DL,
1476                 TII->get(AArch64::SEH_EpilogEnd))
1477             .setMIFlag(MachineInstr::FrameDestroy);
1478       }
1479       return;
1480     }
1481 
1482     NumBytes = 0;
1483   }
1484 
1485   // Restore the original stack pointer.
1486   // FIXME: Rather than doing the math here, we should instead just use
1487   // non-post-indexed loads for the restores if we aren't actually going to
1488   // be able to save any instructions.
1489   if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) {
1490     int64_t OffsetToFrameRecord =
1491         isTargetDarwin(MF) ? (-(int64_t)AFI->getCalleeSavedStackSize() + 16) : 0;
1492     emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
1493                     {OffsetToFrameRecord, MVT::i8},
1494                     TII, MachineInstr::FrameDestroy, false, NeedsWinCFI);
1495   } else if (NumBytes)
1496     emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
1497                     {NumBytes, MVT::i8}, TII, MachineInstr::FrameDestroy, false,
1498                     NeedsWinCFI);
1499 
1500   // This must be placed after the callee-save restore code because that code
1501   // assumes the SP is at the same location as it was after the callee-save save
1502   // code in the prologue.
1503   if (AfterCSRPopSize) {
1504     // Find an insertion point for the first ldp so that it goes before the
1505     // shadow call stack epilog instruction. This ensures that the restore of
1506     // lr from x18 is placed after the restore from sp.
1507     auto FirstSPPopI = MBB.getFirstTerminator();
1508     while (FirstSPPopI != Begin) {
1509       auto Prev = std::prev(FirstSPPopI);
1510       if (Prev->getOpcode() != AArch64::LDRXpre ||
1511           Prev->getOperand(0).getReg() == AArch64::SP)
1512         break;
1513       FirstSPPopI = Prev;
1514     }
1515 
1516     adaptForLdStOpt(MBB, FirstSPPopI, LastPopI);
1517 
1518     emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP,
1519                     {(int64_t)AfterCSRPopSize, MVT::i8}, TII,
1520                     MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
1521   }
1522   if (NeedsWinCFI && HasWinCFI)
1523     BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd))
1524         .setMIFlag(MachineInstr::FrameDestroy);
1525 
1526   MF.setHasWinCFI(HasWinCFI);
1527 }
1528 
1529 /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
1530 /// debug info.  It's the same as what we use for resolving the code-gen
1531 /// references for now.  FIXME: This can go wrong when references are
1532 /// SP-relative and simple call frames aren't used.
1533 int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
1534                                                  int FI,
1535                                                  unsigned &FrameReg) const {
1536   return resolveFrameIndexReference(
1537              MF, FI, FrameReg,
1538              /*PreferFP=*/
1539              MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress),
1540              /*ForSimm=*/false)
1541       .getBytes();
1542 }
1543 
1544 int AArch64FrameLowering::getNonLocalFrameIndexReference(
1545   const MachineFunction &MF, int FI) const {
1546   return getSEHFrameIndexOffset(MF, FI);
1547 }
1548 
1549 static StackOffset getFPOffset(const MachineFunction &MF, int ObjectOffset) {
1550   const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
1551   const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1552   bool IsWin64 =
1553       Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
1554   unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
1555   unsigned FPAdjust = isTargetDarwin(MF) ? 16 : AFI->getCalleeSavedStackSize();
1556   return {ObjectOffset + FixedObject + FPAdjust, MVT::i8};
1557 }
1558 
1559 static StackOffset getStackOffset(const MachineFunction &MF, int ObjectOffset) {
1560   const auto &MFI = MF.getFrameInfo();
1561   return {ObjectOffset + (int)MFI.getStackSize(), MVT::i8};
1562 }
1563 
1564 int AArch64FrameLowering::getSEHFrameIndexOffset(const MachineFunction &MF,
1565                                                  int FI) const {
1566   const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
1567       MF.getSubtarget().getRegisterInfo());
1568   int ObjectOffset = MF.getFrameInfo().getObjectOffset(FI);
1569   return RegInfo->getLocalAddressRegister(MF) == AArch64::FP
1570              ? getFPOffset(MF, ObjectOffset).getBytes()
1571              : getStackOffset(MF, ObjectOffset).getBytes();
1572 }
1573 
1574 StackOffset AArch64FrameLowering::resolveFrameIndexReference(
1575     const MachineFunction &MF, int FI, unsigned &FrameReg, bool PreferFP,
1576     bool ForSimm) const {
1577   const auto &MFI = MF.getFrameInfo();
1578   int ObjectOffset = MFI.getObjectOffset(FI);
1579   bool isFixed = MFI.isFixedObjectIndex(FI);
1580   return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, FrameReg,
1581                                      PreferFP, ForSimm);
1582 }
1583 
1584 StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
1585     const MachineFunction &MF, int ObjectOffset, bool isFixed,
1586     unsigned &FrameReg, bool PreferFP, bool ForSimm) const {
1587   const auto &MFI = MF.getFrameInfo();
1588   const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
1589       MF.getSubtarget().getRegisterInfo());
1590   const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
1591   const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1592 
1593   int FPOffset = getFPOffset(MF, ObjectOffset).getBytes();
1594   int Offset = getStackOffset(MF, ObjectOffset).getBytes();
1595   bool isCSR =
1596       !isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize());
1597 
1598   // Use frame pointer to reference fixed objects. Use it for locals if
1599   // there are VLAs or a dynamically realigned SP (and thus the SP isn't
1600   // reliable as a base). Make sure useFPForScavengingIndex() does the
1601   // right thing for the emergency spill slot.
1602   bool UseFP = false;
1603   if (AFI->hasStackFrame()) {
1604     // Note: Keeping the following as multiple 'if' statements rather than
1605     // merging to a single expression for readability.
1606     //
1607     // Argument access should always use the FP.
1608     if (isFixed) {
1609       UseFP = hasFP(MF);
1610     } else if (isCSR && RegInfo->needsStackRealignment(MF)) {
1611       // References to the CSR area must use FP if we're re-aligning the stack
1612       // since the dynamically-sized alignment padding is between the SP/BP and
1613       // the CSR area.
1614       assert(hasFP(MF) && "Re-aligned stack must have frame pointer");
1615       UseFP = true;
1616     } else if (hasFP(MF) && !RegInfo->needsStackRealignment(MF)) {
1617       // If the FPOffset is negative and we're producing a signed immediate, we
1618       // have to keep in mind that the available offset range for negative
1619       // offsets is smaller than for positive ones. If an offset is available
1620       // via the FP and the SP, use whichever is closest.
1621       bool FPOffsetFits = !ForSimm || FPOffset >= -256;
1622       PreferFP |= Offset > -FPOffset;
1623 
1624       if (MFI.hasVarSizedObjects()) {
1625         // If we have variable sized objects, we can use either FP or BP, as the
1626         // SP offset is unknown. We can use the base pointer if we have one and
1627         // FP is not preferred. If not, we're stuck with using FP.
1628         bool CanUseBP = RegInfo->hasBasePointer(MF);
1629         if (FPOffsetFits && CanUseBP) // Both are ok. Pick the best.
1630           UseFP = PreferFP;
1631         else if (!CanUseBP) // Can't use BP. Forced to use FP.
1632           UseFP = true;
1633         // else we can use BP and FP, but the offset from FP won't fit.
1634         // That will make us scavenge registers which we can probably avoid by
1635         // using BP. If it won't fit for BP either, we'll scavenge anyway.
1636       } else if (FPOffset >= 0) {
1637         // Use SP or FP, whichever gives us the best chance of the offset
1638         // being in range for direct access. If the FPOffset is positive,
1639         // that'll always be best, as the SP will be even further away.
1640         UseFP = true;
1641       } else if (MF.hasEHFunclets() && !RegInfo->hasBasePointer(MF)) {
1642         // Funclets access the locals contained in the parent's stack frame
1643         // via the frame pointer, so we have to use the FP in the parent
1644         // function.
1645         (void) Subtarget;
1646         assert(
1647             Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()) &&
1648             "Funclets should only be present on Win64");
1649         UseFP = true;
1650       } else {
1651         // We have the choice between FP and (SP or BP).
1652         if (FPOffsetFits && PreferFP) // If FP is the best fit, use it.
1653           UseFP = true;
1654       }
1655     }
1656   }
1657 
1658   assert(((isFixed || isCSR) || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
1659          "In the presence of dynamic stack pointer realignment, "
1660          "non-argument/CSR objects cannot be accessed through the frame pointer");
1661 
1662   if (UseFP) {
1663     FrameReg = RegInfo->getFrameRegister(MF);
1664     return StackOffset(FPOffset, MVT::i8);
1665   }
1666 
1667   // Use the base pointer if we have one.
1668   if (RegInfo->hasBasePointer(MF))
1669     FrameReg = RegInfo->getBaseRegister();
1670   else {
1671     assert(!MFI.hasVarSizedObjects() &&
1672            "Can't use SP when we have var sized objects.");
1673     FrameReg = AArch64::SP;
1674     // If we're using the red zone for this function, the SP won't actually
1675     // be adjusted, so the offsets will be negative. They're also all
1676     // within range of the signed 9-bit immediate instructions.
1677     if (canUseRedZone(MF))
1678       Offset -= AFI->getLocalStackSize();
1679   }
1680 
1681   return StackOffset(Offset, MVT::i8);
1682 }
1683 
1684 static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
1685   // Do not set a kill flag on values that are also marked as live-in. This
1686   // happens with the @llvm-returnaddress intrinsic and with arguments passed in
1687   // callee saved registers.
1688   // Omitting the kill flags is conservatively correct even if the live-in
1689   // is not used after all.
1690   bool IsLiveIn = MF.getRegInfo().isLiveIn(Reg);
1691   return getKillRegState(!IsLiveIn);
1692 }
1693 
1694 static bool produceCompactUnwindFrame(MachineFunction &MF) {
1695   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1696   AttributeList Attrs = MF.getFunction().getAttributes();
1697   return Subtarget.isTargetMachO() &&
1698          !(Subtarget.getTargetLowering()->supportSwiftError() &&
1699            Attrs.hasAttrSomewhere(Attribute::SwiftError));
1700 }
1701 
/// Returns true if \p Reg1 and \p Reg2 must not be paired because of Windows
/// unwind-code restrictions. Always false when \p NeedsWinCFI is false.
static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
                                             bool NeedsWinCFI) {
  // When generating register pairs for a Windows function that requires EH
  // support, only consecutive registers may be paired: there are no unwind
  // opcodes for saves/restores of non-consecutive register pairs.
  // The unwind opcodes are save_regp, save_regp_x, save_fregp, save_fregp_x.
  // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling

  // TODO: LR can be paired with any register.  We don't support this yet in
  // the MCLayer.  We need to add support for the save_lrpair unwind code.
  const bool IsConsecutive = Reg2 == Reg1 + 1;
  return NeedsWinCFI && !IsConsecutive;
}
1718 
1719 /// Returns true if Reg1 and Reg2 cannot be paired using a ldp/stp instruction.
1720 /// WindowsCFI requires that only consecutive registers can be paired.
1721 /// LR and FP need to be allocated together when the frame needs to save
1722 /// the frame-record. This means any other register pairing with LR is invalid.
1723 static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2,
1724                                       bool NeedsWinCFI, bool NeedsFrameRecord) {
1725   if (NeedsWinCFI)
1726     return invalidateWindowsRegisterPairing(Reg1, Reg2, true);
1727 
1728   // If we need to store the frame record, don't pair any register
1729   // with LR other than FP.
1730   if (NeedsFrameRecord)
1731     return Reg2 == AArch64::LR;
1732 
1733   return false;
1734 }
1735 
1736 namespace {
1737 
1738 struct RegPairInfo {
1739   unsigned Reg1 = AArch64::NoRegister;
1740   unsigned Reg2 = AArch64::NoRegister;
1741   int FrameIdx;
1742   int Offset;
1743   enum RegType { GPR, FPR64, FPR128 } Type;
1744 
1745   RegPairInfo() = default;
1746 
1747   bool isPaired() const { return Reg2 != AArch64::NoRegister; }
1748 };
1749 
1750 } // end anonymous namespace
1751 
1752 static void computeCalleeSaveRegisterPairs(
1753     MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI,
1754     const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs,
1755     bool &NeedShadowCallStackProlog, bool NeedsFrameRecord) {
1756 
1757   if (CSI.empty())
1758     return;
1759 
1760   bool NeedsWinCFI = needsWinCFI(MF);
1761   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1762   MachineFrameInfo &MFI = MF.getFrameInfo();
1763   CallingConv::ID CC = MF.getFunction().getCallingConv();
1764   unsigned Count = CSI.size();
1765   (void)CC;
1766   // MachO's compact unwind format relies on all registers being stored in
1767   // pairs.
1768   assert((!produceCompactUnwindFrame(MF) ||
1769           CC == CallingConv::PreserveMost ||
1770           (Count & 1) == 0) &&
1771          "Odd number of callee-saved regs to spill!");
1772   int Offset = AFI->getCalleeSavedStackSize();
1773   // On Linux, we will have either one or zero non-paired register.  On Windows
1774   // with CFI, we can have multiple unpaired registers in order to utilize the
1775   // available unwind codes.  This flag assures that the alignment fixup is done
1776   // only once, as intened.
1777   bool FixupDone = false;
1778   for (unsigned i = 0; i < Count; ++i) {
1779     RegPairInfo RPI;
1780     RPI.Reg1 = CSI[i].getReg();
1781 
1782     if (AArch64::GPR64RegClass.contains(RPI.Reg1))
1783       RPI.Type = RegPairInfo::GPR;
1784     else if (AArch64::FPR64RegClass.contains(RPI.Reg1))
1785       RPI.Type = RegPairInfo::FPR64;
1786     else if (AArch64::FPR128RegClass.contains(RPI.Reg1))
1787       RPI.Type = RegPairInfo::FPR128;
1788     else
1789       llvm_unreachable("Unsupported register class.");
1790 
1791     // Add the next reg to the pair if it is in the same register class.
1792     if (i + 1 < Count) {
1793       unsigned NextReg = CSI[i + 1].getReg();
1794       switch (RPI.Type) {
1795       case RegPairInfo::GPR:
1796         if (AArch64::GPR64RegClass.contains(NextReg) &&
1797             !invalidateRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI,
1798                                        NeedsFrameRecord))
1799           RPI.Reg2 = NextReg;
1800         break;
1801       case RegPairInfo::FPR64:
1802         if (AArch64::FPR64RegClass.contains(NextReg) &&
1803             !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI))
1804           RPI.Reg2 = NextReg;
1805         break;
1806       case RegPairInfo::FPR128:
1807         if (AArch64::FPR128RegClass.contains(NextReg))
1808           RPI.Reg2 = NextReg;
1809         break;
1810       }
1811     }
1812 
1813     // If either of the registers to be saved is the lr register, it means that
1814     // we also need to save lr in the shadow call stack.
1815     if ((RPI.Reg1 == AArch64::LR || RPI.Reg2 == AArch64::LR) &&
1816         MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack)) {
1817       if (!MF.getSubtarget<AArch64Subtarget>().isXRegisterReserved(18))
1818         report_fatal_error("Must reserve x18 to use shadow call stack");
1819       NeedShadowCallStackProlog = true;
1820     }
1821 
1822     // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
1823     // list to come in sorted by frame index so that we can issue the store
1824     // pair instructions directly. Assert if we see anything otherwise.
1825     //
1826     // The order of the registers in the list is controlled by
1827     // getCalleeSavedRegs(), so they will always be in-order, as well.
1828     assert((!RPI.isPaired() ||
1829             (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) &&
1830            "Out of order callee saved regs!");
1831 
1832     assert((!RPI.isPaired() || RPI.Reg2 != AArch64::FP ||
1833             RPI.Reg1 == AArch64::LR) &&
1834            "FrameRecord must be allocated together with LR");
1835 
1836     // MachO's compact unwind format relies on all registers being stored in
1837     // adjacent register pairs.
1838     assert((!produceCompactUnwindFrame(MF) ||
1839             CC == CallingConv::PreserveMost ||
1840             (RPI.isPaired() &&
1841              ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
1842               RPI.Reg1 + 1 == RPI.Reg2))) &&
1843            "Callee-save registers not saved as adjacent register pair!");
1844 
1845     RPI.FrameIdx = CSI[i].getFrameIdx();
1846 
1847     int Scale = RPI.Type == RegPairInfo::FPR128 ? 16 : 8;
1848     Offset -= RPI.isPaired() ? 2 * Scale : Scale;
1849 
1850     // Round up size of non-pair to pair size if we need to pad the
1851     // callee-save area to ensure 16-byte alignment.
1852     if (AFI->hasCalleeSaveStackFreeSpace() && !FixupDone &&
1853         RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired()) {
1854       FixupDone = true;
1855       Offset -= 8;
1856       assert(Offset % 16 == 0);
1857       assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16);
1858       MFI.setObjectAlignment(RPI.FrameIdx, 16);
1859     }
1860 
1861     assert(Offset % Scale == 0);
1862     RPI.Offset = Offset / Scale;
1863     assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
1864            "Offset out of bounds for LDP/STP immediate");
1865 
1866     RegPairs.push_back(RPI);
1867     if (RPI.isPaired())
1868       ++i;
1869   }
1870 }
1871 
1872 bool AArch64FrameLowering::spillCalleeSavedRegisters(
1873     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1874     const std::vector<CalleeSavedInfo> &CSI,
1875     const TargetRegisterInfo *TRI) const {
1876   MachineFunction &MF = *MBB.getParent();
1877   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1878   bool NeedsWinCFI = needsWinCFI(MF);
1879   DebugLoc DL;
1880   SmallVector<RegPairInfo, 8> RegPairs;
1881 
1882   bool NeedShadowCallStackProlog = false;
1883   computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs,
1884                                  NeedShadowCallStackProlog, hasFP(MF));
1885   const MachineRegisterInfo &MRI = MF.getRegInfo();
1886 
1887   if (NeedShadowCallStackProlog) {
1888     // Shadow call stack prolog: str x30, [x18], #8
1889     BuildMI(MBB, MI, DL, TII.get(AArch64::STRXpost))
1890         .addReg(AArch64::X18, RegState::Define)
1891         .addReg(AArch64::LR)
1892         .addReg(AArch64::X18)
1893         .addImm(8)
1894         .setMIFlag(MachineInstr::FrameSetup);
1895 
1896     if (NeedsWinCFI)
1897       BuildMI(MBB, MI, DL, TII.get(AArch64::SEH_Nop))
1898           .setMIFlag(MachineInstr::FrameSetup);
1899 
1900     if (!MF.getFunction().hasFnAttribute(Attribute::NoUnwind)) {
1901       // Emit a CFI instruction that causes 8 to be subtracted from the value of
1902       // x18 when unwinding past this frame.
1903       static const char CFIInst[] = {
1904           dwarf::DW_CFA_val_expression,
1905           18, // register
1906           2,  // length
1907           static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
1908           static_cast<char>(-8) & 0x7f, // addend (sleb128)
1909       };
1910       unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createEscape(
1911           nullptr, StringRef(CFIInst, sizeof(CFIInst))));
1912       BuildMI(MBB, MI, DL, TII.get(AArch64::CFI_INSTRUCTION))
1913           .addCFIIndex(CFIIndex)
1914           .setMIFlag(MachineInstr::FrameSetup);
1915     }
1916 
1917     // This instruction also makes x18 live-in to the entry block.
1918     MBB.addLiveIn(AArch64::X18);
1919   }
1920 
1921   for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
1922        ++RPII) {
1923     RegPairInfo RPI = *RPII;
1924     unsigned Reg1 = RPI.Reg1;
1925     unsigned Reg2 = RPI.Reg2;
1926     unsigned StrOpc;
1927 
1928     // Issue sequence of spills for cs regs.  The first spill may be converted
1929     // to a pre-decrement store later by emitPrologue if the callee-save stack
1930     // area allocation can't be combined with the local stack area allocation.
1931     // For example:
1932     //    stp     x22, x21, [sp, #0]     // addImm(+0)
1933     //    stp     x20, x19, [sp, #16]    // addImm(+2)
1934     //    stp     fp, lr, [sp, #32]      // addImm(+4)
1935     // Rationale: This sequence saves uop updates compared to a sequence of
1936     // pre-increment spills like stp xi,xj,[sp,#-16]!
1937     // Note: Similar rationale and sequence for restores in epilog.
1938     unsigned Size, Align;
1939     switch (RPI.Type) {
1940     case RegPairInfo::GPR:
1941        StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
1942        Size = 8;
1943        Align = 8;
1944        break;
1945     case RegPairInfo::FPR64:
1946        StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
1947        Size = 8;
1948        Align = 8;
1949        break;
1950     case RegPairInfo::FPR128:
1951        StrOpc = RPI.isPaired() ? AArch64::STPQi : AArch64::STRQui;
1952        Size = 16;
1953        Align = 16;
1954        break;
1955     }
1956     LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
1957                if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
1958                dbgs() << ") -> fi#(" << RPI.FrameIdx;
1959                if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
1960                dbgs() << ")\n");
1961 
1962     assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) &&
1963            "Windows unwdinding requires a consecutive (FP,LR) pair");
1964     // Windows unwind codes require consecutive registers if registers are
1965     // paired.  Make the switch here, so that the code below will save (x,x+1)
1966     // and not (x+1,x).
1967     unsigned FrameIdxReg1 = RPI.FrameIdx;
1968     unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
1969     if (NeedsWinCFI && RPI.isPaired()) {
1970       std::swap(Reg1, Reg2);
1971       std::swap(FrameIdxReg1, FrameIdxReg2);
1972     }
1973     MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
1974     if (!MRI.isReserved(Reg1))
1975       MBB.addLiveIn(Reg1);
1976     if (RPI.isPaired()) {
1977       if (!MRI.isReserved(Reg2))
1978         MBB.addLiveIn(Reg2);
1979       MIB.addReg(Reg2, getPrologueDeath(MF, Reg2));
1980       MIB.addMemOperand(MF.getMachineMemOperand(
1981           MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
1982           MachineMemOperand::MOStore, Size, Align));
1983     }
1984     MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
1985         .addReg(AArch64::SP)
1986         .addImm(RPI.Offset) // [sp, #offset*scale],
1987                             // where factor*scale is implicit
1988         .setMIFlag(MachineInstr::FrameSetup);
1989     MIB.addMemOperand(MF.getMachineMemOperand(
1990         MachinePointerInfo::getFixedStack(MF,FrameIdxReg1),
1991         MachineMemOperand::MOStore, Size, Align));
1992     if (NeedsWinCFI)
1993       InsertSEH(MIB, TII, MachineInstr::FrameSetup);
1994 
1995   }
1996   return true;
1997 }
1998 
1999 bool AArch64FrameLowering::restoreCalleeSavedRegisters(
2000     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2001     std::vector<CalleeSavedInfo> &CSI,
2002     const TargetRegisterInfo *TRI) const {
2003   MachineFunction &MF = *MBB.getParent();
2004   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
2005   DebugLoc DL;
2006   SmallVector<RegPairInfo, 8> RegPairs;
2007   bool NeedsWinCFI = needsWinCFI(MF);
2008 
2009   if (MI != MBB.end())
2010     DL = MI->getDebugLoc();
2011 
2012   bool NeedShadowCallStackProlog = false;
2013   computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs,
2014                                  NeedShadowCallStackProlog, hasFP(MF));
2015 
2016   auto EmitMI = [&](const RegPairInfo &RPI) {
2017     unsigned Reg1 = RPI.Reg1;
2018     unsigned Reg2 = RPI.Reg2;
2019 
2020     // Issue sequence of restores for cs regs. The last restore may be converted
2021     // to a post-increment load later by emitEpilogue if the callee-save stack
2022     // area allocation can't be combined with the local stack area allocation.
2023     // For example:
2024     //    ldp     fp, lr, [sp, #32]       // addImm(+4)
2025     //    ldp     x20, x19, [sp, #16]     // addImm(+2)
2026     //    ldp     x22, x21, [sp, #0]      // addImm(+0)
2027     // Note: see comment in spillCalleeSavedRegisters()
2028     unsigned LdrOpc;
2029     unsigned Size, Align;
2030     switch (RPI.Type) {
2031     case RegPairInfo::GPR:
2032        LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
2033        Size = 8;
2034        Align = 8;
2035        break;
2036     case RegPairInfo::FPR64:
2037        LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
2038        Size = 8;
2039        Align = 8;
2040        break;
2041     case RegPairInfo::FPR128:
2042        LdrOpc = RPI.isPaired() ? AArch64::LDPQi : AArch64::LDRQui;
2043        Size = 16;
2044        Align = 16;
2045        break;
2046     }
2047     LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
2048                if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
2049                dbgs() << ") -> fi#(" << RPI.FrameIdx;
2050                if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
2051                dbgs() << ")\n");
2052 
2053     // Windows unwind codes require consecutive registers if registers are
2054     // paired.  Make the switch here, so that the code below will save (x,x+1)
2055     // and not (x+1,x).
2056     unsigned FrameIdxReg1 = RPI.FrameIdx;
2057     unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
2058     if (NeedsWinCFI && RPI.isPaired()) {
2059       std::swap(Reg1, Reg2);
2060       std::swap(FrameIdxReg1, FrameIdxReg2);
2061     }
2062     MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
2063     if (RPI.isPaired()) {
2064       MIB.addReg(Reg2, getDefRegState(true));
2065       MIB.addMemOperand(MF.getMachineMemOperand(
2066           MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
2067           MachineMemOperand::MOLoad, Size, Align));
2068     }
2069     MIB.addReg(Reg1, getDefRegState(true))
2070         .addReg(AArch64::SP)
2071         .addImm(RPI.Offset) // [sp, #offset*scale]
2072                             // where factor*scale is implicit
2073         .setMIFlag(MachineInstr::FrameDestroy);
2074     MIB.addMemOperand(MF.getMachineMemOperand(
2075         MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
2076         MachineMemOperand::MOLoad, Size, Align));
2077     if (NeedsWinCFI)
2078       InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
2079   };
2080   if (ReverseCSRRestoreSeq)
2081     for (const RegPairInfo &RPI : reverse(RegPairs))
2082       EmitMI(RPI);
2083   else
2084     for (const RegPairInfo &RPI : RegPairs)
2085       EmitMI(RPI);
2086 
2087   if (NeedShadowCallStackProlog) {
2088     // Shadow call stack epilog: ldr x30, [x18, #-8]!
2089     BuildMI(MBB, MI, DL, TII.get(AArch64::LDRXpre))
2090         .addReg(AArch64::X18, RegState::Define)
2091         .addReg(AArch64::LR, RegState::Define)
2092         .addReg(AArch64::X18)
2093         .addImm(-8)
2094         .setMIFlag(MachineInstr::FrameDestroy);
2095   }
2096 
2097   return true;
2098 }
2099 
2100 void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
2101                                                 BitVector &SavedRegs,
2102                                                 RegScavenger *RS) const {
2103   // All calls are tail calls in GHC calling conv, and functions have no
2104   // prologue/epilogue.
2105   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
2106     return;
2107 
2108   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
2109   const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
2110       MF.getSubtarget().getRegisterInfo());
2111   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
2112   unsigned UnspilledCSGPR = AArch64::NoRegister;
2113   unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
2114 
2115   MachineFrameInfo &MFI = MF.getFrameInfo();
2116   const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
2117 
2118   unsigned BasePointerReg = RegInfo->hasBasePointer(MF)
2119                                 ? RegInfo->getBaseRegister()
2120                                 : (unsigned)AArch64::NoRegister;
2121 
2122   unsigned ExtraCSSpill = 0;
2123   // Figure out which callee-saved registers to save/restore.
2124   for (unsigned i = 0; CSRegs[i]; ++i) {
2125     const unsigned Reg = CSRegs[i];
2126 
2127     // Add the base pointer register to SavedRegs if it is callee-save.
2128     if (Reg == BasePointerReg)
2129       SavedRegs.set(Reg);
2130 
2131     bool RegUsed = SavedRegs.test(Reg);
2132     unsigned PairedReg = CSRegs[i ^ 1];
2133     if (!RegUsed) {
2134       if (AArch64::GPR64RegClass.contains(Reg) &&
2135           !RegInfo->isReservedReg(MF, Reg)) {
2136         UnspilledCSGPR = Reg;
2137         UnspilledCSGPRPaired = PairedReg;
2138       }
2139       continue;
2140     }
2141 
2142     // MachO's compact unwind format relies on all registers being stored in
2143     // pairs.
2144     // FIXME: the usual format is actually better if unwinding isn't needed.
2145     if (produceCompactUnwindFrame(MF) && PairedReg != AArch64::NoRegister &&
2146         !SavedRegs.test(PairedReg)) {
2147       SavedRegs.set(PairedReg);
2148       if (AArch64::GPR64RegClass.contains(PairedReg) &&
2149           !RegInfo->isReservedReg(MF, PairedReg))
2150         ExtraCSSpill = PairedReg;
2151     }
2152   }
2153 
2154   // Calculates the callee saved stack size.
2155   unsigned CSStackSize = 0;
2156   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
2157   const MachineRegisterInfo &MRI = MF.getRegInfo();
2158   for (unsigned Reg : SavedRegs.set_bits())
2159     CSStackSize += TRI->getRegSizeInBits(Reg, MRI) / 8;
2160 
2161   // Save number of saved regs, so we can easily update CSStackSize later.
2162   unsigned NumSavedRegs = SavedRegs.count();
2163 
2164   // The frame record needs to be created by saving the appropriate registers
2165   unsigned EstimatedStackSize = MFI.estimateStackSize(MF);
2166   if (hasFP(MF) ||
2167       windowsRequiresStackProbe(MF, EstimatedStackSize + CSStackSize + 16)) {
2168     SavedRegs.set(AArch64::FP);
2169     SavedRegs.set(AArch64::LR);
2170   }
2171 
2172   LLVM_DEBUG(dbgs() << "*** determineCalleeSaves\nSaved CSRs:";
2173              for (unsigned Reg
2174                   : SavedRegs.set_bits()) dbgs()
2175              << ' ' << printReg(Reg, RegInfo);
2176              dbgs() << "\n";);
2177 
2178   // If any callee-saved registers are used, the frame cannot be eliminated.
2179   bool CanEliminateFrame = SavedRegs.count() == 0;
2180 
2181   // The CSR spill slots have not been allocated yet, so estimateStackSize
2182   // won't include them.
2183   unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
2184   bool BigStack = (EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit;
2185   if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
2186     AFI->setHasStackFrame(true);
2187 
2188   // Estimate if we might need to scavenge a register at some point in order
2189   // to materialize a stack offset. If so, either spill one additional
2190   // callee-saved register or reserve a special spill slot to facilitate
2191   // register scavenging. If we already spilled an extra callee-saved register
2192   // above to keep the number of spills even, we don't need to do anything else
2193   // here.
2194   if (BigStack) {
2195     if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) {
2196       LLVM_DEBUG(dbgs() << "Spilling " << printReg(UnspilledCSGPR, RegInfo)
2197                         << " to get a scratch register.\n");
2198       SavedRegs.set(UnspilledCSGPR);
2199       // MachO's compact unwind format relies on all registers being stored in
2200       // pairs, so if we need to spill one extra for BigStack, then we need to
2201       // store the pair.
2202       if (produceCompactUnwindFrame(MF))
2203         SavedRegs.set(UnspilledCSGPRPaired);
2204       ExtraCSSpill = UnspilledCSGPR;
2205     }
2206 
2207     // If we didn't find an extra callee-saved register to spill, create
2208     // an emergency spill slot.
2209     if (!ExtraCSSpill || MF.getRegInfo().isPhysRegUsed(ExtraCSSpill)) {
2210       const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
2211       const TargetRegisterClass &RC = AArch64::GPR64RegClass;
2212       unsigned Size = TRI->getSpillSize(RC);
2213       unsigned Align = TRI->getSpillAlignment(RC);
2214       int FI = MFI.CreateStackObject(Size, Align, false);
2215       RS->addScavengingFrameIndex(FI);
2216       LLVM_DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
2217                         << " as the emergency spill slot.\n");
2218     }
2219   }
2220 
2221   // Adding the size of additional 64bit GPR saves.
2222   CSStackSize += 8 * (SavedRegs.count() - NumSavedRegs);
2223   unsigned AlignedCSStackSize = alignTo(CSStackSize, 16);
2224   LLVM_DEBUG(dbgs() << "Estimated stack frame size: "
2225                << EstimatedStackSize + AlignedCSStackSize
2226                << " bytes.\n");
2227 
2228   // Round up to register pair alignment to avoid additional SP adjustment
2229   // instructions.
2230   AFI->setCalleeSavedStackSize(AlignedCSStackSize);
2231   AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
2232 }
2233 
2234 bool AArch64FrameLowering::enableStackSlotScavenging(
2235     const MachineFunction &MF) const {
2236   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
2237   return AFI->hasCalleeSaveStackFreeSpace();
2238 }
2239 
2240 void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
2241     MachineFunction &MF, RegScavenger *RS) const {
2242   // If this function isn't doing Win64-style C++ EH, we don't need to do
2243   // anything.
2244   if (!MF.hasEHFunclets())
2245     return;
2246   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
2247   MachineFrameInfo &MFI = MF.getFrameInfo();
2248   WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
2249 
2250   MachineBasicBlock &MBB = MF.front();
2251   auto MBBI = MBB.begin();
2252   while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
2253     ++MBBI;
2254 
2255   // Create an UnwindHelp object.
2256   int UnwindHelpFI =
2257       MFI.CreateStackObject(/*size*/8, /*alignment*/16, false);
2258   EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
2259   // We need to store -2 into the UnwindHelp object at the start of the
2260   // function.
2261   DebugLoc DL;
2262   RS->enterBasicBlockEnd(MBB);
2263   RS->backward(std::prev(MBBI));
2264   unsigned DstReg = RS->FindUnusedReg(&AArch64::GPR64commonRegClass);
2265   assert(DstReg && "There must be a free register after frame setup");
2266   BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), DstReg).addImm(-2);
2267   BuildMI(MBB, MBBI, DL, TII.get(AArch64::STURXi))
2268       .addReg(DstReg, getKillRegState(true))
2269       .addFrameIndex(UnwindHelpFI)
2270       .addImm(0);
2271 }
2272 
2273 /// For Win64 AArch64 EH, the offset to the Unwind object is from the SP before
2274 /// the update.  This is easily retrieved as it is exactly the offset that is set
2275 /// in processFunctionBeforeFrameFinalized.
2276 int AArch64FrameLowering::getFrameIndexReferencePreferSP(
2277     const MachineFunction &MF, int FI, unsigned &FrameReg,
2278     bool IgnoreSPUpdates) const {
2279   const MachineFrameInfo &MFI = MF.getFrameInfo();
2280   LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is "
2281                     << MFI.getObjectOffset(FI) << "\n");
2282   FrameReg = AArch64::SP;
2283   return MFI.getObjectOffset(FI);
2284 }
2285 
2286 /// The parent frame offset (aka dispFrame) is only used on X86_64 to retrieve
2287 /// the parent's frame pointer
2288 unsigned AArch64FrameLowering::getWinEHParentFrameOffset(
2289     const MachineFunction &MF) const {
2290   return 0;
2291 }
2292 
2293 /// Funclets only need to account for space for the callee saved registers,
2294 /// as the locals are accounted for in the parent's stack frame.
2295 unsigned AArch64FrameLowering::getWinEHFuncletFrameSize(
2296     const MachineFunction &MF) const {
2297   // This is the size of the pushed CSRs.
2298   unsigned CSSize =
2299       MF.getInfo<AArch64FunctionInfo>()->getCalleeSavedStackSize();
2300   // This is the amount of stack a funclet needs to allocate.
2301   return alignTo(CSSize + MF.getFrameInfo().getMaxCallFrameSize(),
2302                  getStackAlignment());
2303 }
2304