1 //===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the ARM implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 //
13 // This file contains the ARM implementation of TargetFrameLowering class.
14 //
15 // On ARM, stack frames are structured as follows:
16 //
17 // The stack grows downward.
18 //
19 // All of the individual frame areas on the frame below are optional, i.e. it's
20 // possible to create a function so that the particular area isn't present
21 // in the frame.
22 //
23 // At function entry, the "frame" looks as follows:
24 //
25 // |                                   | Higher address
26 // |-----------------------------------|
27 // |                                   |
28 // | arguments passed on the stack     |
29 // |                                   |
30 // |-----------------------------------| <- sp
31 // |                                   | Lower address
32 //
33 //
34 // After the prologue has run, the frame has the following general structure.
// Technically the last frame area (VLAs) doesn't get created until the
// main function body, after the prologue is run. However, it's depicted here
// for completeness.
38 //
39 // |                                   | Higher address
40 // |-----------------------------------|
41 // |                                   |
42 // | arguments passed on the stack     |
43 // |                                   |
44 // |-----------------------------------| <- (sp at function entry)
45 // |                                   |
46 // | varargs from registers            |
47 // |                                   |
48 // |-----------------------------------|
49 // |                                   |
50 // | prev_fp, prev_lr                  |
51 // | (a.k.a. "frame record")           |
52 // |                                   |
53 // |- - - - - - - - - - - - - - - - - -| <- fp (r7 or r11)
54 // |                                   |
55 // | callee-saved gpr registers        |
56 // |                                   |
57 // |-----------------------------------|
58 // |                                   |
59 // | callee-saved fp/simd regs         |
60 // |                                   |
61 // |-----------------------------------|
62 // |.empty.space.to.make.part.below....|
63 // |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
64 // |.the.standard.8-byte.alignment.....|  compile time; if present)
65 // |-----------------------------------|
66 // |                                   |
67 // | local variables of fixed size     |
68 // | including spill slots             |
69 // |-----------------------------------| <- base pointer (not defined by ABI,
70 // |.variable-sized.local.variables....|       LLVM chooses r6)
71 // |.(VLAs)............................| (size of this area is unknown at
72 // |...................................|  compile time)
73 // |-----------------------------------| <- sp
74 // |                                   | Lower address
75 //
76 //
// To access data in a frame at compile time, a constant offset to it from
// one of the pointers (fp, bp, sp) must be computable. The size
79 // of the areas with a dotted background cannot be computed at compile-time
80 // if they are present, making it required to have all three of fp, bp and
81 // sp to be set up to be able to access all contents in the frame areas,
82 // assuming all of the frame areas are non-empty.
83 //
84 // For most functions, some of the frame areas are empty. For those functions,
85 // it may not be necessary to set up fp or bp:
86 // * A base pointer is definitely needed when there are both VLAs and local
87 //   variables with more-than-default alignment requirements.
88 // * A frame pointer is definitely needed when there are local variables with
89 //   more-than-default alignment requirements.
90 //
91 // In some cases when a base pointer is not strictly needed, it is generated
92 // anyway when offsets from the frame pointer to access local variables become
93 // so large that the offset can't be encoded in the immediate fields of loads
94 // or stores.
95 //
96 // The frame pointer might be chosen to be r7 or r11, depending on the target
97 // architecture and operating system. See ARMSubtarget::getFramePointerReg for
98 // details.
99 //
100 // Outgoing function arguments must be at the bottom of the stack frame when
101 // calling another function. If we do not have variable-sized stack objects, we
102 // can allocate a "reserved call frame" area at the bottom of the local
103 // variable area, large enough for all outgoing calls. If we do have VLAs, then
104 // the stack pointer must be decremented and incremented around each call to
105 // make space for the arguments below the VLAs.
106 //
107 //===----------------------------------------------------------------------===//
108 
109 #include "ARMFrameLowering.h"
110 #include "ARMBaseInstrInfo.h"
111 #include "ARMBaseRegisterInfo.h"
112 #include "ARMConstantPoolValue.h"
113 #include "ARMMachineFunctionInfo.h"
114 #include "ARMSubtarget.h"
115 #include "MCTargetDesc/ARMAddressingModes.h"
116 #include "MCTargetDesc/ARMBaseInfo.h"
117 #include "Utils/ARMBaseInfo.h"
118 #include "llvm/ADT/BitVector.h"
119 #include "llvm/ADT/STLExtras.h"
120 #include "llvm/ADT/SmallPtrSet.h"
121 #include "llvm/ADT/SmallVector.h"
122 #include "llvm/CodeGen/MachineBasicBlock.h"
123 #include "llvm/CodeGen/MachineConstantPool.h"
124 #include "llvm/CodeGen/MachineFrameInfo.h"
125 #include "llvm/CodeGen/MachineFunction.h"
126 #include "llvm/CodeGen/MachineInstr.h"
127 #include "llvm/CodeGen/MachineInstrBuilder.h"
128 #include "llvm/CodeGen/MachineJumpTableInfo.h"
129 #include "llvm/CodeGen/MachineModuleInfo.h"
130 #include "llvm/CodeGen/MachineOperand.h"
131 #include "llvm/CodeGen/MachineRegisterInfo.h"
132 #include "llvm/CodeGen/RegisterScavenging.h"
133 #include "llvm/CodeGen/TargetInstrInfo.h"
134 #include "llvm/CodeGen/TargetOpcodes.h"
135 #include "llvm/CodeGen/TargetRegisterInfo.h"
136 #include "llvm/CodeGen/TargetSubtargetInfo.h"
137 #include "llvm/IR/Attributes.h"
138 #include "llvm/IR/CallingConv.h"
139 #include "llvm/IR/DebugLoc.h"
140 #include "llvm/IR/Function.h"
141 #include "llvm/MC/MCAsmInfo.h"
142 #include "llvm/MC/MCContext.h"
143 #include "llvm/MC/MCDwarf.h"
144 #include "llvm/MC/MCInstrDesc.h"
145 #include "llvm/MC/MCRegisterInfo.h"
146 #include "llvm/Support/CodeGen.h"
147 #include "llvm/Support/CommandLine.h"
148 #include "llvm/Support/Compiler.h"
149 #include "llvm/Support/Debug.h"
150 #include "llvm/Support/ErrorHandling.h"
151 #include "llvm/Support/MathExtras.h"
152 #include "llvm/Support/raw_ostream.h"
153 #include "llvm/Target/TargetMachine.h"
154 #include "llvm/Target/TargetOptions.h"
155 #include <algorithm>
156 #include <cassert>
157 #include <cstddef>
158 #include <cstdint>
159 #include <iterator>
160 #include <utility>
161 #include <vector>
162 
163 #define DEBUG_TYPE "arm-frame-lowering"
164 
165 using namespace llvm;
166 
167 static cl::opt<bool>
168 SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
169                      cl::desc("Align ARM NEON spills in prolog and epilog"));
170 
171 static MachineBasicBlock::iterator
172 skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
173                         unsigned NumAlignedDPRCS2Regs);
174 
175 ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
176     : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, Align(4)),
177       STI(sti) {}
178 
179 bool ARMFrameLowering::keepFramePointer(const MachineFunction &MF) const {
180   // iOS always has a FP for backtracking, force other targets to keep their FP
181   // when doing FastISel. The emitted code is currently superior, and in cases
182   // like test-suite's lencod FastISel isn't quite correct when FP is eliminated.
183   return MF.getSubtarget<ARMSubtarget>().useFastISel();
184 }
185 
186 /// Returns true if the target can safely skip saving callee-saved registers
187 /// for noreturn nounwind functions.
188 bool ARMFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const {
189   assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
190          MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
191          !MF.getFunction().hasFnAttribute(Attribute::UWTable));
192 
193   // Frame pointer and link register are not treated as normal CSR, thus we
194   // can always skip CSR saves for nonreturning functions.
195   return true;
196 }
197 
198 /// hasFP - Return true if the specified function should have a dedicated frame
199 /// pointer register.  This is true if the function has variable sized allocas
200 /// or if frame pointer elimination is disabled.
201 bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
202   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
203   const MachineFrameInfo &MFI = MF.getFrameInfo();
204 
205   // ABI-required frame pointer.
206   if (MF.getTarget().Options.DisableFramePointerElim(MF))
207     return true;
208 
209   // Frame pointer required for use within this function.
210   return (RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
211           MFI.isFrameAddressTaken());
212 }
213 
214 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
215 /// not required, we reserve argument space for call sites in the function
216 /// immediately on entry to the current function.  This eliminates the need for
217 /// add/sub sp brackets around call sites.  Returns true if the call frame is
218 /// included as part of the stack frame.
219 bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
220   const MachineFrameInfo &MFI = MF.getFrameInfo();
221   unsigned CFSize = MFI.getMaxCallFrameSize();
222   // It's not always a good idea to include the call frame as part of the
223   // stack frame. ARM (especially Thumb) has small immediate offset to
224   // address the stack frame. So a large call frame can cause poor codegen
225   // and may even makes it impossible to scavenge a register.
226   if (CFSize >= ((1 << 12) - 1) / 2)  // Half of imm12
227     return false;
228 
229   return !MFI.hasVarSizedObjects();
230 }
231 
232 /// canSimplifyCallFramePseudos - If there is a reserved call frame, the
233 /// call frame pseudos can be simplified.  Unlike most targets, having a FP
234 /// is not sufficient here since we still may reference some objects via SP
235 /// even when FP is available in Thumb2 mode.
236 bool
237 ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
238   return hasReservedCallFrame(MF) || MF.getFrameInfo().hasVarSizedObjects();
239 }
240 
241 // Returns how much of the incoming argument stack area we should clean up in an
242 // epilogue. For the C calling convention this will be 0, for guaranteed tail
243 // call conventions it can be positive (a normal return or a tail call to a
244 // function that uses less stack space for arguments) or negative (for a tail
245 // call to a function that needs more stack space than us for arguments).
246 static int getArgumentStackToRestore(MachineFunction &MF,
247                                      MachineBasicBlock &MBB) {
248   MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
249   bool IsTailCallReturn = false;
250   if (MBB.end() != MBBI) {
251     unsigned RetOpcode = MBBI->getOpcode();
252     IsTailCallReturn = RetOpcode == ARM::TCRETURNdi ||
253                        RetOpcode == ARM::TCRETURNri;
254   }
255   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
256 
257   int ArgumentPopSize = 0;
258   if (IsTailCallReturn) {
259     MachineOperand &StackAdjust = MBBI->getOperand(1);
260 
261     // For a tail-call in a callee-pops-arguments environment, some or all of
262     // the stack may actually be in use for the call's arguments, this is
263     // calculated during LowerCall and consumed here...
264     ArgumentPopSize = StackAdjust.getImm();
265   } else {
266     // ... otherwise the amount to pop is *all* of the argument space,
267     // conveniently stored in the MachineFunctionInfo by
268     // LowerFormalArguments. This will, of course, be zero for the C calling
269     // convention.
270     ArgumentPopSize = AFI->getArgumentStackToRestore();
271   }
272 
273   return ArgumentPopSize;
274 }
275 
276 static bool needsWinCFI(const MachineFunction &MF) {
277   const Function &F = MF.getFunction();
278   return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
279          F.needsUnwindTableEntry();
280 }
281 
282 // Given a load or a store instruction, generate an appropriate unwinding SEH
283 // code on Windows.
284 static MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI,
285                                              const TargetInstrInfo &TII,
286                                              unsigned Flags) {
287   unsigned Opc = MBBI->getOpcode();
288   MachineBasicBlock *MBB = MBBI->getParent();
289   MachineFunction &MF = *MBB->getParent();
290   DebugLoc DL = MBBI->getDebugLoc();
291   MachineInstrBuilder MIB;
292   const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
293   const ARMBaseRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
294 
295   Flags |= MachineInstr::NoMerge;
296 
297   switch (Opc) {
298   default:
299     report_fatal_error("No SEH Opcode for instruction " + TII.getName(Opc));
300     break;
301   case ARM::t2ADDri:   // add.w r11, sp, #xx
302   case ARM::t2ADDri12: // add.w r11, sp, #xx
303   case ARM::t2SUBri:   // sub.w r4, r11, #xx
304   case ARM::t2MOVTi16: // movt  r4, #xx
305   case ARM::t2MOVi16:  // movw  r4, #xx
306   case ARM::tBL:       // bl __chkstk
307     // These are harmless if used for just setting up a frame pointer,
308     // but that frame pointer can't be relied upon for unwinding, unless
309     // set up with SEH_SaveSP.
310     MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
311               .addImm(/*Wide=*/1)
312               .setMIFlags(Flags);
313     break;
314 
315   case ARM::tBLXr: // blx r12 (__chkstk)
316     MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
317               .addImm(/*Wide=*/0)
318               .setMIFlags(Flags);
319     break;
320 
321   case ARM::t2MOVi32imm: // movw+movt
322     // This pseudo instruction expands into two mov instructions. If the
323     // second operand is a symbol reference, this will stay as two wide
324     // instructions, movw+movt. If they're immediates, the first one can
325     // end up as a narrow mov though.
326     // As two SEH instructions are appended here, they won't get interleaved
327     // between the two final movw/movt instructions, but it doesn't make any
328     // practical difference.
329     MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
330               .addImm(/*Wide=*/1)
331               .setMIFlags(Flags);
332     MBB->insertAfter(MBBI, MIB);
333     MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
334               .addImm(/*Wide=*/1)
335               .setMIFlags(Flags);
336     break;
337 
338   case ARM::t2LDMIA_RET:
339   case ARM::t2LDMIA_UPD:
340   case ARM::t2STMDB_UPD: {
341     unsigned Mask = 0;
342     for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) {
343       const MachineOperand &MO = MBBI->getOperand(i);
344       if (!MO.isReg() || MO.isImplicit())
345         continue;
346       unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
347       if (Reg == 15)
348         Reg = 14;
349       Mask |= 1 << Reg;
350     }
351     unsigned SEHOpc =
352         (Opc == ARM::t2LDMIA_RET) ? ARM::SEH_SaveRegs_Ret : ARM::SEH_SaveRegs;
353     MIB = BuildMI(MF, DL, TII.get(SEHOpc))
354               .addImm(Mask)
355               .addImm(/*Wide=*/1)
356               .setMIFlags(Flags);
357     break;
358   }
359   case ARM::VSTMDDB_UPD:
360   case ARM::VLDMDIA_UPD: {
361     int First = -1, Last = 0;
362     for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) {
363       const MachineOperand &MO = MBBI->getOperand(i);
364       unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
365       if (First == -1)
366         First = Reg;
367       Last = Reg;
368     }
369     MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveFRegs))
370               .addImm(First)
371               .addImm(Last)
372               .setMIFlags(Flags);
373     break;
374   }
375   case ARM::tSUBspi:
376   case ARM::tADDspi:
377     MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
378               .addImm(MBBI->getOperand(2).getImm() * 4)
379               .addImm(/*Wide=*/0)
380               .setMIFlags(Flags);
381     break;
382   case ARM::t2SUBspImm:
383   case ARM::t2SUBspImm12:
384   case ARM::t2ADDspImm:
385   case ARM::t2ADDspImm12:
386     MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
387               .addImm(MBBI->getOperand(2).getImm())
388               .addImm(/*Wide=*/1)
389               .setMIFlags(Flags);
390     break;
391 
392   case ARM::tMOVr:
393     if (MBBI->getOperand(1).getReg() == ARM::SP &&
394         (Flags & MachineInstr::FrameSetup)) {
395       unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
396       MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
397                 .addImm(Reg)
398                 .setMIFlags(Flags);
399     } else if (MBBI->getOperand(0).getReg() == ARM::SP &&
400                (Flags & MachineInstr::FrameDestroy)) {
401       unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
402       MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
403                 .addImm(Reg)
404                 .setMIFlags(Flags);
405     } else {
406       report_fatal_error("No SEH Opcode for MOV");
407     }
408     break;
409 
410   case ARM::tBX_RET:
411   case ARM::TCRETURNri:
412     MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
413               .addImm(/*Wide=*/0)
414               .setMIFlags(Flags);
415     break;
416 
417   case ARM::TCRETURNdi:
418     MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
419               .addImm(/*Wide=*/1)
420               .setMIFlags(Flags);
421     break;
422   }
423   return MBB->insertAfter(MBBI, MIB);
424 }
425 
426 static MachineBasicBlock::iterator
427 initMBBRange(MachineBasicBlock &MBB, const MachineBasicBlock::iterator &MBBI) {
428   if (MBBI == MBB.begin())
429     return MachineBasicBlock::iterator();
430   return std::prev(MBBI);
431 }
432 
433 static void insertSEHRange(MachineBasicBlock &MBB,
434                            MachineBasicBlock::iterator Start,
435                            const MachineBasicBlock::iterator &End,
436                            const ARMBaseInstrInfo &TII, unsigned MIFlags) {
437   if (Start.isValid())
438     Start = std::next(Start);
439   else
440     Start = MBB.begin();
441 
442   for (auto MI = Start; MI != End;) {
443     auto Next = std::next(MI);
444     // Check if this instruction already has got a SEH opcode added. In that
445     // case, don't do this generic mapping.
446     if (Next != End && isSEHInstruction(*Next)) {
447       MI = std::next(Next);
448       while (MI != End && isSEHInstruction(*MI))
449         ++MI;
450       continue;
451     }
452     insertSEH(MI, TII, MIFlags);
453     MI = Next;
454   }
455 }
456 
457 static void emitRegPlusImmediate(
458     bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
459     const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
460     unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
461     ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
462   if (isARM)
463     emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
464                             Pred, PredReg, TII, MIFlags);
465   else
466     emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
467                            Pred, PredReg, TII, MIFlags);
468 }
469 
470 static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
471                          MachineBasicBlock::iterator &MBBI, const DebugLoc &dl,
472                          const ARMBaseInstrInfo &TII, int NumBytes,
473                          unsigned MIFlags = MachineInstr::NoFlags,
474                          ARMCC::CondCodes Pred = ARMCC::AL,
475                          unsigned PredReg = 0) {
476   emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
477                        MIFlags, Pred, PredReg);
478 }
479 
480 static int sizeOfSPAdjustment(const MachineInstr &MI) {
481   int RegSize;
482   switch (MI.getOpcode()) {
483   case ARM::VSTMDDB_UPD:
484     RegSize = 8;
485     break;
486   case ARM::STMDB_UPD:
487   case ARM::t2STMDB_UPD:
488     RegSize = 4;
489     break;
490   case ARM::t2STR_PRE:
491   case ARM::STR_PRE_IMM:
492     return 4;
493   default:
494     llvm_unreachable("Unknown push or pop like instruction");
495   }
496 
497   int count = 0;
498   // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
499   // pred) so the list starts at 4.
500   for (int i = MI.getNumOperands() - 1; i >= 4; --i)
501     count += RegSize;
502   return count;
503 }
504 
505 static bool WindowsRequiresStackProbe(const MachineFunction &MF,
506                                       size_t StackSizeInBytes) {
507   const MachineFrameInfo &MFI = MF.getFrameInfo();
508   const Function &F = MF.getFunction();
509   unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;
510   if (F.hasFnAttribute("stack-probe-size"))
511     F.getFnAttribute("stack-probe-size")
512         .getValueAsString()
513         .getAsInteger(0, StackProbeSize);
514   return (StackSizeInBytes >= StackProbeSize) &&
515          !F.hasFnAttribute("no-stack-arg-probe");
516 }
517 
518 namespace {
519 
520 struct StackAdjustingInsts {
521   struct InstInfo {
522     MachineBasicBlock::iterator I;
523     unsigned SPAdjust;
524     bool BeforeFPSet;
525   };
526 
527   SmallVector<InstInfo, 4> Insts;
528 
529   void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
530                bool BeforeFPSet = false) {
531     InstInfo Info = {I, SPAdjust, BeforeFPSet};
532     Insts.push_back(Info);
533   }
534 
535   void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
536     auto Info =
537         llvm::find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
538     assert(Info != Insts.end() && "invalid sp adjusting instruction");
539     Info->SPAdjust += ExtraBytes;
540   }
541 
542   void emitDefCFAOffsets(MachineBasicBlock &MBB, const DebugLoc &dl,
543                          const ARMBaseInstrInfo &TII, bool HasFP) {
544     MachineFunction &MF = *MBB.getParent();
545     unsigned CFAOffset = 0;
546     for (auto &Info : Insts) {
547       if (HasFP && !Info.BeforeFPSet)
548         return;
549 
550       CFAOffset += Info.SPAdjust;
551       unsigned CFIIndex = MF.addFrameInst(
552           MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
553       BuildMI(MBB, std::next(Info.I), dl,
554               TII.get(TargetOpcode::CFI_INSTRUCTION))
555               .addCFIIndex(CFIIndex)
556               .setMIFlags(MachineInstr::FrameSetup);
557     }
558   }
559 };
560 
561 } // end anonymous namespace
562 
563 /// Emit an instruction sequence that will align the address in
564 /// register Reg by zero-ing out the lower bits.  For versions of the
565 /// architecture that support Neon, this must be done in a single
566 /// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
567 /// single instruction. That function only gets called when optimizing
568 /// spilling of D registers on a core with the Neon instruction set
569 /// present.
570 static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
571                                      const TargetInstrInfo &TII,
572                                      MachineBasicBlock &MBB,
573                                      MachineBasicBlock::iterator MBBI,
574                                      const DebugLoc &DL, const unsigned Reg,
575                                      const Align Alignment,
576                                      const bool MustBeSingleInstruction) {
577   const ARMSubtarget &AST = MF.getSubtarget<ARMSubtarget>();
578   const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
579   const unsigned AlignMask = Alignment.value() - 1U;
580   const unsigned NrBitsToZero = Log2(Alignment);
581   assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
582   if (!AFI->isThumbFunction()) {
583     // if the BFC instruction is available, use that to zero the lower
584     // bits:
585     //   bfc Reg, #0, log2(Alignment)
586     // otherwise use BIC, if the mask to zero the required number of bits
587     // can be encoded in the bic immediate field
588     //   bic Reg, Reg, Alignment-1
589     // otherwise, emit
590     //   lsr Reg, Reg, log2(Alignment)
591     //   lsl Reg, Reg, log2(Alignment)
592     if (CanUseBFC) {
593       BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
594           .addReg(Reg, RegState::Kill)
595           .addImm(~AlignMask)
596           .add(predOps(ARMCC::AL));
597     } else if (AlignMask <= 255) {
598       BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
599           .addReg(Reg, RegState::Kill)
600           .addImm(AlignMask)
601           .add(predOps(ARMCC::AL))
602           .add(condCodeOp());
603     } else {
604       assert(!MustBeSingleInstruction &&
605              "Shouldn't call emitAligningInstructions demanding a single "
606              "instruction to be emitted for large stack alignment for a target "
607              "without BFC.");
608       BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
609           .addReg(Reg, RegState::Kill)
610           .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))
611           .add(predOps(ARMCC::AL))
612           .add(condCodeOp());
613       BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
614           .addReg(Reg, RegState::Kill)
615           .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))
616           .add(predOps(ARMCC::AL))
617           .add(condCodeOp());
618     }
619   } else {
620     // Since this is only reached for Thumb-2 targets, the BFC instruction
621     // should always be available.
622     assert(CanUseBFC);
623     BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
624         .addReg(Reg, RegState::Kill)
625         .addImm(~AlignMask)
626         .add(predOps(ARMCC::AL));
627   }
628 }
629 
630 /// We need the offset of the frame pointer relative to other MachineFrameInfo
631 /// offsets which are encoded relative to SP at function begin.
632 /// See also emitPrologue() for how the FP is set up.
633 /// Unfortunately we cannot determine this value in determineCalleeSaves() yet
634 /// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
635 /// this to produce a conservative estimate that we check in an assert() later.
636 static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI) {
637   // For Thumb1, push.w isn't available, so the first push will always push
638   // r7 and lr onto the stack first.
639   if (AFI.isThumb1OnlyFunction())
640     return -AFI.getArgRegsSaveSize() - (2 * 4);
641   // This is a conservative estimation: Assume the frame pointer being r7 and
642   // pc("r15") up to r8 getting spilled before (= 8 registers).
643   int FPCXTSaveSize = (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
644   return - FPCXTSaveSize - AFI.getArgRegsSaveSize() - (8 * 4);
645 }
646 
647 void ARMFrameLowering::emitPrologue(MachineFunction &MF,
648                                     MachineBasicBlock &MBB) const {
649   MachineBasicBlock::iterator MBBI = MBB.begin();
650   MachineFrameInfo  &MFI = MF.getFrameInfo();
651   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
652   MachineModuleInfo &MMI = MF.getMMI();
653   MCContext &Context = MMI.getContext();
654   const TargetMachine &TM = MF.getTarget();
655   const MCRegisterInfo *MRI = Context.getRegisterInfo();
656   const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
657   const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
658   assert(!AFI->isThumb1OnlyFunction() &&
659          "This emitPrologue does not support Thumb1!");
660   bool isARM = !AFI->isThumbFunction();
661   Align Alignment = STI.getFrameLowering()->getStackAlign();
662   unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
663   unsigned NumBytes = MFI.getStackSize();
664   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
665   int FPCXTSaveSize = 0;
666   bool NeedsWinCFI = needsWinCFI(MF);
667 
668   // Debug location must be unknown since the first debug location is used
669   // to determine the end of the prologue.
670   DebugLoc dl;
671 
672   Register FramePtr = RegInfo->getFrameRegister(MF);
673 
674   // Determine the sizes of each callee-save spill areas and record which frame
675   // belongs to which callee-save spill areas.
676   unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
677   int FramePtrSpillFI = 0;
678   int D8SpillFI = 0;
679 
680   // All calls are tail calls in GHC calling conv, and functions have no
681   // prologue/epilogue.
682   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
683     return;
684 
685   StackAdjustingInsts DefCFAOffsetCandidates;
686   bool HasFP = hasFP(MF);
687 
688   if (!AFI->hasStackFrame() &&
689       (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
690     if (NumBytes != 0) {
691       emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
692                    MachineInstr::FrameSetup);
693       DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes, true);
694     }
695     if (!NeedsWinCFI)
696       DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
697     if (NeedsWinCFI && MBBI != MBB.begin()) {
698       insertSEHRange(MBB, {}, MBBI, TII, MachineInstr::FrameSetup);
699       BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_PrologEnd))
700           .setMIFlag(MachineInstr::FrameSetup);
701       MF.setHasWinCFI(true);
702     }
703     return;
704   }
705 
706   // Determine spill area sizes.
707   for (const CalleeSavedInfo &I : CSI) {
708     Register Reg = I.getReg();
709     int FI = I.getFrameIdx();
710     switch (Reg) {
711     case ARM::R8:
712     case ARM::R9:
713     case ARM::R10:
714     case ARM::R11:
715     case ARM::R12:
716       if (STI.splitFramePushPop(MF)) {
717         GPRCS2Size += 4;
718         break;
719       }
720       LLVM_FALLTHROUGH;
721     case ARM::R0:
722     case ARM::R1:
723     case ARM::R2:
724     case ARM::R3:
725     case ARM::R4:
726     case ARM::R5:
727     case ARM::R6:
728     case ARM::R7:
729     case ARM::LR:
730       if (Reg == FramePtr)
731         FramePtrSpillFI = FI;
732       GPRCS1Size += 4;
733       break;
734     case ARM::FPCXTNS:
735       FPCXTSaveSize = 4;
736       break;
737     default:
738       // This is a DPR. Exclude the aligned DPRCS2 spills.
739       if (Reg == ARM::D8)
740         D8SpillFI = FI;
741       if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
742         DPRCSSize += 8;
743     }
744   }
745 
746   MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push;
747 
748   // Move past the PAC computation.
749   if (AFI->shouldSignReturnAddress())
750     LastPush = MBBI++;
751 
752   // Move past FPCXT area.
753   if (FPCXTSaveSize > 0) {
754     LastPush = MBBI++;
755     DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, true);
756   }
757 
758   // Allocate the vararg register save area.
759   if (ArgRegsSaveSize) {
760     emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
761                  MachineInstr::FrameSetup);
762     LastPush = std::prev(MBBI);
763     DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, true);
764   }
765 
766   // Move past area 1.
767   if (GPRCS1Size > 0) {
768     GPRCS1Push = LastPush = MBBI++;
769     DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
770   }
771 
772   // Determine starting offsets of spill areas.
773   unsigned FPCXTOffset = NumBytes - ArgRegsSaveSize - FPCXTSaveSize;
774   unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
775   unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
776   Align DPRAlign = DPRCSSize ? std::min(Align(8), Alignment) : Align(4);
777   unsigned DPRGapSize =
778       (GPRCS1Size + GPRCS2Size + FPCXTSaveSize + ArgRegsSaveSize) %
779       DPRAlign.value();
780 
781   unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
782   int FramePtrOffsetInPush = 0;
783   if (HasFP) {
784     int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
785     assert(getMaxFPOffset(STI, *AFI) <= FPOffset &&
786            "Max FP estimation is wrong");
787     FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize;
788     AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
789                                 NumBytes);
790   }
791   AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
792   AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
793   AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
794 
795   // Move past area 2.
796   if (GPRCS2Size > 0) {
797     GPRCS2Push = LastPush = MBBI++;
798     DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
799   }
800 
801   // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
802   // .cfi_offset operations will reflect that.
803   if (DPRGapSize) {
804     assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
805     if (LastPush != MBB.end() &&
806         tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize))
807       DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
808     else {
809       emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
810                    MachineInstr::FrameSetup);
811       DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize);
812     }
813   }
814 
815   // Move past area 3.
816   if (DPRCSSize > 0) {
817     // Since vpush register list cannot have gaps, there may be multiple vpush
818     // instructions in the prologue.
819     while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
820       DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI));
821       LastPush = MBBI++;
822     }
823   }
824 
825   // Move past the aligned DPRCS2 area.
826   if (AFI->getNumAlignedDPRCS2Regs() > 0) {
827     MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs());
828     // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
829     // leaves the stack pointer pointing to the DPRCS2 area.
830     //
831     // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
832     NumBytes += MFI.getObjectOffset(D8SpillFI);
833   } else
834     NumBytes = DPRCSOffset;
835 
836   if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
837     uint32_t NumWords = NumBytes >> 2;
838 
839     if (NumWords < 65536) {
840       BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
841           .addImm(NumWords)
842           .setMIFlags(MachineInstr::FrameSetup)
843           .add(predOps(ARMCC::AL));
844     } else {
845       // Split into two instructions here, instead of using t2MOVi32imm,
846       // to allow inserting accurate SEH instructions (including accurate
847       // instruction size for each of them).
848       BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
849           .addImm(NumWords & 0xffff)
850           .setMIFlags(MachineInstr::FrameSetup)
851           .add(predOps(ARMCC::AL));
852       BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), ARM::R4)
853           .addReg(ARM::R4)
854           .addImm(NumWords >> 16)
855           .setMIFlags(MachineInstr::FrameSetup)
856           .add(predOps(ARMCC::AL));
857     }
858 
859     switch (TM.getCodeModel()) {
860     case CodeModel::Tiny:
861       llvm_unreachable("Tiny code model not available on ARM.");
862     case CodeModel::Small:
863     case CodeModel::Medium:
864     case CodeModel::Kernel:
865       BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
866           .add(predOps(ARMCC::AL))
867           .addExternalSymbol("__chkstk")
868           .addReg(ARM::R4, RegState::Implicit)
869           .setMIFlags(MachineInstr::FrameSetup);
870       break;
871     case CodeModel::Large:
872       BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)
873         .addExternalSymbol("__chkstk")
874         .setMIFlags(MachineInstr::FrameSetup);
875 
876       BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
877           .add(predOps(ARMCC::AL))
878           .addReg(ARM::R12, RegState::Kill)
879           .addReg(ARM::R4, RegState::Implicit)
880           .setMIFlags(MachineInstr::FrameSetup);
881       break;
882     }
883 
884     MachineInstrBuilder Instr, SEH;
885     Instr = BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
886                 .addReg(ARM::SP, RegState::Kill)
887                 .addReg(ARM::R4, RegState::Kill)
888                 .setMIFlags(MachineInstr::FrameSetup)
889                 .add(predOps(ARMCC::AL))
890                 .add(condCodeOp());
891     if (NeedsWinCFI) {
892       SEH = BuildMI(MF, dl, TII.get(ARM::SEH_StackAlloc))
893                 .addImm(NumBytes)
894                 .addImm(/*Wide=*/1)
895                 .setMIFlags(MachineInstr::FrameSetup);
896       MBB.insertAfter(Instr, SEH);
897     }
898     NumBytes = 0;
899   }
900 
901   if (NumBytes) {
902     // Adjust SP after all the callee-save spills.
903     if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
904         tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, NumBytes))
905       DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
906     else {
907       emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
908                    MachineInstr::FrameSetup);
909       DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes);
910     }
911 
912     if (HasFP && isARM)
913       // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
914       // Note it's not safe to do this in Thumb2 mode because it would have
915       // taken two instructions:
916       // mov sp, r7
917       // sub sp, #24
918       // If an interrupt is taken between the two instructions, then sp is in
919       // an inconsistent state (pointing to the middle of callee-saved area).
920       // The interrupt handler can end up clobbering the registers.
921       AFI->setShouldRestoreSPFromFP(true);
922   }
923 
924   // Set FP to point to the stack slot that contains the previous FP.
925   // For iOS, FP is R7, which has now been stored in spill area 1.
926   // Otherwise, if this is not iOS, all the callee-saved registers go
927   // into spill area 1, including the FP in R11.  In either case, it
928   // is in area one and the adjustment needs to take place just after
929   // that push.
930   if (HasFP) {
931     MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push);
932     unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
933     emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush,
934                          dl, TII, FramePtr, ARM::SP,
935                          PushSize + FramePtrOffsetInPush,
936                          MachineInstr::FrameSetup);
937     if (!NeedsWinCFI) {
938       if (FramePtrOffsetInPush + PushSize != 0) {
939         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
940             nullptr, MRI->getDwarfRegNum(FramePtr, true),
941             FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush));
942         BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
943             .addCFIIndex(CFIIndex)
944             .setMIFlags(MachineInstr::FrameSetup);
945       } else {
946         unsigned CFIIndex =
947             MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
948                 nullptr, MRI->getDwarfRegNum(FramePtr, true)));
949         BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
950             .addCFIIndex(CFIIndex)
951             .setMIFlags(MachineInstr::FrameSetup);
952       }
953     }
954   }
955 
956   // Emit a SEH opcode indicating the prologue end. The rest of the prologue
957   // instructions below don't need to be replayed to unwind the stack.
958   if (NeedsWinCFI && MBBI != MBB.begin()) {
959     insertSEHRange(MBB, {}, MBBI, TII, MachineInstr::FrameSetup);
960     BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_PrologEnd))
961         .setMIFlag(MachineInstr::FrameSetup);
962     MF.setHasWinCFI(true);
963   }
964 
965   // Now that the prologue's actual instructions are finalised, we can insert
966   // the necessary DWARF cf instructions to describe the situation. Start by
967   // recording where each register ended up:
968   if (GPRCS1Size > 0 && !NeedsWinCFI) {
969     MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
970     int CFIIndex;
971     for (const auto &Entry : CSI) {
972       Register Reg = Entry.getReg();
973       int FI = Entry.getFrameIdx();
974       switch (Reg) {
975       case ARM::R8:
976       case ARM::R9:
977       case ARM::R10:
978       case ARM::R11:
979       case ARM::R12:
980         if (STI.splitFramePushPop(MF))
981           break;
982         LLVM_FALLTHROUGH;
983       case ARM::R0:
984       case ARM::R1:
985       case ARM::R2:
986       case ARM::R3:
987       case ARM::R4:
988       case ARM::R5:
989       case ARM::R6:
990       case ARM::R7:
991       case ARM::LR:
992         CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
993             nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
994         BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
995             .addCFIIndex(CFIIndex)
996             .setMIFlags(MachineInstr::FrameSetup);
997         break;
998       }
999     }
1000   }
1001 
1002   if (GPRCS2Size > 0 && !NeedsWinCFI) {
1003     MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
1004     for (const auto &Entry : CSI) {
1005       Register Reg = Entry.getReg();
1006       int FI = Entry.getFrameIdx();
1007       switch (Reg) {
1008       case ARM::R8:
1009       case ARM::R9:
1010       case ARM::R10:
1011       case ARM::R11:
1012       case ARM::R12:
1013         if (STI.splitFramePushPop(MF)) {
1014           unsigned DwarfReg = MRI->getDwarfRegNum(
1015               Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg, true);
1016           unsigned Offset = MFI.getObjectOffset(FI);
1017           unsigned CFIIndex = MF.addFrameInst(
1018               MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
1019           BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1020               .addCFIIndex(CFIIndex)
1021               .setMIFlags(MachineInstr::FrameSetup);
1022         }
1023         break;
1024       }
1025     }
1026   }
1027 
1028   if (DPRCSSize > 0 && !NeedsWinCFI) {
1029     // Since vpush register list cannot have gaps, there may be multiple vpush
1030     // instructions in the prologue.
1031     MachineBasicBlock::iterator Pos = std::next(LastPush);
1032     for (const auto &Entry : CSI) {
1033       Register Reg = Entry.getReg();
1034       int FI = Entry.getFrameIdx();
1035       if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&
1036           (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {
1037         unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
1038         unsigned Offset = MFI.getObjectOffset(FI);
1039         unsigned CFIIndex = MF.addFrameInst(
1040             MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
1041         BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1042             .addCFIIndex(CFIIndex)
1043             .setMIFlags(MachineInstr::FrameSetup);
1044       }
1045     }
1046   }
1047 
1048   // Now we can emit descriptions of where the canonical frame address was
1049   // throughout the process. If we have a frame pointer, it takes over the job
1050   // half-way through, so only the first few .cfi_def_cfa_offset instructions
1051   // actually get emitted.
1052   if (!NeedsWinCFI)
1053     DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
1054 
1055   if (STI.isTargetELF() && hasFP(MF))
1056     MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
1057                             AFI->getFramePtrSpillOffset());
1058 
1059   AFI->setFPCXTSaveAreaSize(FPCXTSaveSize);
1060   AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
1061   AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
1062   AFI->setDPRCalleeSavedGapSize(DPRGapSize);
1063   AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
1064 
1065   // If we need dynamic stack realignment, do it here. Be paranoid and make
1066   // sure if we also have VLAs, we have a base pointer for frame access.
1067   // If aligned NEON registers were spilled, the stack has already been
1068   // realigned.
1069   if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->hasStackRealignment(MF)) {
1070     Align MaxAlign = MFI.getMaxAlign();
1071     assert(!AFI->isThumb1OnlyFunction());
1072     if (!AFI->isThumbFunction()) {
1073       emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
1074                                false);
1075     } else {
1076       // We cannot use sp as source/dest register here, thus we're using r4 to
1077       // perform the calculations. We're emitting the following sequence:
1078       // mov r4, sp
1079       // -- use emitAligningInstructions to produce best sequence to zero
1080       // -- out lower bits in r4
1081       // mov sp, r4
1082       // FIXME: It will be better just to find spare register here.
1083       BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
1084           .addReg(ARM::SP, RegState::Kill)
1085           .add(predOps(ARMCC::AL));
1086       emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
1087                                false);
1088       BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1089           .addReg(ARM::R4, RegState::Kill)
1090           .add(predOps(ARMCC::AL));
1091     }
1092 
1093     AFI->setShouldRestoreSPFromFP(true);
1094   }
1095 
1096   // If we need a base pointer, set it up here. It's whatever the value
1097   // of the stack pointer is at this point. Any variable size objects
1098   // will be allocated after this, so we can still use the base pointer
1099   // to reference locals.
1100   // FIXME: Clarify FrameSetup flags here.
1101   if (RegInfo->hasBasePointer(MF)) {
1102     if (isARM)
1103       BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister())
1104           .addReg(ARM::SP)
1105           .add(predOps(ARMCC::AL))
1106           .add(condCodeOp());
1107     else
1108       BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), RegInfo->getBaseRegister())
1109           .addReg(ARM::SP)
1110           .add(predOps(ARMCC::AL));
1111   }
1112 
1113   // If the frame has variable sized objects then the epilogue must restore
1114   // the sp from fp. We can assume there's an FP here since hasFP already
1115   // checks for hasVarSizedObjects.
1116   if (MFI.hasVarSizedObjects())
1117     AFI->setShouldRestoreSPFromFP(true);
1118 }
1119 
/// Emit the function epilogue into \p MBB: free the local stack area (either
/// with an explicit SP update, by folding the adjustment into a pop, or by
/// restoring SP from the frame pointer), step past the callee-saved restore
/// instructions already inserted by PEI, pop the reserved argument area,
/// validate the return-address PAC, and wrap everything in Windows SEH
/// epilogue markers when the function uses WinCFI.
void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  assert(!AFI->isThumb1OnlyFunction() &&
         "This emitEpilogue does not support Thumb1!");
  bool isARM = !AFI->isThumbFunction();

  // Amount of stack space we reserved next to incoming args for either
  // varargs registers or stack arguments in tail calls made by this function.
  unsigned ReservedArgStack = AFI->getArgRegsSaveSize();

  // How much of the stack used by incoming arguments this function is expected
  // to restore in this particular epilogue.
  int IncomingArgStackToRestore = getArgumentStackToRestore(MF, MBB);
  int NumBytes = (int)MFI.getStackSize();
  Register FramePtr = RegInfo->getFrameRegister(MF);

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  // First put ourselves on the first (from top) terminator instructions.
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();

  // First instruction of the epilogue range; the SEH emission at the bottom
  // wraps [RangeStart, MBB.end()) in SEH opcodes. Only initialized when
  // MF.hasWinCFI() is true, which is also the only case where it is read.
  MachineBasicBlock::iterator RangeStart;
  if (!AFI->hasStackFrame()) {
    // No callee-saved spill area was created: just free the local area plus
    // whatever incoming-argument stack this epilogue must pop.
    if (MF.hasWinCFI()) {
      BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
          .setMIFlag(MachineInstr::FrameDestroy);
      RangeStart = initMBBRange(MBB, MBBI);
    }

    if (NumBytes + IncomingArgStackToRestore != 0)
      emitSPUpdate(isARM, MBB, MBBI, dl, TII,
                   NumBytes + IncomingArgStackToRestore,
                   MachineInstr::FrameDestroy);
  } else {
    // Unwind MBBI to point to first LDR / VLDRD.
    if (MBBI != MBB.begin()) {
      do {
        --MBBI;
      } while (MBBI != MBB.begin() &&
               MBBI->getFlag(MachineInstr::FrameDestroy));
      if (!MBBI->getFlag(MachineInstr::FrameDestroy))
        ++MBBI;
    }

    if (MF.hasWinCFI()) {
      BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
          .setMIFlag(MachineInstr::FrameDestroy);
      RangeStart = initMBBRange(MBB, MBBI);
    }

    // Move SP to start of FP callee save spill area.
    NumBytes -= (ReservedArgStack +
                 AFI->getFPCXTSaveAreaSize() +
                 AFI->getGPRCalleeSavedArea1Size() +
                 AFI->getGPRCalleeSavedArea2Size() +
                 AFI->getDPRCalleeSavedGapSize() +
                 AFI->getDPRCalleeSavedAreaSize());

    // Reset SP based on frame pointer only if the stack frame extends beyond
    // frame pointer stack slot or target is ELF and the function has FP.
    if (AFI->shouldRestoreSPFromFP()) {
      NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
      if (NumBytes) {
        if (isARM)
          emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
                                  ARMCC::AL, 0, TII,
                                  MachineInstr::FrameDestroy);
        else {
          // It's not possible to restore SP from FP in a single instruction.
          // For iOS, this looks like:
          // mov sp, r7
          // sub sp, #24
          // This is bad, if an interrupt is taken after the mov, sp is in an
          // inconsistent state.
          // Use the first callee-saved register as a scratch register.
          assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
                 "No scratch register to restore SP from FP!");
          emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
                                 ARMCC::AL, 0, TII, MachineInstr::FrameDestroy);
          BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
              .addReg(ARM::R4)
              .add(predOps(ARMCC::AL))
              .setMIFlag(MachineInstr::FrameDestroy);
        }
      } else {
        // Thumb2 or ARM.
        if (isARM)
          BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
              .addReg(FramePtr)
              .add(predOps(ARMCC::AL))
              .add(condCodeOp())
              .setMIFlag(MachineInstr::FrameDestroy);
        else
          BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
              .addReg(FramePtr)
              .add(predOps(ARMCC::AL))
              .setMIFlag(MachineInstr::FrameDestroy);
      }
    } else if (NumBytes &&
               !tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
      // Couldn't fold the local-area deallocation into the first pop; emit an
      // explicit SP adjustment before the restore code instead.
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes,
                   MachineInstr::FrameDestroy);

    // Increment past our save areas.
    if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) {
      MBBI++;
      // Since vpop register list cannot have gaps, there may be multiple vpop
      // instructions in the epilogue.
      while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
        MBBI++;
    }
    if (AFI->getDPRCalleeSavedGapSize()) {
      assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
             "unexpected DPR alignment gap");
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize(),
                   MachineInstr::FrameDestroy);
    }

    // Skip the GPR pop instruction for each non-empty GPR save area.
    if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
    if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;

    if (ReservedArgStack || IncomingArgStackToRestore) {
      assert((int)ReservedArgStack + IncomingArgStackToRestore >= 0 &&
             "attempting to restore negative stack amount");
      emitSPUpdate(isARM, MBB, MBBI, dl, TII,
                   ReservedArgStack + IncomingArgStackToRestore,
                   MachineInstr::FrameDestroy);
    }

    // Validate PAC, It should have been already popped into R12. For CMSE entry
    // function, the validation instruction is emitted during expansion of the
    // tBXNS_RET, since the validation must use the value of SP at function
    // entry, before saving, resp. after restoring, FPCXTNS.
    if (AFI->shouldSignReturnAddress() && !AFI->isCmseNSEntryFunction())
      BuildMI(MBB, MBBI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2AUT));
  }

  // Replay the epilogue instructions as SEH opcodes and mark where the
  // epilogue ends for the Windows unwinder.
  if (MF.hasWinCFI()) {
    insertSEHRange(MBB, RangeStart, MBB.end(), TII, MachineInstr::FrameDestroy);
    BuildMI(MBB, MBB.end(), dl, TII.get(ARM::SEH_EpilogEnd))
        .setMIFlag(MachineInstr::FrameDestroy);
  }
}
1272 
1273 /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
1274 /// debug info.  It's the same as what we use for resolving the code-gen
1275 /// references for now.  FIXME: This can go wrong when references are
1276 /// SP-relative and simple call frames aren't used.
1277 StackOffset ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF,
1278                                                      int FI,
1279                                                      Register &FrameReg) const {
1280   return StackOffset::getFixed(ResolveFrameIndexReference(MF, FI, FrameReg, 0));
1281 }
1282 
/// Compute a base register and offset through which frame index \p FI can be
/// materialized.
/// \param FI       the frame index to resolve.
/// \param FrameReg [out] set to the base register the returned offset is
///                 relative to: SP (the default), the frame register, or the
///                 base register.
/// \param SPAdj    additional SP adjustment in effect at the point of use; it
///                 is folded into SP-relative results and backed out again
///                 whenever a non-SP base register is chosen.
/// \return the byte offset of the slot relative to \p FrameReg.
///
/// NOTE: the order of the branches below is significant — it encodes the
/// preference among FP, SP and the base pointer for each combination of
/// stack realignment, fixed vs. local objects, moving SP, and ISA mode.
int ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
                                                 int FI, Register &FrameReg,
                                                 int SPAdj) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  // SP-relative offset of the object, and its FP-relative equivalent.
  int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
  int FPOffset = Offset - AFI->getFramePtrSpillOffset();
  bool isFixed = MFI.isFixedObjectIndex(FI);

  // Start with an SP-relative answer; the heuristics below may switch the
  // base to FP or the base pointer (and then undo the SPAdj term).
  FrameReg = ARM::SP;
  Offset += SPAdj;

  // SP can move around if there are allocas.  We may also lose track of SP
  // when emergency spilling inside a non-reserved call frame setup.
  bool hasMovingSP = !hasReservedCallFrame(MF);

  // When dynamically realigning the stack, use the frame pointer for
  // parameters, and the stack/base pointer for locals.
  if (RegInfo->hasStackRealignment(MF)) {
    assert(hasFP(MF) && "dynamic stack realignment without a FP!");
    if (isFixed) {
      FrameReg = RegInfo->getFrameRegister(MF);
      Offset = FPOffset;
    } else if (hasMovingSP) {
      assert(RegInfo->hasBasePointer(MF) &&
             "VLAs and dynamic stack alignment, but missing base pointer!");
      FrameReg = RegInfo->getBaseRegister();
      // The base pointer is unaffected by the transient SP adjustment.
      Offset -= SPAdj;
    }
    return Offset;
  }

  // If there is a frame pointer, use it when we can.
  if (hasFP(MF) && AFI->hasStackFrame()) {
    // Use frame pointer to reference fixed objects. Use it for locals if
    // there are VLAs (and thus the SP isn't reliable as a base).
    if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
      FrameReg = RegInfo->getFrameRegister(MF);
      return FPOffset;
    } else if (hasMovingSP) {
      assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
      if (AFI->isThumb2Function()) {
        // Try to use the frame pointer if we can, else use the base pointer
        // since it's available. This is handy for the emergency spill slot, in
        // particular.
        if (FPOffset >= -255 && FPOffset < 0) {
          FrameReg = RegInfo->getFrameRegister(MF);
          return FPOffset;
        }
      }
    } else if (AFI->isThumbFunction()) {
      // Prefer SP to base pointer, if the offset is suitably aligned and in
      // range as the effective range of the immediate offset is bigger when
      // basing off SP.
      // Use  add <rd>, sp, #<imm8>
      //      ldr <rd>, [sp, #<imm8>]
      if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
        return Offset;
      // In Thumb2 mode, the negative offset is very limited. Try to avoid
      // out of range references. ldr <rt>,[<rn>, #-<imm8>]
      if (AFI->isThumb2Function() && FPOffset >= -255 && FPOffset < 0) {
        FrameReg = RegInfo->getFrameRegister(MF);
        return FPOffset;
      }
    } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
      // Otherwise, use SP or FP, whichever is closer to the stack slot.
      FrameReg = RegInfo->getFrameRegister(MF);
      return FPOffset;
    }
  }
  // Use the base pointer if we have one.
  // FIXME: Maybe prefer sp on Thumb1 if it's legal and the offset is cheaper?
  // That can happen if we forced a base pointer for a large call frame.
  if (RegInfo->hasBasePointer(MF)) {
    FrameReg = RegInfo->getBaseRegister();
    // As above: base-pointer addressing does not see the SP adjustment.
    Offset -= SPAdj;
  }
  return Offset;
}
1364 
/// Emit the spill code for one callee-saved register area before \p MI.
/// Registers from \p CSI accepted by the \p Func predicate — excluding the
/// \p NumAlignedDPRCS2Regs aligned D-registers spilled elsewhere — are stored
/// with the multi-register opcode \p StmOpc; when only a single register
/// remains and \p StrOpc is non-zero, a single pre-decrement store is used
/// instead. With \p NoGap, only runs of consecutively-numbered registers are
/// merged into one instruction (required for vpush register lists), so
/// several store instructions may be emitted, highest registers first.
void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    ArrayRef<CalleeSavedInfo> CSI,
                                    unsigned StmOpc, unsigned StrOpc,
                                    bool NoGap, bool (*Func)(unsigned, bool),
                                    unsigned NumAlignedDPRCS2Regs,
                                    unsigned MIFlags) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();

  DebugLoc DL;

  using RegAndKill = std::pair<unsigned, bool>;

  SmallVector<RegAndKill, 4> Regs;
  // Walk CSI from the back; each pass of the outer loop collects one
  // (NoGap: consecutive) group of registers and emits one instruction for it.
  unsigned i = CSI.size();
  while (i != 0) {
    unsigned LastReg = 0;
    for (; i != 0; --i) {
      Register Reg = CSI[i-1].getReg();
      if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;

      // D-registers in the aligned area DPRCS2 are NOT spilled here.
      if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
        continue;

      const MachineRegisterInfo &MRI = MF.getRegInfo();
      bool isLiveIn = MRI.isLiveIn(Reg);
      if (!isLiveIn && !MRI.isReserved(Reg))
        MBB.addLiveIn(Reg);
      // If NoGap is true, push consecutive registers and then leave the rest
      // for other instructions. e.g.
      // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
      if (NoGap && LastReg && LastReg != Reg-1)
        break;
      LastReg = Reg;
      // Do not set a kill flag on values that are also marked as live-in. This
      // happens with the @llvm-returnaddress intrinsic and with arguments
      // passed in callee saved registers.
      // Omitting the kill flags is conservatively correct even if the live-in
      // is not used after all.
      Regs.push_back(std::make_pair(Reg, /*isKill=*/!isLiveIn));
    }

    if (Regs.empty())
      continue;

    // Register lists must be in ascending encoding order.
    llvm::sort(Regs, [&](const RegAndKill &LHS, const RegAndKill &RHS) {
      return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first);
    });

    if (Regs.size() > 1 || StrOpc== 0) {
      MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
                                    .addReg(ARM::SP)
                                    .setMIFlags(MIFlags)
                                    .add(predOps(ARMCC::AL));
      for (unsigned i = 0, e = Regs.size(); i < e; ++i)
        MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
    } else if (Regs.size() == 1) {
      // Single register: store with SP pre-decremented by one 4-byte word.
      BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP)
          .addReg(Regs[0].first, getKillRegState(Regs[0].second))
          .addReg(ARM::SP)
          .setMIFlags(MIFlags)
          .addImm(-4)
          .add(predOps(ARMCC::AL));
    }
    Regs.clear();

    // Put any subsequent vpush instructions before this one: they will refer to
    // higher register numbers so need to be pushed first in order to preserve
    // monotonicity.
    if (MI != MBB.begin())
      --MI;
  }
}
1441 
/// Emit instructions restoring the callee-saved registers of \p CSI that are
/// accepted by the predicate \p Func, inserting them before \p MI.
///
/// Matching registers are grouped into runs; each run becomes either one
/// multi-register load (\p LdmOpc) or, for a single register, one
/// post-indexed single load (\p LdrOpc; when \p LdrOpc is 0 the LDM form is
/// used even for one register).  Where the guard conditions below allow it,
/// the block's return instruction is folded into the final LDM by loading
/// LR's stack slot directly into PC.
///
/// \param isVarArg true if the function stores varargs; this disables the
///        LR-into-PC return folding.
/// \param NoGap   if true, each emitted instruction may only name registers
///        with consecutive encodings (e.g. for VLDM register lists).
/// \param Func    predicate selecting which CSI entries this call restores.
/// \param NumAlignedDPRCS2Regs number of aligned DPRCS2 d-registers that are
///        reloaded by emitAlignedDPRCS2Restores instead and skipped here.
void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MI,
                                   MutableArrayRef<CalleeSavedInfo> CSI,
                                   unsigned LdmOpc, unsigned LdrOpc,
                                   bool isVarArg, bool NoGap,
                                   bool (*Func)(unsigned, bool),
                                   unsigned NumAlignedDPRCS2Regs) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  bool hasPAC = AFI->shouldSignReturnAddress();
  DebugLoc DL;
  bool isTailCall = false;
  bool isInterrupt = false;
  bool isTrap = false;
  bool isCmseEntry = false;
  // Classify the instruction we are inserting before (normally the block's
  // terminator).  The kind of return determines whether LR may be restored
  // straight into PC below.
  if (MBB.end() != MI) {
    DL = MI->getDebugLoc();
    unsigned RetOpcode = MI->getOpcode();
    isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri);
    isInterrupt =
        RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
    isTrap =
        RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
        RetOpcode == ARM::tTRAP;
    isCmseEntry = (RetOpcode == ARM::tBXNS || RetOpcode == ARM::tBXNS_RET);
  }

  SmallVector<unsigned, 4> Regs;
  // Walk CSI backwards, collecting the registers accepted by Func into pop
  // runs; each iteration of the outer loop emits one instruction.
  unsigned i = CSI.size();
  while (i != 0) {
    unsigned LastReg = 0;
    bool DeleteRet = false;
    for (; i != 0; --i) {
      CalleeSavedInfo &Info = CSI[i-1];
      Register Reg = Info.getReg();
      if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;

      // The aligned reloads from area DPRCS2 are not inserted here.
      if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
        continue;
      // Restore LR's slot directly into PC (folding the return into the pop)
      // only for a plain return: no tail call, interrupt return, trap or CMSE
      // entry return, no extra argument-stack adjustment, no PAC
      // authentication pending, and the target supports it (hasV5TOps).
      if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
          !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
          STI.hasV5TOps() && MBB.succ_empty() && !hasPAC) {
        Reg = ARM::PC;
        // Fold the return instruction into the LDM.
        DeleteRet = true;
        LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
        // We 'restore' LR into PC so it is not live out of the return block:
        // Clear Restored bit.
        Info.setRestored(false);
      }

      // If NoGap is true, pop consecutive registers and then leave the rest
      // for other instructions. e.g.
      // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
      if (NoGap && LastReg && LastReg != Reg-1)
        break;

      LastReg = Reg;
      Regs.push_back(Reg);
    }

    if (Regs.empty())
      continue;

    // Emit the register list in ascending encoding order.
    llvm::sort(Regs, [&](unsigned LHS, unsigned RHS) {
      return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS);
    });

    if (Regs.size() > 1 || LdrOpc == 0) {
      // Multi-register pop: LDM/VLDM with SP writeback.
      MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
                                    .addReg(ARM::SP)
                                    .add(predOps(ARMCC::AL))
                                    .setMIFlags(MachineInstr::FrameDestroy);
      for (unsigned i = 0, e = Regs.size(); i < e; ++i)
        MIB.addReg(Regs[i], getDefRegState(true));
      if (DeleteRet) {
        if (MI != MBB.end()) {
          // Carry the return's implicit operands over to the folded LDM_RET,
          // then drop the now-redundant return instruction.
          MIB.copyImplicitOps(*MI);
          MI->eraseFromParent();
        }
      }
      MI = MIB;
    } else if (Regs.size() == 1) {
      // If we adjusted the reg to PC from LR above, switch it back here. We
      // only do that for LDM.
      if (Regs[0] == ARM::PC)
        Regs[0] = ARM::LR;
      // Single-register post-indexed load, popping 4 bytes off the stack.
      MachineInstrBuilder MIB =
        BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
          .addReg(ARM::SP, RegState::Define)
          .addReg(ARM::SP)
          .setMIFlags(MachineInstr::FrameDestroy);
      // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
      // that refactoring is complete (eventually).
      if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
        MIB.addReg(0);
        MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
      } else
        MIB.addImm(4);
      MIB.add(predOps(ARMCC::AL));
    }
    Regs.clear();

    // Put any subsequent vpop instructions after this one: they will refer to
    // higher register numbers so need to be popped afterwards.
    if (MI != MBB.end())
      ++MI;
  }
}
1554 
/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
/// starting from d8.  Also insert stack realignment code and leave the stack
/// pointer pointing to the d8 spill slot.
///
/// The registers are stored with vst1.64 instructions carrying a 16-byte
/// alignment operand where possible, with a plain vstr.64 for a trailing odd
/// register.  r4 is used as a scratch register and holds the address of the
/// (realigned) spill area; the last spill emitted kills it.
static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    unsigned NumAlignedDPRCS2Regs,
                                    ArrayRef<CalleeSavedInfo> CSI,
                                    const TargetRegisterInfo *TRI) {
  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // Mark the D-register spill slots as properly aligned.  Since MFI computes
  // stack slot layout backwards, this can actually mean that the d-reg stack
  // slot offsets can be wrong. The offset for d8 will always be correct.
  for (const CalleeSavedInfo &I : CSI) {
    // Only slots for d8..d(8+N-1) are adjusted.  For registers below D8 the
    // unsigned subtraction wraps, so the range check below rejects them too.
    unsigned DNum = I.getReg() - ARM::D8;
    if (DNum > NumAlignedDPRCS2Regs - 1)
      continue;
    int FI = I.getFrameIdx();
    // The even-numbered registers will be 16-byte aligned, the odd-numbered
    // registers will be 8-byte aligned.
    MFI.setObjectAlignment(FI, DNum % 2 ? Align(8) : Align(16));

    // The stack slot for D8 needs to be maximally aligned because this is
    // actually the point where we align the stack pointer.  MachineFrameInfo
    // computes all offsets relative to the incoming stack pointer which is a
    // bit weird when realigning the stack.  Any extra padding for this
    // over-alignment is not realized because the code inserted below adjusts
    // the stack pointer by numregs * 8 before aligning the stack pointer.
    if (DNum == 0)
      MFI.setObjectAlignment(FI, MFI.getMaxAlign());
  }

  // Move the stack pointer to the d8 spill slot, and align it at the same
  // time. Leave the stack slot address in the scratch register r4.
  //
  //   sub r4, sp, #numregs * 8
  //   bic r4, r4, #align - 1
  //   mov sp, r4
  //
  bool isThumb = AFI->isThumbFunction();
  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
  AFI->setShouldRestoreSPFromFP(true);

  // sub r4, sp, #numregs * 8
  // The immediate is <= 64, so it doesn't need any special encoding.
  unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
  BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
      .addReg(ARM::SP)
      .addImm(8 * NumAlignedDPRCS2Regs)
      .add(predOps(ARMCC::AL))
      .add(condCodeOp());

  Align MaxAlign = MF.getFrameInfo().getMaxAlign();
  // We must set parameter MustBeSingleInstruction to true, since
  // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
  // stack alignment.  Luckily, this can always be done since all ARM
  // architecture versions that support Neon also support the BFC
  // instruction.
  emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);

  // mov sp, r4
  // The stack pointer must be adjusted before spilling anything, otherwise
  // the stack slots could be clobbered by an interrupt handler.
  // Leave r4 live, it is used below.
  Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
                                .addReg(ARM::R4)
                                .add(predOps(ARMCC::AL));
  if (!isThumb)
    // tMOVr has no condition-code operand; the ARM-mode MOVr does.
    MIB.add(condCodeOp());

  // Now spill NumAlignedDPRCS2Regs registers starting from d8.
  // r4 holds the stack slot address.
  unsigned NextReg = ARM::D8;

  // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
  // The writeback is only needed when emitting two vst1.64 instructions.
  // This covers the 6..8 register cases: store d8-d11 and advance r4.
  if (NumAlignedDPRCS2Regs >= 6) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    MBB.addLiveIn(SupReg);
    BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4)
        .addReg(ARM::R4, RegState::Kill)
        .addImm(16)
        .addReg(NextReg)
        .addReg(SupReg, RegState::ImplicitKill)
        .add(predOps(ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // We won't modify r4 beyond this point.  It currently points to the next
  // register to be spilled.
  unsigned R4BaseReg = NextReg;

  // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
  if (NumAlignedDPRCS2Regs >= 4) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    MBB.addLiveIn(SupReg);
    BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
        .addReg(ARM::R4)
        .addImm(16)
        .addReg(NextReg)
        .addReg(SupReg, RegState::ImplicitKill)
        .add(predOps(ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // 16-byte aligned vst1.64 with 2 d-regs.
  if (NumAlignedDPRCS2Regs >= 2) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QPRRegClass);
    MBB.addLiveIn(SupReg);
    BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
        .addReg(ARM::R4)
        .addImm(16)
        .addReg(SupReg)
        .add(predOps(ARMCC::AL));
    NextReg += 2;
    NumAlignedDPRCS2Regs -= 2;
  }

  // Finally, use a vanilla vstr.64 for the odd last register.
  if (NumAlignedDPRCS2Regs) {
    MBB.addLiveIn(NextReg);
    // vstr.64 uses addrmode5 which has an offset scale of 4.
    // Each d-register already stored is 8 bytes, so the scaled offset is
    // (d-regs stored via r4) * 8 / 4 = (NextReg - R4BaseReg) * 2.
    BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
        .addReg(NextReg)
        .addReg(ARM::R4)
        .addImm((NextReg - R4BaseReg) * 2)
        .add(predOps(ARMCC::AL));
  }

  // The last spill instruction inserted should kill the scratch register r4.
  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
}
1697 
1698 /// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
1699 /// iterator to the following instruction.
1700 static MachineBasicBlock::iterator
1701 skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
1702                         unsigned NumAlignedDPRCS2Regs) {
1703   //   sub r4, sp, #numregs * 8
1704   //   bic r4, r4, #align - 1
1705   //   mov sp, r4
1706   ++MI; ++MI; ++MI;
1707   assert(MI->mayStore() && "Expecting spill instruction");
1708 
1709   // These switches all fall through.
1710   switch(NumAlignedDPRCS2Regs) {
1711   case 7:
1712     ++MI;
1713     assert(MI->mayStore() && "Expecting spill instruction");
1714     LLVM_FALLTHROUGH;
1715   default:
1716     ++MI;
1717     assert(MI->mayStore() && "Expecting spill instruction");
1718     LLVM_FALLTHROUGH;
1719   case 1:
1720   case 2:
1721   case 4:
1722     assert(MI->killsRegister(ARM::R4) && "Missed kill flag");
1723     ++MI;
1724   }
1725   return MI;
1726 }
1727 
/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
/// starting from d8.  These instructions are assumed to execute while the
/// stack is still aligned, unlike the code inserted by emitPopInst.
///
/// Mirrors emitAlignedDPRCS2Spills: r4 is materialized with the address of
/// the d8 spill slot and the registers are reloaded with vld1.64 (16-byte
/// alignment operand) where possible, plus a vldr.64 for a trailing odd
/// register.  The last reload emitted kills r4.
static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI,
                                      unsigned NumAlignedDPRCS2Regs,
                                      ArrayRef<CalleeSavedInfo> CSI,
                                      const TargetRegisterInfo *TRI) {
  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();

  // Find the frame index assigned to d8.
  int D8SpillFI = 0;
  for (const CalleeSavedInfo &I : CSI)
    if (I.getReg() == ARM::D8) {
      D8SpillFI = I.getFrameIdx();
      break;
    }

  // Materialize the address of the d8 spill slot into the scratch register r4.
  // This can be fairly complicated if the stack frame is large, so just use
  // the normal frame index elimination mechanism to do it.  This code runs as
  // the initial part of the epilog where the stack and base pointers haven't
  // been changed yet.
  bool isThumb = AFI->isThumbFunction();
  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");

  // add r4, <frameindex of d8>, #0  -- resolved later by frame-index
  // elimination.
  unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
  BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
      .addFrameIndex(D8SpillFI)
      .addImm(0)
      .add(predOps(ARMCC::AL))
      .add(condCodeOp());

  // Now restore NumAlignedDPRCS2Regs registers starting from d8.
  unsigned NextReg = ARM::D8;

  // 16-byte aligned vld1.64 with 4 d-regs and writeback.
  // This covers the 6..8 register cases: reload d8-d11 and advance r4.
  if (NumAlignedDPRCS2Regs >= 6) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
        .addReg(ARM::R4, RegState::Define)
        .addReg(ARM::R4, RegState::Kill)
        .addImm(16)
        .addReg(SupReg, RegState::ImplicitDefine)
        .add(predOps(ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // We won't modify r4 beyond this point.  It currently points to the next
  // register to be spilled.
  unsigned R4BaseReg = NextReg;

  // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
  if (NumAlignedDPRCS2Regs >= 4) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
        .addReg(ARM::R4)
        .addImm(16)
        .addReg(SupReg, RegState::ImplicitDefine)
        .add(predOps(ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // 16-byte aligned vld1.64 with 2 d-regs.
  if (NumAlignedDPRCS2Regs >= 2) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QPRRegClass);
    BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
        .addReg(ARM::R4)
        .addImm(16)
        .add(predOps(ARMCC::AL));
    NextReg += 2;
    NumAlignedDPRCS2Regs -= 2;
  }

  // Finally, use a vanilla vldr.64 for the remaining odd register.
  // vldr.64 uses addrmode5 (offset scale 4); each d-reg already reloaded via
  // r4 is 8 bytes, hence the factor of 2.
  if (NumAlignedDPRCS2Regs)
    BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
        .addReg(ARM::R4)
        .addImm(2 * (NextReg - R4BaseReg))
        .add(predOps(ARMCC::AL));

  // The last reload instruction inserted above kills the scratch register r4.
  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
}
1820 
/// Spill the callee-saved registers in \p CSI, inserting the push
/// instructions before \p MI.  Registers are pushed per save area (area 1,
/// area 2, then the d-registers) via emitPushInst; any aligned DPRCS2
/// d-registers are spilled separately after stack realignment.  Returns true
/// when spill code was emitted.
bool ARMFrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  // Multi-register and single-register store opcodes, chosen per ISA mode.
  unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
  unsigned PushOneOpc = AFI->isThumbFunction() ?
    ARM::t2STR_PRE : ARM::STR_PRE_IMM;
  unsigned FltOpc = ARM::VSTMDDB_UPD;
  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
  // Compute PAC in R12.
  if (AFI->shouldSignReturnAddress()) {
    BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2PAC))
        .setMIFlags(MachineInstr::FrameSetup);
  }
  // Save the non-secure floating point context.
  // VSTR_FPCXTNS_pre stores FPCXT_NS with a pre-decrement of SP by 4.
  if (llvm::any_of(CSI, [](const CalleeSavedInfo &C) {
        return C.getReg() == ARM::FPCXTNS;
      })) {
    BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::VSTR_FPCXTNS_pre),
            ARM::SP)
        .addReg(ARM::SP)
        .addImm(-4)
        .add(predOps(ARMCC::AL));
  }
  // Push the three save areas in order.  Area 3 (d-registers) passes
  // NumAlignedDPRCS2Regs so the aligned d8.. range is left to the code below,
  // and NoGap=true because VSTM needs consecutive registers.
  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0,
               MachineInstr::FrameSetup);
  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0,
               MachineInstr::FrameSetup);
  emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
               NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);

  // The code above does not insert spill code for the aligned DPRCS2 registers.
  // The stack realignment code will be inserted between the push instructions
  // and these spills.
  if (NumAlignedDPRCS2Regs)
    emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);

  return true;
}
1865 
1866 bool ARMFrameLowering::restoreCalleeSavedRegisters(
1867     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1868     MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
1869   if (CSI.empty())
1870     return false;
1871 
1872   MachineFunction &MF = *MBB.getParent();
1873   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1874   bool isVarArg = AFI->getArgRegsSaveSize() > 0;
1875   unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
1876 
1877   // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
1878   // registers. Do that here instead.
1879   if (NumAlignedDPRCS2Regs)
1880     emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
1881 
1882   unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
1883   unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM;
1884   unsigned FltOpc = ARM::VLDMDIA_UPD;
1885   emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
1886               NumAlignedDPRCS2Regs);
1887   emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
1888               &isARMArea2Register, 0);
1889   emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
1890               &isARMArea1Register, 0);
1891 
1892   return true;
1893 }
1894 
1895 // FIXME: Make generic?
1896 static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF,
1897                                             const ARMBaseInstrInfo &TII) {
1898   unsigned FnSize = 0;
1899   for (auto &MBB : MF) {
1900     for (auto &MI : MBB)
1901       FnSize += TII.getInstSizeInBytes(MI);
1902   }
1903   if (MF.getJumpTableInfo())
1904     for (auto &Table: MF.getJumpTableInfo()->getJumpTables())
1905       FnSize += Table.MBBs.size() * 4;
1906   FnSize += MF.getConstantPool()->getConstants().size() * 4;
1907   return FnSize;
1908 }
1909 
/// estimateRSStackSizeLimit - Look at each instruction that references stack
/// frames and return the stack size limit beyond which some of these
/// instructions will require a scratch register during their expansion later.
///
/// \param [out] HasNonSPFrameIndex set to true when some frame-index operand
///        must live in a register class that cannot contain SP.
/// \returns the smallest immediate-offset limit over all frame-index uses
///          (0 if any instruction cannot encode an offset at all).
// FIXME: Move to TII?
static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
                                         const TargetFrameLowering *TFI,
                                         bool &HasNonSPFrameIndex) {
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  // Start from the widest case: a 12-bit unsigned immediate (4095 bytes).
  unsigned Limit = (1 << 12) - 1;
  for (auto &MBB : MF) {
    for (auto &MI : MBB) {
      // Debug instructions don't get encoded; ignore them.
      if (MI.isDebugInstr())
        continue;
      // Scan every frame-index operand and clamp Limit to what this
      // instruction's addressing mode can encode.
      for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
        if (!MI.getOperand(i).isFI())
          continue;

        // When using ADDri to get the address of a stack object, 255 is the
        // largest offset guaranteed to fit in the immediate offset.
        if (MI.getOpcode() == ARM::ADDri) {
          Limit = std::min(Limit, (1U << 8) - 1);
          break;
        }
        // t2ADDri will not require an extra register, it can reuse the
        // destination.
        if (MI.getOpcode() == ARM::t2ADDri || MI.getOpcode() == ARM::t2ADDri12)
          break;

        // Record when the frame index is constrained to a register class
        // that excludes SP; the caller needs a scratch register then.
        const MCInstrDesc &MCID = MI.getDesc();
        const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i, TRI, MF);
        if (RegClass && !RegClass->contains(ARM::SP))
          HasNonSPFrameIndex = true;

        // Otherwise check the addressing mode.
        switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
        case ARMII::AddrMode_i12:
        case ARMII::AddrMode2:
          // Default 12 bit limit.
          break;
        case ARMII::AddrMode3:
        case ARMII::AddrModeT2_i8neg:
          Limit = std::min(Limit, (1U << 8) - 1);
          break;
        case ARMII::AddrMode5FP16:
          Limit = std::min(Limit, ((1U << 8) - 1) * 2);
          break;
        case ARMII::AddrMode5:
        case ARMII::AddrModeT2_i8s4:
        case ARMII::AddrModeT2_ldrex:
          Limit = std::min(Limit, ((1U << 8) - 1) * 4);
          break;
        case ARMII::AddrModeT2_i12:
          // i12 supports only positive offset so these will be converted to
          // i8 opcodes. See llvm::rewriteT2FrameIndex.
          if (TFI->hasFP(MF) && AFI->hasStackFrame())
            Limit = std::min(Limit, (1U << 8) - 1);
          break;
        case ARMII::AddrMode4:
        case ARMII::AddrMode6:
          // Addressing modes 4 & 6 (load/store) instructions can't encode an
          // immediate offset for stack references.
          return 0;
        case ARMII::AddrModeT2_i7:
          Limit = std::min(Limit, ((1U << 7) - 1) * 1);
          break;
        case ARMII::AddrModeT2_i7s2:
          Limit = std::min(Limit, ((1U << 7) - 1) * 2);
          break;
        case ARMII::AddrModeT2_i7s4:
          Limit = std::min(Limit, ((1U << 7) - 1) * 4);
          break;
        default:
          llvm_unreachable("Unhandled addressing mode in stack size limit calculation");
        }
        break; // At most one FI per instruction
      }
    }
  }

  return Limit;
}
1994 
1995 // In functions that realign the stack, it can be an advantage to spill the
1996 // callee-saved vector registers after realigning the stack. The vst1 and vld1
1997 // instructions take alignment hints that can improve performance.
1998 static void
1999 checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
2000   MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
2001   if (!SpillAlignedNEONRegs)
2002     return;
2003 
2004   // Naked functions don't spill callee-saved registers.
2005   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
2006     return;
2007 
2008   // We are planning to use NEON instructions vst1 / vld1.
2009   if (!MF.getSubtarget<ARMSubtarget>().hasNEON())
2010     return;
2011 
2012   // Don't bother if the default stack alignment is sufficiently high.
2013   if (MF.getSubtarget().getFrameLowering()->getStackAlign() >= Align(8))
2014     return;
2015 
2016   // Aligned spills require stack realignment.
2017   if (!static_cast<const ARMBaseRegisterInfo *>(
2018            MF.getSubtarget().getRegisterInfo())->canRealignStack(MF))
2019     return;
2020 
2021   // We always spill contiguous d-registers starting from d8. Count how many
2022   // needs spilling.  The register allocator will almost always use the
2023   // callee-saved registers in order, but it can happen that there are holes in
2024   // the range.  Registers above the hole will be spilled to the standard DPRCS
2025   // area.
2026   unsigned NumSpills = 0;
2027   for (; NumSpills < 8; ++NumSpills)
2028     if (!SavedRegs.test(ARM::D8 + NumSpills))
2029       break;
2030 
2031   // Don't do this for just one d-register. It's not worth it.
2032   if (NumSpills < 2)
2033     return;
2034 
2035   // Spill the first NumSpills D-registers after realigning the stack.
2036   MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
2037 
2038   // A scratch register is required for the vst1 / vld1 instructions.
2039   SavedRegs.set(ARM::R4);
2040 }
2041 
2042 bool ARMFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2043   // For CMSE entry functions, we want to save the FPCXT_NS immediately
2044   // upon function entry (resp. restore it immmediately before return)
2045   if (STI.hasV8_1MMainlineOps() &&
2046       MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction())
2047     return false;
2048 
2049   // We are disabling shrinkwrapping for now when PAC is enabled, as
2050   // shrinkwrapping can cause clobbering of r12 when the PAC code is
2051   // generated. A follow-up patch will fix this in a more performant manner.
2052   if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(
2053           true /* SpillsLR */))
2054     return false;
2055 
2056   return true;
2057 }
2058 
2059 void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
2060                                             BitVector &SavedRegs,
2061                                             RegScavenger *RS) const {
2062   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
2063   // This tells PEI to spill the FP as if it is any other callee-save register
2064   // to take advantage the eliminateFrameIndex machinery. This also ensures it
2065   // is spilled in the order specified by getCalleeSavedRegs() to make it easier
2066   // to combine multiple loads / stores.
2067   bool CanEliminateFrame = true;
2068   bool CS1Spilled = false;
2069   bool LRSpilled = false;
2070   unsigned NumGPRSpills = 0;
2071   unsigned NumFPRSpills = 0;
2072   SmallVector<unsigned, 4> UnspilledCS1GPRs;
2073   SmallVector<unsigned, 4> UnspilledCS2GPRs;
2074   const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
2075       MF.getSubtarget().getRegisterInfo());
2076   const ARMBaseInstrInfo &TII =
2077       *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2078   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2079   MachineFrameInfo &MFI = MF.getFrameInfo();
2080   MachineRegisterInfo &MRI = MF.getRegInfo();
2081   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
2082   (void)TRI;  // Silence unused warning in non-assert builds.
2083   Register FramePtr = RegInfo->getFrameRegister(MF);
2084 
2085   // Spill R4 if Thumb2 function requires stack realignment - it will be used as
2086   // scratch register. Also spill R4 if Thumb2 function has varsized objects,
2087   // since it's not always possible to restore sp from fp in a single
2088   // instruction.
2089   // FIXME: It will be better just to find spare register here.
2090   if (AFI->isThumb2Function() &&
2091       (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF)))
2092     SavedRegs.set(ARM::R4);
2093 
2094   // If a stack probe will be emitted, spill R4 and LR, since they are
2095   // clobbered by the stack probe call.
2096   // This estimate should be a safe, conservative estimate. The actual
2097   // stack probe is enabled based on the size of the local objects;
2098   // this estimate also includes the varargs store size.
2099   if (STI.isTargetWindows() &&
2100       WindowsRequiresStackProbe(MF, MFI.estimateStackSize(MF))) {
2101     SavedRegs.set(ARM::R4);
2102     SavedRegs.set(ARM::LR);
2103   }
2104 
2105   if (AFI->isThumb1OnlyFunction()) {
2106     // Spill LR if Thumb1 function uses variable length argument lists.
2107     if (AFI->getArgRegsSaveSize() > 0)
2108       SavedRegs.set(ARM::LR);
2109 
2110     // Spill R4 if Thumb1 epilogue has to restore SP from FP or the function
2111     // requires stack alignment.  We don't know for sure what the stack size
2112     // will be, but for this, an estimate is good enough. If there anything
2113     // changes it, it'll be a spill, which implies we've used all the registers
2114     // and so R4 is already used, so not marking it here will be OK.
2115     // FIXME: It will be better just to find spare register here.
2116     if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF) ||
2117         MFI.estimateStackSize(MF) > 508)
2118       SavedRegs.set(ARM::R4);
2119   }
2120 
2121   // See if we can spill vector registers to aligned stack.
2122   checkNumAlignedDPRCS2Regs(MF, SavedRegs);
2123 
2124   // Spill the BasePtr if it's used.
2125   if (RegInfo->hasBasePointer(MF))
2126     SavedRegs.set(RegInfo->getBaseRegister());
2127 
2128   // On v8.1-M.Main CMSE entry functions save/restore FPCXT.
2129   if (STI.hasV8_1MMainlineOps() && AFI->isCmseNSEntryFunction())
2130     CanEliminateFrame = false;
2131 
2132   // Don't spill FP if the frame can be eliminated. This is determined
2133   // by scanning the callee-save registers to see if any is modified.
2134   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2135   for (unsigned i = 0; CSRegs[i]; ++i) {
2136     unsigned Reg = CSRegs[i];
2137     bool Spilled = false;
2138     if (SavedRegs.test(Reg)) {
2139       Spilled = true;
2140       CanEliminateFrame = false;
2141     }
2142 
2143     if (!ARM::GPRRegClass.contains(Reg)) {
2144       if (Spilled) {
2145         if (ARM::SPRRegClass.contains(Reg))
2146           NumFPRSpills++;
2147         else if (ARM::DPRRegClass.contains(Reg))
2148           NumFPRSpills += 2;
2149         else if (ARM::QPRRegClass.contains(Reg))
2150           NumFPRSpills += 4;
2151       }
2152       continue;
2153     }
2154 
2155     if (Spilled) {
2156       NumGPRSpills++;
2157 
2158       if (!STI.splitFramePushPop(MF)) {
2159         if (Reg == ARM::LR)
2160           LRSpilled = true;
2161         CS1Spilled = true;
2162         continue;
2163       }
2164 
2165       // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
2166       switch (Reg) {
2167       case ARM::LR:
2168         LRSpilled = true;
2169         LLVM_FALLTHROUGH;
2170       case ARM::R0: case ARM::R1:
2171       case ARM::R2: case ARM::R3:
2172       case ARM::R4: case ARM::R5:
2173       case ARM::R6: case ARM::R7:
2174         CS1Spilled = true;
2175         break;
2176       default:
2177         break;
2178       }
2179     } else {
2180       if (!STI.splitFramePushPop(MF)) {
2181         UnspilledCS1GPRs.push_back(Reg);
2182         continue;
2183       }
2184 
2185       switch (Reg) {
2186       case ARM::R0: case ARM::R1:
2187       case ARM::R2: case ARM::R3:
2188       case ARM::R4: case ARM::R5:
2189       case ARM::R6: case ARM::R7:
2190       case ARM::LR:
2191         UnspilledCS1GPRs.push_back(Reg);
2192         break;
2193       default:
2194         UnspilledCS2GPRs.push_back(Reg);
2195         break;
2196       }
2197     }
2198   }
2199 
2200   bool ForceLRSpill = false;
2201   if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
2202     unsigned FnSize = EstimateFunctionSizeInBytes(MF, TII);
2203     // Force LR to be spilled if the Thumb function size is > 2048. This enables
2204     // use of BL to implement far jump.
2205     if (FnSize >= (1 << 11)) {
2206       CanEliminateFrame = false;
2207       ForceLRSpill = true;
2208     }
2209   }
2210 
2211   // If any of the stack slot references may be out of range of an immediate
2212   // offset, make sure a register (or a spill slot) is available for the
2213   // register scavenger. Note that if we're indexing off the frame pointer, the
2214   // effective stack size is 4 bytes larger since the FP points to the stack
2215   // slot of the previous FP. Also, if we have variable sized objects in the
2216   // function, stack slot references will often be negative, and some of
2217   // our instructions are positive-offset only, so conservatively consider
2218   // that case to want a spill slot (or register) as well. Similarly, if
2219   // the function adjusts the stack pointer during execution and the
2220   // adjustments aren't already part of our stack size estimate, our offset
2221   // calculations may be off, so be conservative.
2222   // FIXME: We could add logic to be more precise about negative offsets
2223   //        and which instructions will need a scratch register for them. Is it
2224   //        worth the effort and added fragility?
2225   unsigned EstimatedStackSize =
2226       MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
2227 
2228   // Determine biggest (positive) SP offset in MachineFrameInfo.
2229   int MaxFixedOffset = 0;
2230   for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
2231     int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I);
2232     MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset);
2233   }
2234 
2235   bool HasFP = hasFP(MF);
2236   if (HasFP) {
2237     if (AFI->hasStackFrame())
2238       EstimatedStackSize += 4;
2239   } else {
2240     // If FP is not used, SP will be used to access arguments, so count the
2241     // size of arguments into the estimation.
2242     EstimatedStackSize += MaxFixedOffset;
2243   }
2244   EstimatedStackSize += 16; // For possible paddings.
2245 
2246   unsigned EstimatedRSStackSizeLimit, EstimatedRSFixedSizeLimit;
2247   bool HasNonSPFrameIndex = false;
2248   if (AFI->isThumb1OnlyFunction()) {
2249     // For Thumb1, don't bother to iterate over the function. The only
2250     // instruction that requires an emergency spill slot is a store to a
2251     // frame index.
2252     //
2253     // tSTRspi, which is used for sp-relative accesses, has an 8-bit unsigned
2254     // immediate. tSTRi, which is used for bp- and fp-relative accesses, has
2255     // a 5-bit unsigned immediate.
2256     //
2257     // We could try to check if the function actually contains a tSTRspi
2258     // that might need the spill slot, but it's not really important.
2259     // Functions with VLAs or extremely large call frames are rare, and
2260     // if a function is allocating more than 1KB of stack, an extra 4-byte
2261     // slot probably isn't relevant.
2262     if (RegInfo->hasBasePointer(MF))
2263       EstimatedRSStackSizeLimit = (1U << 5) * 4;
2264     else
2265       EstimatedRSStackSizeLimit = (1U << 8) * 4;
2266     EstimatedRSFixedSizeLimit = (1U << 5) * 4;
2267   } else {
2268     EstimatedRSStackSizeLimit =
2269         estimateRSStackSizeLimit(MF, this, HasNonSPFrameIndex);
2270     EstimatedRSFixedSizeLimit = EstimatedRSStackSizeLimit;
2271   }
2272   // Final estimate of whether sp or bp-relative accesses might require
2273   // scavenging.
2274   bool HasLargeStack = EstimatedStackSize > EstimatedRSStackSizeLimit;
2275 
2276   // If the stack pointer moves and we don't have a base pointer, the
2277   // estimate logic doesn't work. The actual offsets might be larger when
2278   // we're constructing a call frame, or we might need to use negative
2279   // offsets from fp.
2280   bool HasMovingSP = MFI.hasVarSizedObjects() ||
2281     (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));
2282   bool HasBPOrFixedSP = RegInfo->hasBasePointer(MF) || !HasMovingSP;
2283 
2284   // If we have a frame pointer, we assume arguments will be accessed
2285   // relative to the frame pointer. Check whether fp-relative accesses to
2286   // arguments require scavenging.
2287   //
2288   // We could do slightly better on Thumb1; in some cases, an sp-relative
2289   // offset would be legal even though an fp-relative offset is not.
2290   int MaxFPOffset = getMaxFPOffset(STI, *AFI);
2291   bool HasLargeArgumentList =
2292       HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;
2293 
2294   bool BigFrameOffsets = HasLargeStack || !HasBPOrFixedSP ||
2295                          HasLargeArgumentList || HasNonSPFrameIndex;
2296   LLVM_DEBUG(dbgs() << "EstimatedLimit: " << EstimatedRSStackSizeLimit
2297                     << "; EstimatedStack: " << EstimatedStackSize
2298                     << "; EstimatedFPStack: " << MaxFixedOffset - MaxFPOffset
2299                     << "; BigFrameOffsets: " << BigFrameOffsets << "\n");
2300   if (BigFrameOffsets ||
2301       !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
2302     AFI->setHasStackFrame(true);
2303 
2304     if (HasFP) {
2305       SavedRegs.set(FramePtr);
2306       // If the frame pointer is required by the ABI, also spill LR so that we
2307       // emit a complete frame record.
2308       if (MF.getTarget().Options.DisableFramePointerElim(MF) && !LRSpilled) {
2309         SavedRegs.set(ARM::LR);
2310         LRSpilled = true;
2311         NumGPRSpills++;
2312         auto LRPos = llvm::find(UnspilledCS1GPRs, ARM::LR);
2313         if (LRPos != UnspilledCS1GPRs.end())
2314           UnspilledCS1GPRs.erase(LRPos);
2315       }
2316       auto FPPos = llvm::find(UnspilledCS1GPRs, FramePtr);
2317       if (FPPos != UnspilledCS1GPRs.end())
2318         UnspilledCS1GPRs.erase(FPPos);
2319       NumGPRSpills++;
2320       if (FramePtr == ARM::R7)
2321         CS1Spilled = true;
2322     }
2323 
2324     // This is true when we inserted a spill for a callee-save GPR which is
    // not otherwise used by the function. This guarantees it is possible
2326     // to scavenge a register to hold the address of a stack slot. On Thumb1,
2327     // the register must be a valid operand to tSTRi, i.e. r4-r7. For other
2328     // subtargets, this is any GPR, i.e. r4-r11 or lr.
2329     //
2330     // If we don't insert a spill, we instead allocate an emergency spill
2331     // slot, which can be used by scavenging to spill an arbitrary register.
2332     //
2333     // We currently don't try to figure out whether any specific instruction
    // requires scavenging an additional register.
2335     bool ExtraCSSpill = false;
2336 
2337     if (AFI->isThumb1OnlyFunction()) {
2338       // For Thumb1-only targets, we need some low registers when we save and
2339       // restore the high registers (which aren't allocatable, but could be
2340       // used by inline assembly) because the push/pop instructions can not
2341       // access high registers. If necessary, we might need to push more low
2342       // registers to ensure that there is at least one free that can be used
2343       // for the saving & restoring, and preferably we should ensure that as
2344       // many as are needed are available so that fewer push/pop instructions
2345       // are required.
2346 
2347       // Low registers which are not currently pushed, but could be (r4-r7).
2348       SmallVector<unsigned, 4> AvailableRegs;
2349 
2350       // Unused argument registers (r0-r3) can be clobbered in the prologue for
2351       // free.
2352       int EntryRegDeficit = 0;
2353       for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
2354         if (!MF.getRegInfo().isLiveIn(Reg)) {
2355           --EntryRegDeficit;
2356           LLVM_DEBUG(dbgs()
2357                      << printReg(Reg, TRI)
2358                      << " is unused argument register, EntryRegDeficit = "
2359                      << EntryRegDeficit << "\n");
2360         }
2361       }
2362 
2363       // Unused return registers can be clobbered in the epilogue for free.
2364       int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
2365       LLVM_DEBUG(dbgs() << AFI->getReturnRegsCount()
2366                         << " return regs used, ExitRegDeficit = "
2367                         << ExitRegDeficit << "\n");
2368 
2369       int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit);
2370       LLVM_DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");
2371 
2372       // r4-r6 can be used in the prologue if they are pushed by the first push
2373       // instruction.
2374       for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
2375         if (SavedRegs.test(Reg)) {
2376           --RegDeficit;
2377           LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
2378                             << " is saved low register, RegDeficit = "
2379                             << RegDeficit << "\n");
2380         } else {
2381           AvailableRegs.push_back(Reg);
2382           LLVM_DEBUG(
2383               dbgs()
2384               << printReg(Reg, TRI)
2385               << " is non-saved low register, adding to AvailableRegs\n");
2386         }
2387       }
2388 
2389       // r7 can be used if it is not being used as the frame pointer.
2390       if (!HasFP) {
2391         if (SavedRegs.test(ARM::R7)) {
2392           --RegDeficit;
2393           LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
2394                             << RegDeficit << "\n");
2395         } else {
2396           AvailableRegs.push_back(ARM::R7);
2397           LLVM_DEBUG(
2398               dbgs()
2399               << "%r7 is non-saved low register, adding to AvailableRegs\n");
2400         }
2401       }
2402 
2403       // Each of r8-r11 needs to be copied to a low register, then pushed.
2404       for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
2405         if (SavedRegs.test(Reg)) {
2406           ++RegDeficit;
2407           LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
2408                             << " is saved high register, RegDeficit = "
2409                             << RegDeficit << "\n");
2410         }
2411       }
2412 
2413       // LR can only be used by PUSH, not POP, and can't be used at all if the
2414       // llvm.returnaddress intrinsic is used. This is only worth doing if we
2415       // are more limited at function entry than exit.
2416       if ((EntryRegDeficit > ExitRegDeficit) &&
2417           !(MF.getRegInfo().isLiveIn(ARM::LR) &&
2418             MF.getFrameInfo().isReturnAddressTaken())) {
2419         if (SavedRegs.test(ARM::LR)) {
2420           --RegDeficit;
2421           LLVM_DEBUG(dbgs() << "%lr is saved register, RegDeficit = "
2422                             << RegDeficit << "\n");
2423         } else {
2424           AvailableRegs.push_back(ARM::LR);
2425           LLVM_DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n");
2426         }
2427       }
2428 
2429       // If there are more high registers that need pushing than low registers
2430       // available, push some more low registers so that we can use fewer push
2431       // instructions. This might not reduce RegDeficit all the way to zero,
2432       // because we can only guarantee that r4-r6 are available, but r8-r11 may
2433       // need saving.
2434       LLVM_DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
2435       for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
2436         unsigned Reg = AvailableRegs.pop_back_val();
2437         LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2438                           << " to make up reg deficit\n");
2439         SavedRegs.set(Reg);
2440         NumGPRSpills++;
2441         CS1Spilled = true;
2442         assert(!MRI.isReserved(Reg) && "Should not be reserved");
2443         if (Reg != ARM::LR && !MRI.isPhysRegUsed(Reg))
2444           ExtraCSSpill = true;
2445         UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg));
2446         if (Reg == ARM::LR)
2447           LRSpilled = true;
2448       }
2449       LLVM_DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit
2450                         << "\n");
2451     }
2452 
2453     // Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
2454     // restore LR in that case.
2455     bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();
2456 
2457     // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
2458     // Spill LR as well so we can fold BX_RET to the registers restore (LDM).
2459     if (!LRSpilled && CS1Spilled && !ExpensiveLRRestore) {
2460       SavedRegs.set(ARM::LR);
2461       NumGPRSpills++;
2462       SmallVectorImpl<unsigned>::iterator LRPos;
2463       LRPos = llvm::find(UnspilledCS1GPRs, (unsigned)ARM::LR);
2464       if (LRPos != UnspilledCS1GPRs.end())
2465         UnspilledCS1GPRs.erase(LRPos);
2466 
2467       ForceLRSpill = false;
2468       if (!MRI.isReserved(ARM::LR) && !MRI.isPhysRegUsed(ARM::LR) &&
2469           !AFI->isThumb1OnlyFunction())
2470         ExtraCSSpill = true;
2471     }
2472 
2473     // If stack and double are 8-byte aligned and we are spilling an odd number
2474     // of GPRs, spill one extra callee save GPR so we won't have to pad between
2475     // the integer and double callee save areas.
2476     LLVM_DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
2477     const Align TargetAlign = getStackAlign();
2478     if (TargetAlign >= Align(8) && (NumGPRSpills & 1)) {
2479       if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
2480         for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
2481           unsigned Reg = UnspilledCS1GPRs[i];
2482           // Don't spill high register if the function is thumb.  In the case of
2483           // Windows on ARM, accept R11 (frame pointer)
2484           if (!AFI->isThumbFunction() ||
2485               (STI.isTargetWindows() && Reg == ARM::R11) ||
2486               isARMLowRegister(Reg) ||
2487               (Reg == ARM::LR && !ExpensiveLRRestore)) {
2488             SavedRegs.set(Reg);
2489             LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2490                               << " to make up alignment\n");
2491             if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg) &&
2492                 !(Reg == ARM::LR && AFI->isThumb1OnlyFunction()))
2493               ExtraCSSpill = true;
2494             break;
2495           }
2496         }
2497       } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
2498         unsigned Reg = UnspilledCS2GPRs.front();
2499         SavedRegs.set(Reg);
2500         LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2501                           << " to make up alignment\n");
2502         if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
2503           ExtraCSSpill = true;
2504       }
2505     }
2506 
2507     // Estimate if we might need to scavenge a register at some point in order
2508     // to materialize a stack offset. If so, either spill one additional
2509     // callee-saved register or reserve a special spill slot to facilitate
2510     // register scavenging. Thumb1 needs a spill slot for stack pointer
2511     // adjustments also, even when the frame itself is small.
2512     if (BigFrameOffsets && !ExtraCSSpill) {
2513       // If any non-reserved CS register isn't spilled, just spill one or two
2514       // extra. That should take care of it!
2515       unsigned NumExtras = TargetAlign.value() / 4;
2516       SmallVector<unsigned, 2> Extras;
2517       while (NumExtras && !UnspilledCS1GPRs.empty()) {
2518         unsigned Reg = UnspilledCS1GPRs.pop_back_val();
2519         if (!MRI.isReserved(Reg) &&
2520             (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg))) {
2521           Extras.push_back(Reg);
2522           NumExtras--;
2523         }
2524       }
2525       // For non-Thumb1 functions, also check for hi-reg CS registers
2526       if (!AFI->isThumb1OnlyFunction()) {
2527         while (NumExtras && !UnspilledCS2GPRs.empty()) {
2528           unsigned Reg = UnspilledCS2GPRs.pop_back_val();
2529           if (!MRI.isReserved(Reg)) {
2530             Extras.push_back(Reg);
2531             NumExtras--;
2532           }
2533         }
2534       }
2535       if (NumExtras == 0) {
2536         for (unsigned Reg : Extras) {
2537           SavedRegs.set(Reg);
2538           if (!MRI.isPhysRegUsed(Reg))
2539             ExtraCSSpill = true;
2540         }
2541       }
2542       if (!ExtraCSSpill && RS) {
2543         // Reserve a slot closest to SP or frame pointer.
2544         LLVM_DEBUG(dbgs() << "Reserving emergency spill slot\n");
2545         const TargetRegisterClass &RC = ARM::GPRRegClass;
2546         unsigned Size = TRI->getSpillSize(RC);
2547         Align Alignment = TRI->getSpillAlign(RC);
2548         RS->addScavengingFrameIndex(
2549             MFI.CreateStackObject(Size, Alignment, false));
2550       }
2551     }
2552   }
2553 
2554   if (ForceLRSpill)
2555     SavedRegs.set(ARM::LR);
2556   AFI->setLRIsSpilled(SavedRegs.test(ARM::LR));
2557 }
2558 
2559 void ARMFrameLowering::getCalleeSaves(const MachineFunction &MF,
2560                                       BitVector &SavedRegs) const {
2561   TargetFrameLowering::getCalleeSaves(MF, SavedRegs);
2562 
2563   // If we have the "returned" parameter attribute which guarantees that we
2564   // return the value which was passed in r0 unmodified (e.g. C++ 'structors),
2565   // record that fact for IPRA.
2566   const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2567   if (AFI->getPreservesR0())
2568     SavedRegs.set(ARM::R0);
2569 }
2570 
2571 bool ARMFrameLowering::assignCalleeSavedSpillSlots(
2572     MachineFunction &MF, const TargetRegisterInfo *TRI,
2573     std::vector<CalleeSavedInfo> &CSI) const {
2574   // For CMSE entry functions, handle floating-point context as if it was a
2575   // callee-saved register.
2576   if (STI.hasV8_1MMainlineOps() &&
2577       MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction()) {
2578     CSI.emplace_back(ARM::FPCXTNS);
2579     CSI.back().setRestored(false);
2580   }
2581 
2582   // For functions, which sign their return address, upon function entry, the
2583   // return address PAC is computed in R12. Treat R12 as a callee-saved register
2584   // in this case.
2585   const auto &AFI = *MF.getInfo<ARMFunctionInfo>();
2586   if (AFI.shouldSignReturnAddress()) {
2587     // The order of register must match the order we push them, because the
2588     // PEI assigns frame indices in that order. When compiling for return
2589     // address sign and authenication, we use split push, therefore the orders
2590     // we want are:
2591     // LR, R7, R6, R5, R4, <R12>, R11, R10,  R9,  R8, D15-D8
2592     CSI.insert(find_if(CSI,
2593                        [=](const auto &CS) {
2594                          Register Reg = CS.getReg();
2595                          return Reg == ARM::R10 || Reg == ARM::R11 ||
2596                                 Reg == ARM::R8 || Reg == ARM::R9 ||
2597                                 ARM::DPRRegClass.contains(Reg);
2598                        }),
2599                CalleeSavedInfo(ARM::R12));
2600   }
2601 
2602   return false;
2603 }
2604 
2605 const TargetFrameLowering::SpillSlot *
2606 ARMFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
2607   static const SpillSlot FixedSpillOffsets[] = {{ARM::FPCXTNS, -4}};
2608   NumEntries = array_lengthof(FixedSpillOffsets);
2609   return FixedSpillOffsets;
2610 }
2611 
// Replace the ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudo-instruction at I with
// real SP arithmetic, or with nothing at all when the adjustment is not
// needed (reserved call frame, or the callee pops the arguments itself).
// Returns an iterator to the instruction following the erased pseudo.
// Thumb1 has its own implementation (see the assert below).
MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  bool isARM = !AFI->isThumbFunction();
  DebugLoc dl = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII.getCallFrameDestroyOpcode();
  // Operand 1 of the destroy pseudo is the number of bytes the callee pops.
  unsigned CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  assert(!AFI->isThumb1OnlyFunction() &&
         "This eliminateCallFramePseudoInstr does not support Thumb1!");

  // Preserve any predication on the pseudo so the replacement SP updates
  // execute under the same condition.
  int PIdx = I->findFirstPredOperandIdx();
  ARMCC::CondCodes Pred = (PIdx == -1)
                              ? ARMCC::AL
                              : (ARMCC::CondCodes)I->getOperand(PIdx).getImm();
  unsigned PredReg = TII.getFramePred(*I);

  if (!hasReservedCallFrame(MF)) {
    // Bail early if the callee is expected to do the adjustment.
    if (IsDestroy && CalleePopAmount != -1U)
      return MBB.erase(I);

    // If we have alloca, convert as follows:
    // ADJCALLSTACKDOWN -> sub, sp, sp, amount
    // ADJCALLSTACKUP   -> add, sp, sp, amount
    unsigned Amount = TII.getFrameSize(*I);
    if (Amount != 0) {
      // We need to keep the stack aligned properly.  To do this, we round the
      // amount of space needed for the outgoing arguments up to the next
      // alignment boundary.
      Amount = alignSPAdjust(Amount);

      if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
        emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
                     Pred, PredReg);
      } else {
        assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
        emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
                     Pred, PredReg);
      }
    }
  } else if (CalleePopAmount != -1U) {
    // If the calling convention demands that the callee pops arguments from the
    // stack, we want to add it back if we have a reserved call frame.
    emitSPUpdate(isARM, MBB, I, dl, TII, -CalleePopAmount,
                 MachineInstr::NoFlags, Pred, PredReg);
  }
  return MBB.erase(I);
}
2665 
/// Get the minimum constant for ARM that is greater than or equal to the
/// argument. In ARM, constants can have any value that can be produced by
/// rotating an 8-bit value to the right by an even number of bits within a
/// 32-bit word.
static uint32_t alignToARMConstant(uint32_t Value) {
  if (Value == 0)
    return 0;

  // Normalize so the leading payload sits in the top two bits. Each step
  // shifts by two because ARM immediates rotate by even amounts only.
  unsigned Shifted = 0;
  while ((Value & 0xC0000000) == 0) {
    Value <<= 2;
    Shifted += 2;
  }

  // Round up: keep only the top byte, bumping it if any lower bits were set.
  bool RoundUp = (Value & 0x00FFFFFF) != 0;
  Value = (Value >> 24) + (RoundUp ? 1 : 0);

  // If the bump carried into a ninth bit, drop the low bits that can no
  // longer be represented in the 8-bit payload.
  if (Value & 0x00000100)
    Value &= 0x000001FC;

  // Undo the normalization, restoring the value's original magnitude.
  return Shifted > 24 ? (Value >> (Shifted - 24)) : (Value << (24 - Shifted));
}
2694 
// The stack limit in the TCB is set to this many bytes above the actual
// stack limit. Frames needing less than this slack can compare SP directly
// against the recorded limit instead of first computing SP - StackSize.
static const uint64_t kSplitStackAvailable = 256;
2698 
2699 // Adjust the function prologue to enable split stacks. This currently only
2700 // supports android and linux.
2701 //
2702 // The ABI of the segmented stack prologue is a little arbitrarily chosen, but
2703 // must be well defined in order to allow for consistent implementations of the
2704 // __morestack helper function. The ABI is also not a normal ABI in that it
2705 // doesn't follow the normal calling conventions because this allows the
2706 // prologue of each function to be optimized further.
2707 //
2708 // Currently, the ABI looks like (when calling __morestack)
2709 //
2710 //  * r4 holds the minimum stack size requested for this function call
2711 //  * r5 holds the stack size of the arguments to the function
2712 //  * the beginning of the function is 3 instructions after the call to
2713 //    __morestack
2714 //
2715 // Implementations of __morestack should use r4 to allocate a new stack, r5 to
2716 // place the arguments on to the new stack, and the 3-instruction knowledge to
2717 // jump directly to the body of the function when working on the new stack.
2718 //
2719 // An old (and possibly no longer compatible) implementation of __morestack for
2720 // ARM can be found at [1].
2721 //
2722 // [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
2723 void ARMFrameLowering::adjustForSegmentedStacks(
2724     MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
2725   unsigned Opcode;
2726   unsigned CFIIndex;
2727   const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
2728   bool Thumb = ST->isThumb();
2729   bool Thumb2 = ST->isThumb2();
2730 
2731   // Sadly, this currently doesn't support varargs, platforms other than
  // android/linux. Note that thumb1/thumb2 are supported on android/linux.
2733   if (MF.getFunction().isVarArg())
2734     report_fatal_error("Segmented stacks do not support vararg functions.");
2735   if (!ST->isTargetAndroid() && !ST->isTargetLinux())
2736     report_fatal_error("Segmented stacks not supported on this platform.");
2737 
2738   MachineFrameInfo &MFI = MF.getFrameInfo();
2739   MachineModuleInfo &MMI = MF.getMMI();
2740   MCContext &Context = MMI.getContext();
2741   const MCRegisterInfo *MRI = Context.getRegisterInfo();
2742   const ARMBaseInstrInfo &TII =
2743       *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2744   ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
2745   DebugLoc DL;
2746 
2747   if (!MFI.needsSplitStackProlog())
2748     return;
2749 
2750   uint64_t StackSize = MFI.getStackSize();
2751 
2752   // Use R4 and R5 as scratch registers.
2753   // We save R4 and R5 before use and restore them before leaving the function.
2754   unsigned ScratchReg0 = ARM::R4;
2755   unsigned ScratchReg1 = ARM::R5;
2756   uint64_t AlignedStackSize;
2757 
2758   MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
2759   MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
2760   MachineBasicBlock *AllocMBB = MF.CreateMachineBasicBlock();
2761   MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
2762   MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();
2763 
  // Grab everything that reaches PrologueMBB to update their liveness as well.
2765   SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
2766   SmallVector<MachineBasicBlock *, 2> WalkList;
2767   WalkList.push_back(&PrologueMBB);
2768 
2769   do {
2770     MachineBasicBlock *CurMBB = WalkList.pop_back_val();
2771     for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
2772       if (BeforePrologueRegion.insert(PredBB).second)
2773         WalkList.push_back(PredBB);
2774     }
2775   } while (!WalkList.empty());
2776 
2777   // The order in that list is important.
2778   // The blocks will all be inserted before PrologueMBB using that order.
2779   // Therefore the block that should appear first in the CFG should appear
2780   // first in the list.
2781   MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
2782                                       PostStackMBB};
2783 
2784   for (MachineBasicBlock *B : AddedBlocks)
2785     BeforePrologueRegion.insert(B);
2786 
2787   for (const auto &LI : PrologueMBB.liveins()) {
2788     for (MachineBasicBlock *PredBB : BeforePrologueRegion)
2789       PredBB->addLiveIn(LI);
2790   }
2791 
2792   // Remove the newly added blocks from the list, since we know
2793   // we do not have to do the following updates for them.
2794   for (MachineBasicBlock *B : AddedBlocks) {
2795     BeforePrologueRegion.erase(B);
2796     MF.insert(PrologueMBB.getIterator(), B);
2797   }
2798 
2799   for (MachineBasicBlock *MBB : BeforePrologueRegion) {
2800     // Make sure the LiveIns are still sorted and unique.
2801     MBB->sortUniqueLiveIns();
2802     // Replace the edges to PrologueMBB by edges to the sequences
2803     // we are about to add, but only update for immediate predecessors.
2804     if (MBB->isSuccessor(&PrologueMBB))
2805       MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
2806   }
2807 
2808   // The required stack size that is aligned to ARM constant criterion.
2809   AlignedStackSize = alignToARMConstant(StackSize);
2810 
2811   // When the frame size is less than 256 we just compare the stack
2812   // boundary directly to the value of the stack pointer, per gcc.
2813   bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;
2814 
2815   // We will use two of the callee save registers as scratch registers so we
2816   // need to save those registers onto the stack.
2817   // We will use SR0 to hold stack limit and SR1 to hold the stack size
2818   // requested and arguments for __morestack().
2819   // SR0: Scratch Register #0
2820   // SR1: Scratch Register #1
2821   // push {SR0, SR1}
2822   if (Thumb) {
2823     BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))
2824         .add(predOps(ARMCC::AL))
2825         .addReg(ScratchReg0)
2826         .addReg(ScratchReg1);
2827   } else {
2828     BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
2829         .addReg(ARM::SP, RegState::Define)
2830         .addReg(ARM::SP)
2831         .add(predOps(ARMCC::AL))
2832         .addReg(ScratchReg0)
2833         .addReg(ScratchReg1);
2834   }
2835 
2836   // Emit the relevant DWARF information about the change in stack pointer as
2837   // well as where to find both r4 and r5 (the callee-save registers)
2838   if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    // Emit CFI noting the CFA is now 8 bytes above SP and record where the two
    // scratch registers can be found — presumably {SR0, SR1} were pushed just
    // before this point (the push itself is outside this excerpt).
    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 8));
    BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
        nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
    BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
        nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
    BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }

  // Compute the value that will be compared against the stack limit.
  // When CompareStackPointer is set, SP itself is compared (no frame needed),
  // so SR1 is just a copy of SP; otherwise SR1 = SP - AlignedStackSize below.
  // mov SR1, sp
  if (Thumb) {
    BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL));
  } else if (CompareStackPointer) {
    BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .add(condCodeOp());
  }

  // sub SR1, sp, #StackSize
  if (!CompareStackPointer && Thumb) {
    if (AlignedStackSize < 256) {
      // Small size fits the Thumb1 8-bit immediate subtract.
      BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
          .add(condCodeOp())
          .addReg(ScratchReg1)
          .addImm(AlignedStackSize)
          .add(predOps(ARMCC::AL));
    } else {
      // Large size: materialize the constant in SR0 first, then subtract
      // register-register.
      if (Thumb2) {
        BuildMI(McrMBB, DL, TII.get(ARM::t2MOVi32imm), ScratchReg0)
            .addImm(AlignedStackSize);
      } else {
        // Thumb1 has no movi32; load the constant from the constant pool.
        auto MBBI = McrMBB->end();
        auto RegInfo = STI.getRegisterInfo();
        RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
                                   AlignedStackSize);
      }
      BuildMI(McrMBB, DL, TII.get(ARM::tSUBrr), ScratchReg1)
          .add(condCodeOp())
          .addReg(ScratchReg1)
          .addReg(ScratchReg0)
          .add(predOps(ARMCC::AL));
    }
  } else if (!CompareStackPointer) {
    // ARM mode: same small/large split, subtracting directly from SP.
    if (AlignedStackSize < 256) {
      BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
          .addReg(ARM::SP)
          .addImm(AlignedStackSize)
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    } else {
      auto MBBI = McrMBB->end();
      auto RegInfo = STI.getRegisterInfo();
      RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
                                 AlignedStackSize);
      BuildMI(McrMBB, DL, TII.get(ARM::SUBrr), ScratchReg1)
          .addReg(ARM::SP)
          .addReg(ScratchReg0)
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    }
  }

  // Load the current stack limit into SR0. Thumb1 cannot use MRC to read the
  // thread pointer, so it goes through the __STACK_LIMIT symbol instead.
  if (Thumb && ST->isThumb1Only()) {
    unsigned PCLabelId = ARMFI->createPICLabelUId();
    ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
        MF.getFunction().getContext(), "__STACK_LIMIT", PCLabelId, 0);
    MachineConstantPool *MCP = MF.getConstantPool();
    unsigned CPI = MCP->getConstantPoolIndex(NewCPV, Align(4));

    // ldr SR0, [pc, offset(STACK_LIMIT)]
    BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
        .addConstantPoolIndex(CPI)
        .add(predOps(ARMCC::AL));

    // ldr SR0, [SR0]
    BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
        .addReg(ScratchReg0)
        .addImm(0)
        .add(predOps(ARMCC::AL));
  } else {
    // Get TLS base address from the coprocessor
    // mrc p15, #0, SR0, c13, c0, #3
    BuildMI(McrMBB, DL, TII.get(Thumb ? ARM::t2MRC : ARM::MRC),
            ScratchReg0)
        .addImm(15)
        .addImm(0)
        .addImm(13)
        .addImm(0)
        .addImm(3)
        .add(predOps(ARMCC::AL));

    // Use the last tls slot on android and a private field of the TCP on linux.
    assert(ST->isTargetAndroid() || ST->isTargetLinux());
    unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;

    // Get the stack limit from the right offset
    // ldr SR0, [sr0, #4 * TlsOffset]
    BuildMI(GetMBB, DL, TII.get(Thumb ? ARM::t2LDRi12 : ARM::LDRi12),
            ScratchReg0)
        .addReg(ScratchReg0)
        .addImm(4 * TlsOffset)
        .add(predOps(ARMCC::AL));
  }

  // Compare stack limit with stack size requested.
  // cmp SR0, SR1
  Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
  BuildMI(GetMBB, DL, TII.get(Opcode))
      .addReg(ScratchReg0)
      .addReg(ScratchReg1)
      .add(predOps(ARMCC::AL));

  // This jump is taken if StackLimit < SP - stack required.
  // i.e. there is enough room; skip the __morestack call (AllocMBB).
  Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
  BuildMI(GetMBB, DL, TII.get(Opcode)).addMBB(PostStackMBB)
       .addImm(ARMCC::LO)
       .addReg(ARM::CPSR);


  // Calling __morestack(StackSize, Size of stack arguments).
  // __morestack knows that the stack size requested is in SR0(r4)
  // and amount size of stack arguments is in SR1(r5).

  // Pass first argument for the __morestack by Scratch Register #0.
  //   The amount size of stack required
  if (Thumb) {
    if (AlignedStackSize < 256) {
      BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
          .add(condCodeOp())
          .addImm(AlignedStackSize)
          .add(predOps(ARMCC::AL));
    } else {
      if (Thumb2) {
        BuildMI(AllocMBB, DL, TII.get(ARM::t2MOVi32imm), ScratchReg0)
            .addImm(AlignedStackSize);
      } else {
        auto MBBI = AllocMBB->end();
        auto RegInfo = STI.getRegisterInfo();
        RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
                                   AlignedStackSize);
      }
    }
  } else {
    if (AlignedStackSize < 256) {
      BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
          .addImm(AlignedStackSize)
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    } else {
      auto MBBI = AllocMBB->end();
      auto RegInfo = STI.getRegisterInfo();
      RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
                                 AlignedStackSize);
    }
  }

  // Pass second argument for the __morestack by Scratch Register #1.
  //   The amount size of stack consumed to save function arguments.
  if (Thumb) {
    // NOTE(review): this branch tests the *unaligned* getArgumentStackSize()
    // against 256 but emits the aligned value, whereas the ARM branch below
    // tests the aligned value — confirm the asymmetry is intentional.
    if (ARMFI->getArgumentStackSize() < 256) {
      BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
          .add(condCodeOp())
          .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
          .add(predOps(ARMCC::AL));
    } else {
      if (Thumb2) {
        BuildMI(AllocMBB, DL, TII.get(ARM::t2MOVi32imm), ScratchReg1)
            .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()));
      } else {
        auto MBBI = AllocMBB->end();
        auto RegInfo = STI.getRegisterInfo();
        RegInfo->emitLoadConstPool(
            *AllocMBB, MBBI, DL, ScratchReg1, 0,
            alignToARMConstant(ARMFI->getArgumentStackSize()));
      }
    }
  } else {
    if (alignToARMConstant(ARMFI->getArgumentStackSize()) < 256) {
      BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
          .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    } else {
      auto MBBI = AllocMBB->end();
      auto RegInfo = STI.getRegisterInfo();
      RegInfo->emitLoadConstPool(
          *AllocMBB, MBBI, DL, ScratchReg1, 0,
          alignToARMConstant(ARMFI->getArgumentStackSize()));
    }
  }

  // push {lr} - Save return address of this function.
  if (Thumb) {
    BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))
        .add(predOps(ARMCC::AL))
        .addReg(ARM::LR);
  } else {
    // ARM mode uses STMDB with SP writeback as the push equivalent.
    BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .addReg(ARM::LR);
  }

  // Emit the DWARF info about the change in stack as well as where to find the
  // previous link register
  // (CFA offset 12 = the 8 bytes of saved scratch registers + 4 for LR.)
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 12));
    BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
        nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
    BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }

  // Call __morestack().
  if (Thumb) {
    BuildMI(AllocMBB, DL, TII.get(ARM::tBL))
        .add(predOps(ARMCC::AL))
        .addExternalSymbol("__morestack");
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::BL))
        .addExternalSymbol("__morestack");
  }

  // pop {lr} - Restore return address of this original function.
  if (Thumb) {
    if (ST->isThumb1Only()) {
      // Thumb1 POP cannot write LR directly, so pop into SR0 and move it over.
      BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
          .add(predOps(ARMCC::AL))
          .addReg(ScratchReg0);
      BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
          .addReg(ScratchReg0)
          .add(predOps(ARMCC::AL));
    } else {
      // Thumb2: post-indexed load, ldr lr, [sp], #4.
      BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
          .addReg(ARM::LR, RegState::Define)
          .addReg(ARM::SP, RegState::Define)
          .addReg(ARM::SP)
          .addImm(4)
          .add(predOps(ARMCC::AL));
    }
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .addReg(ARM::LR);
  }

  // Restore SR0 and SR1 in case of __morestack() was called.
  // __morestack() will skip PostStackMBB block so we need to restore
  // scratch registers from here.
  // pop {SR0, SR1}
  if (Thumb) {
    BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  }

  // Update the CFA offset now that we've popped
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
    BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }

  // Return from this function.
  // (__morestack has already allocated the new stack and arranged to re-enter
  // the function body; AllocMBB therefore ends in a return.)
  BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL));

  // Restore SR0 and SR1 in case of __morestack() was not called.
  // pop {SR0, SR1}
  if (Thumb) {
    BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  } else {
    BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  }

  // Update the CFA offset now that we've popped
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
    BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);

    // Tell debuggers that r4 and r5 are now the same as they were in the
    // previous function, that they're the "Same Value".
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
        nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
    BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
        nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
    BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }

  // Organizing MBB lists
  // Wire up the CFG: PrevStack -> Mcr -> Get -> {PostStack | Alloc},
  // Alloc -> PostStack, PostStack -> original prologue block.
  PostStackMBB->addSuccessor(&PrologueMBB);

  AllocMBB->addSuccessor(PostStackMBB);

  GetMBB->addSuccessor(PostStackMBB);
  GetMBB->addSuccessor(AllocMBB);

  McrMBB->addSuccessor(GetMBB);

  PrevStackMBB->addSuccessor(McrMBB);

#ifdef EXPENSIVE_CHECKS
  MF.verify();
#endif
}
3175