1 //===-- Thumb1FrameLowering.cpp - Thumb1 Frame Information ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the Thumb1 implementation of TargetFrameLowering class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "ARMBaseInstrInfo.h"
15 #include "ARMBaseRegisterInfo.h"
16 #include "ARMMachineFunctionInfo.h"
17 #include "ARMSubtarget.h"
18 #include "MCTargetDesc/ARMBaseInfo.h"
19 #include "Thumb1FrameLowering.h"
20 #include "Thumb1InstrInfo.h"
21 #include "ThumbRegisterInfo.h"
22 #include "llvm/ADT/BitVector.h"
23 #include "llvm/ADT/SmallSet.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/CodeGen/LivePhysRegs.h"
27 #include "llvm/CodeGen/MachineBasicBlock.h"
28 #include "llvm/CodeGen/MachineFrameInfo.h"
29 #include "llvm/CodeGen/MachineFunction.h"
30 #include "llvm/CodeGen/MachineInstr.h"
31 #include "llvm/CodeGen/MachineInstrBuilder.h"
32 #include "llvm/CodeGen/MachineOperand.h"
33 #include "llvm/CodeGen/MachineModuleInfo.h"
34 #include "llvm/CodeGen/MachineRegisterInfo.h"
35 #include "llvm/IR/DebugLoc.h"
36 #include "llvm/MC/MCDwarf.h"
37 #include "llvm/Support/Compiler.h"
38 #include "llvm/Support/ErrorHandling.h"
39 #include "llvm/Target/TargetInstrInfo.h"
40 #include "llvm/Target/TargetSubtargetInfo.h"
41 #include <cassert>
42 #include <iterator>
43 #include <vector>
44 
45 using namespace llvm;
46 
47 Thumb1FrameLowering::Thumb1FrameLowering(const ARMSubtarget &sti)
48     : ARMFrameLowering(sti) {}
49 
50 bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const{
51   const MachineFrameInfo &MFI = MF.getFrameInfo();
52   unsigned CFSize = MFI.getMaxCallFrameSize();
53   // It's not always a good idea to include the call frame as part of the
54   // stack frame. ARM (especially Thumb) has small immediate offset to
55   // address the stack frame. So a large call frame can cause poor codegen
56   // and may even makes it impossible to scavenge a register.
57   if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4
58     return false;
59 
60   return !MFI.hasVarSizedObjects();
61 }
62 
63 static void emitSPUpdate(MachineBasicBlock &MBB,
64                          MachineBasicBlock::iterator &MBBI,
65                          const TargetInstrInfo &TII, const DebugLoc &dl,
66                          const ThumbRegisterInfo &MRI, int NumBytes,
67                          unsigned MIFlags = MachineInstr::NoFlags) {
68   emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII,
69                             MRI, MIFlags);
70 }
71 
72 
73 MachineBasicBlock::iterator Thumb1FrameLowering::
74 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
75                               MachineBasicBlock::iterator I) const {
76   const Thumb1InstrInfo &TII =
77       *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
78   const ThumbRegisterInfo *RegInfo =
79       static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
80   if (!hasReservedCallFrame(MF)) {
81     // If we have alloca, convert as follows:
82     // ADJCALLSTACKDOWN -> sub, sp, sp, amount
83     // ADJCALLSTACKUP   -> add, sp, sp, amount
84     MachineInstr &Old = *I;
85     DebugLoc dl = Old.getDebugLoc();
86     unsigned Amount = Old.getOperand(0).getImm();
87     if (Amount != 0) {
88       // We need to keep the stack aligned properly.  To do this, we round the
89       // amount of space needed for the outgoing arguments up to the next
90       // alignment boundary.
91       unsigned Align = getStackAlignment();
92       Amount = (Amount+Align-1)/Align*Align;
93 
94       // Replace the pseudo instruction with a new instruction...
95       unsigned Opc = Old.getOpcode();
96       if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
97         emitSPUpdate(MBB, I, TII, dl, *RegInfo, -Amount);
98       } else {
99         assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
100         emitSPUpdate(MBB, I, TII, dl, *RegInfo, Amount);
101       }
102     }
103   }
104   return MBB.erase(I);
105 }
106 
107 void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
108                                        MachineBasicBlock &MBB) const {
109   MachineBasicBlock::iterator MBBI = MBB.begin();
110   MachineFrameInfo &MFI = MF.getFrameInfo();
111   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
112   MachineModuleInfo &MMI = MF.getMMI();
113   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
114   const ThumbRegisterInfo *RegInfo =
115       static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
116   const Thumb1InstrInfo &TII =
117       *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
118 
119   unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
120   unsigned NumBytes = MFI.getStackSize();
121   assert(NumBytes >= ArgRegsSaveSize &&
122          "ArgRegsSaveSize is included in NumBytes");
123   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
124 
125   // Debug location must be unknown since the first debug location is used
126   // to determine the end of the prologue.
127   DebugLoc dl;
128 
129   unsigned FramePtr = RegInfo->getFrameRegister(MF);
130   unsigned BasePtr = RegInfo->getBaseRegister();
131   int CFAOffset = 0;
132 
133   // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
134   NumBytes = (NumBytes + 3) & ~3;
135   MFI.setStackSize(NumBytes);
136 
137   // Determine the sizes of each callee-save spill areas and record which frame
138   // belongs to which callee-save spill areas.
139   unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
140   int FramePtrSpillFI = 0;
141 
142   if (ArgRegsSaveSize) {
143     emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -ArgRegsSaveSize,
144                  MachineInstr::FrameSetup);
145     CFAOffset -= ArgRegsSaveSize;
146     unsigned CFIIndex = MF.addFrameInst(
147         MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
148     BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
149         .addCFIIndex(CFIIndex)
150         .setMIFlags(MachineInstr::FrameSetup);
151   }
152 
153   if (!AFI->hasStackFrame()) {
154     if (NumBytes - ArgRegsSaveSize != 0) {
155       emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -(NumBytes - ArgRegsSaveSize),
156                    MachineInstr::FrameSetup);
157       CFAOffset -= NumBytes - ArgRegsSaveSize;
158       unsigned CFIIndex = MF.addFrameInst(
159           MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
160       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
161           .addCFIIndex(CFIIndex)
162           .setMIFlags(MachineInstr::FrameSetup);
163     }
164     return;
165   }
166 
167   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
168     unsigned Reg = CSI[i].getReg();
169     int FI = CSI[i].getFrameIdx();
170     switch (Reg) {
171     case ARM::R8:
172     case ARM::R9:
173     case ARM::R10:
174     case ARM::R11:
175       if (STI.splitFramePushPop(MF)) {
176         GPRCS2Size += 4;
177         break;
178       }
179       LLVM_FALLTHROUGH;
180     case ARM::R4:
181     case ARM::R5:
182     case ARM::R6:
183     case ARM::R7:
184     case ARM::LR:
185       if (Reg == FramePtr)
186         FramePtrSpillFI = FI;
187       GPRCS1Size += 4;
188       break;
189     default:
190       DPRCSSize += 8;
191     }
192   }
193 
194   if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
195     ++MBBI;
196   }
197 
198   // Determine starting offsets of spill areas.
199   unsigned DPRCSOffset  = NumBytes - ArgRegsSaveSize - (GPRCS1Size + GPRCS2Size + DPRCSSize);
200   unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
201   unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
202   bool HasFP = hasFP(MF);
203   if (HasFP)
204     AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
205                                 NumBytes);
206   AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
207   AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
208   AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
209   NumBytes = DPRCSOffset;
210 
211   int FramePtrOffsetInBlock = 0;
212   unsigned adjustedGPRCS1Size = GPRCS1Size;
213   if (GPRCS1Size > 0 && GPRCS2Size == 0 &&
214       tryFoldSPUpdateIntoPushPop(STI, MF, &*std::prev(MBBI), NumBytes)) {
215     FramePtrOffsetInBlock = NumBytes;
216     adjustedGPRCS1Size += NumBytes;
217     NumBytes = 0;
218   }
219 
220   if (adjustedGPRCS1Size) {
221     CFAOffset -= adjustedGPRCS1Size;
222     unsigned CFIIndex = MF.addFrameInst(
223         MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
224     BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
225         .addCFIIndex(CFIIndex)
226         .setMIFlags(MachineInstr::FrameSetup);
227   }
228   for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
229          E = CSI.end(); I != E; ++I) {
230     unsigned Reg = I->getReg();
231     int FI = I->getFrameIdx();
232     switch (Reg) {
233     case ARM::R8:
234     case ARM::R9:
235     case ARM::R10:
236     case ARM::R11:
237     case ARM::R12:
238       if (STI.splitFramePushPop(MF))
239         break;
240       // fallthough
241     case ARM::R0:
242     case ARM::R1:
243     case ARM::R2:
244     case ARM::R3:
245     case ARM::R4:
246     case ARM::R5:
247     case ARM::R6:
248     case ARM::R7:
249     case ARM::LR:
250       unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
251           nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
252       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
253           .addCFIIndex(CFIIndex)
254           .setMIFlags(MachineInstr::FrameSetup);
255       break;
256     }
257   }
258 
259   // Adjust FP so it point to the stack slot that contains the previous FP.
260   if (HasFP) {
261     FramePtrOffsetInBlock +=
262         MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize;
263     BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
264         .addReg(ARM::SP)
265         .addImm(FramePtrOffsetInBlock / 4)
266         .setMIFlags(MachineInstr::FrameSetup)
267         .add(predOps(ARMCC::AL));
268     if(FramePtrOffsetInBlock) {
269       CFAOffset += FramePtrOffsetInBlock;
270       unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
271           nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset));
272       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
273           .addCFIIndex(CFIIndex)
274           .setMIFlags(MachineInstr::FrameSetup);
275     } else {
276       unsigned CFIIndex =
277           MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
278               nullptr, MRI->getDwarfRegNum(FramePtr, true)));
279       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
280           .addCFIIndex(CFIIndex)
281           .setMIFlags(MachineInstr::FrameSetup);
282     }
283     if (NumBytes > 508)
284       // If offset is > 508 then sp cannot be adjusted in a single instruction,
285       // try restoring from fp instead.
286       AFI->setShouldRestoreSPFromFP(true);
287   }
288 
289   // Skip past the spilling of r8-r11, which could consist of multiple tPUSH
290   // and tMOVr instructions. We don't need to add any call frame information
291   // in-between these instructions, because they do not modify the high
292   // registers.
293   while (true) {
294     MachineBasicBlock::iterator OldMBBI = MBBI;
295     // Skip a run of tMOVr instructions
296     while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr)
297       MBBI++;
298     if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
299       MBBI++;
300     } else {
301       // We have reached an instruction which is not a push, so the previous
302       // run of tMOVr instructions (which may have been empty) was not part of
303       // the prologue. Reset MBBI back to the last PUSH of the prologue.
304       MBBI = OldMBBI;
305       break;
306     }
307   }
308 
309   // Emit call frame information for the callee-saved high registers.
310   for (auto &I : CSI) {
311     unsigned Reg = I.getReg();
312     int FI = I.getFrameIdx();
313     switch (Reg) {
314     case ARM::R8:
315     case ARM::R9:
316     case ARM::R10:
317     case ARM::R11:
318     case ARM::R12: {
319       unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
320           nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
321       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
322           .addCFIIndex(CFIIndex)
323           .setMIFlags(MachineInstr::FrameSetup);
324       break;
325     }
326     default:
327       break;
328     }
329   }
330 
331   if (NumBytes) {
332     // Insert it after all the callee-save spills.
333     emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes,
334                  MachineInstr::FrameSetup);
335     if (!HasFP) {
336       CFAOffset -= NumBytes;
337       unsigned CFIIndex = MF.addFrameInst(
338           MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
339       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
340           .addCFIIndex(CFIIndex)
341           .setMIFlags(MachineInstr::FrameSetup);
342     }
343   }
344 
345   if (STI.isTargetELF() && HasFP)
346     MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
347                             AFI->getFramePtrSpillOffset());
348 
349   AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
350   AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
351   AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
352 
353   // Thumb1 does not currently support dynamic stack realignment.  Report a
354   // fatal error rather then silently generate bad code.
355   if (RegInfo->needsStackRealignment(MF))
356       report_fatal_error("Dynamic stack realignment not supported for thumb1.");
357 
358   // If we need a base pointer, set it up here. It's whatever the value
359   // of the stack pointer is at this point. Any variable size objects
360   // will be allocated after this, so we can still use the base pointer
361   // to reference locals.
362   if (RegInfo->hasBasePointer(MF))
363     BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr)
364         .addReg(ARM::SP)
365         .add(predOps(ARMCC::AL));
366 
367   // If the frame has variable sized objects then the epilogue must restore
368   // the sp from fp. We can assume there's an FP here since hasFP already
369   // checks for hasVarSizedObjects.
370   if (MFI.hasVarSizedObjects())
371     AFI->setShouldRestoreSPFromFP(true);
372 
373   // In some cases, virtual registers have been introduced, e.g. by uses of
374   // emitThumbRegPlusImmInReg.
375   MF.getProperties().reset(MachineFunctionProperties::Property::NoVRegs);
376 }
377 
378 static bool isCSRestore(MachineInstr &MI, const MCPhysReg *CSRegs) {
379   if (MI.getOpcode() == ARM::tLDRspi && MI.getOperand(1).isFI() &&
380       isCalleeSavedRegister(MI.getOperand(0).getReg(), CSRegs))
381     return true;
382   else if (MI.getOpcode() == ARM::tPOP) {
383     return true;
384   } else if (MI.getOpcode() == ARM::tMOVr) {
385     unsigned Dst = MI.getOperand(0).getReg();
386     unsigned Src = MI.getOperand(1).getReg();
387     return ((ARM::tGPRRegClass.contains(Src) || Src == ARM::LR) &&
388             ARM::hGPRRegClass.contains(Dst));
389   }
390   return false;
391 }
392 
393 void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
394                                    MachineBasicBlock &MBB) const {
395   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
396   DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
397   MachineFrameInfo &MFI = MF.getFrameInfo();
398   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
399   const ThumbRegisterInfo *RegInfo =
400       static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
401   const Thumb1InstrInfo &TII =
402       *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
403 
404   unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
405   int NumBytes = (int)MFI.getStackSize();
406   assert((unsigned)NumBytes >= ArgRegsSaveSize &&
407          "ArgRegsSaveSize is included in NumBytes");
408   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
409   unsigned FramePtr = RegInfo->getFrameRegister(MF);
410 
411   if (!AFI->hasStackFrame()) {
412     if (NumBytes - ArgRegsSaveSize != 0)
413       emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes - ArgRegsSaveSize);
414   } else {
415     // Unwind MBBI to point to first LDR / VLDRD.
416     if (MBBI != MBB.begin()) {
417       do
418         --MBBI;
419       while (MBBI != MBB.begin() && isCSRestore(*MBBI, CSRegs));
420       if (!isCSRestore(*MBBI, CSRegs))
421         ++MBBI;
422     }
423 
424     // Move SP to start of FP callee save spill area.
425     NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
426                  AFI->getGPRCalleeSavedArea2Size() +
427                  AFI->getDPRCalleeSavedAreaSize() +
428                  ArgRegsSaveSize);
429 
430     if (AFI->shouldRestoreSPFromFP()) {
431       NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
432       // Reset SP based on frame pointer only if the stack frame extends beyond
433       // frame pointer stack slot, the target is ELF and the function has FP, or
434       // the target uses var sized objects.
435       if (NumBytes) {
436         assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
437                "No scratch register to restore SP from FP!");
438         emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
439                                   TII, *RegInfo);
440         BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
441             .addReg(ARM::R4)
442             .add(predOps(ARMCC::AL));
443       } else
444         BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
445             .addReg(FramePtr)
446             .add(predOps(ARMCC::AL));
447     } else {
448       if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET &&
449           &MBB.front() != &*MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) {
450         MachineBasicBlock::iterator PMBBI = std::prev(MBBI);
451         if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*PMBBI, NumBytes))
452           emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes);
453       } else if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
454         emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes);
455     }
456   }
457 
458   if (needPopSpecialFixUp(MF)) {
459     bool Done = emitPopSpecialFixUp(MBB, /* DoIt */ true);
460     (void)Done;
461     assert(Done && "Emission of the special fixup failed!?");
462   }
463 }
464 
465 bool Thumb1FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
466   if (!needPopSpecialFixUp(*MBB.getParent()))
467     return true;
468 
469   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
470   return emitPopSpecialFixUp(*TmpMBB, /* DoIt */ false);
471 }
472 
473 bool Thumb1FrameLowering::needPopSpecialFixUp(const MachineFunction &MF) const {
474   ARMFunctionInfo *AFI =
475       const_cast<MachineFunction *>(&MF)->getInfo<ARMFunctionInfo>();
476   if (AFI->getArgRegsSaveSize())
477     return true;
478 
479   // LR cannot be encoded with Thumb1, i.e., it requires a special fix-up.
480   for (const CalleeSavedInfo &CSI : MF.getFrameInfo().getCalleeSavedInfo())
481     if (CSI.getReg() == ARM::LR)
482       return true;
483 
484   return false;
485 }
486 
487 bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
488                                               bool DoIt) const {
489   MachineFunction &MF = *MBB.getParent();
490   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
491   unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
492   const TargetInstrInfo &TII = *STI.getInstrInfo();
493   const ThumbRegisterInfo *RegInfo =
494       static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
495 
496   // If MBBI is a return instruction, or is a tPOP followed by a return
497   // instruction in the successor BB, we may be able to directly restore
498   // LR in the PC.
499   // This is only possible with v5T ops (v4T can't change the Thumb bit via
500   // a POP PC instruction), and only if we do not need to emit any SP update.
501   // Otherwise, we need a temporary register to pop the value
502   // and copy that value into LR.
503   auto MBBI = MBB.getFirstTerminator();
504   bool CanRestoreDirectly = STI.hasV5TOps() && !ArgRegsSaveSize;
505   if (CanRestoreDirectly) {
506     if (MBBI != MBB.end() && MBBI->getOpcode() != ARM::tB)
507       CanRestoreDirectly = (MBBI->getOpcode() == ARM::tBX_RET ||
508                             MBBI->getOpcode() == ARM::tPOP_RET);
509     else {
510       auto MBBI_prev = MBBI;
511       MBBI_prev--;
512       assert(MBBI_prev->getOpcode() == ARM::tPOP);
513       assert(MBB.succ_size() == 1);
514       if ((*MBB.succ_begin())->begin()->getOpcode() == ARM::tBX_RET)
515         MBBI = MBBI_prev; // Replace the final tPOP with a tPOP_RET.
516       else
517         CanRestoreDirectly = false;
518     }
519   }
520 
521   if (CanRestoreDirectly) {
522     if (!DoIt || MBBI->getOpcode() == ARM::tPOP_RET)
523       return true;
524     MachineInstrBuilder MIB =
525         BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET))
526             .add(predOps(ARMCC::AL));
527     // Copy implicit ops and popped registers, if any.
528     for (auto MO: MBBI->operands())
529       if (MO.isReg() && (MO.isImplicit() || MO.isDef()))
530         MIB.add(MO);
531     MIB.addReg(ARM::PC, RegState::Define);
532     // Erase the old instruction (tBX_RET or tPOP).
533     MBB.erase(MBBI);
534     return true;
535   }
536 
537   // Look for a temporary register to use.
538   // First, compute the liveness information.
539   LivePhysRegs UsedRegs(STI.getRegisterInfo());
540   UsedRegs.addLiveOuts(MBB);
541   // The semantic of pristines changed recently and now,
542   // the callee-saved registers that are touched in the function
543   // are not part of the pristines set anymore.
544   // Add those callee-saved now.
545   const TargetRegisterInfo *TRI = STI.getRegisterInfo();
546   const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
547   for (unsigned i = 0; CSRegs[i]; ++i)
548     UsedRegs.addReg(CSRegs[i]);
549 
550   DebugLoc dl = DebugLoc();
551   if (MBBI != MBB.end()) {
552     dl = MBBI->getDebugLoc();
553     auto InstUpToMBBI = MBB.end();
554     while (InstUpToMBBI != MBBI)
555       // The pre-decrement is on purpose here.
556       // We want to have the liveness right before MBBI.
557       UsedRegs.stepBackward(*--InstUpToMBBI);
558   }
559 
560   // Look for a register that can be directly use in the POP.
561   unsigned PopReg = 0;
562   // And some temporary register, just in case.
563   unsigned TemporaryReg = 0;
564   BitVector PopFriendly =
565       TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::tGPRRegClassID));
566   assert(PopFriendly.any() && "No allocatable pop-friendly register?!");
567   // Rebuild the GPRs from the high registers because they are removed
568   // form the GPR reg class for thumb1.
569   BitVector GPRsNoLRSP =
570       TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::hGPRRegClassID));
571   GPRsNoLRSP |= PopFriendly;
572   GPRsNoLRSP.reset(ARM::LR);
573   GPRsNoLRSP.reset(ARM::SP);
574   GPRsNoLRSP.reset(ARM::PC);
575   for (int Register = GPRsNoLRSP.find_first(); Register != -1;
576        Register = GPRsNoLRSP.find_next(Register)) {
577     if (!UsedRegs.contains(Register)) {
578       // Remember the first pop-friendly register and exit.
579       if (PopFriendly.test(Register)) {
580         PopReg = Register;
581         TemporaryReg = 0;
582         break;
583       }
584       // Otherwise, remember that the register will be available to
585       // save a pop-friendly register.
586       TemporaryReg = Register;
587     }
588   }
589 
590   if (!DoIt && !PopReg && !TemporaryReg)
591     return false;
592 
593   assert((PopReg || TemporaryReg) && "Cannot get LR");
594 
595   if (TemporaryReg) {
596     assert(!PopReg && "Unnecessary MOV is about to be inserted");
597     PopReg = PopFriendly.find_first();
598     BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
599         .addReg(TemporaryReg, RegState::Define)
600         .addReg(PopReg, RegState::Kill)
601         .add(predOps(ARMCC::AL));
602   }
603 
604   if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPOP_RET) {
605     // We couldn't use the direct restoration above, so
606     // perform the opposite conversion: tPOP_RET to tPOP.
607     MachineInstrBuilder MIB =
608         BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP))
609             .add(predOps(ARMCC::AL));
610     bool Popped = false;
611     for (auto MO: MBBI->operands())
612       if (MO.isReg() && (MO.isImplicit() || MO.isDef()) &&
613           MO.getReg() != ARM::PC) {
614         MIB.add(MO);
615         if (!MO.isImplicit())
616           Popped = true;
617       }
618     // Is there anything left to pop?
619     if (!Popped)
620       MBB.erase(MIB.getInstr());
621     // Erase the old instruction.
622     MBB.erase(MBBI);
623     MBBI = BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET))
624                .add(predOps(ARMCC::AL));
625   }
626 
627   assert(PopReg && "Do not know how to get LR");
628   BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))
629       .add(predOps(ARMCC::AL))
630       .addReg(PopReg, RegState::Define);
631 
632   emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
633 
634   BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
635       .addReg(ARM::LR, RegState::Define)
636       .addReg(PopReg, RegState::Kill)
637       .add(predOps(ARMCC::AL));
638 
639   if (TemporaryReg)
640     BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
641         .addReg(PopReg, RegState::Define)
642         .addReg(TemporaryReg, RegState::Kill)
643         .add(predOps(ARMCC::AL));
644 
645   return true;
646 }
647 
648 // Return the first iteraror after CurrentReg which is present in EnabledRegs,
649 // or OrderEnd if no further registers are in that set. This does not advance
650 // the iterator fiorst, so returns CurrentReg if it is in EnabledRegs.
651 template <unsigned SetSize>
652 static const unsigned *
653 findNextOrderedReg(const unsigned *CurrentReg,
654                    SmallSet<unsigned, SetSize> &EnabledRegs,
655                    const unsigned *OrderEnd) {
656   while (CurrentReg != OrderEnd && !EnabledRegs.count(*CurrentReg))
657     ++CurrentReg;
658   return CurrentReg;
659 }
660 
661 bool Thumb1FrameLowering::
662 spillCalleeSavedRegisters(MachineBasicBlock &MBB,
663                           MachineBasicBlock::iterator MI,
664                           const std::vector<CalleeSavedInfo> &CSI,
665                           const TargetRegisterInfo *TRI) const {
666   if (CSI.empty())
667     return false;
668 
669   DebugLoc DL;
670   const TargetInstrInfo &TII = *STI.getInstrInfo();
671   MachineFunction &MF = *MBB.getParent();
672   const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
673       MF.getSubtarget().getRegisterInfo());
674 
675   SmallSet<unsigned, 9> LoRegsToSave; // r0-r7, lr
676   SmallSet<unsigned, 4> HiRegsToSave; // r8-r11
677   SmallSet<unsigned, 9> CopyRegs; // Registers which can be used after pushing
678                            // LoRegs for saving HiRegs.
679 
680   for (unsigned i = CSI.size(); i != 0; --i) {
681     unsigned Reg = CSI[i-1].getReg();
682 
683     if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) {
684       LoRegsToSave.insert(Reg);
685     } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) {
686       HiRegsToSave.insert(Reg);
687     } else {
688       llvm_unreachable("callee-saved register of unexpected class");
689     }
690 
691     if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) &&
692         !MF.getRegInfo().isLiveIn(Reg) &&
693         !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF)))
694       CopyRegs.insert(Reg);
695   }
696 
697   // Unused argument registers can be used for the high register saving.
698   for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3})
699     if (!MF.getRegInfo().isLiveIn(ArgReg))
700       CopyRegs.insert(ArgReg);
701 
702   // Push the low registers and lr
703   if (!LoRegsToSave.empty()) {
704     MachineInstrBuilder MIB =
705         BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
706     for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6, ARM::R7, ARM::LR}) {
707       if (LoRegsToSave.count(Reg)) {
708         bool isKill = !MF.getRegInfo().isLiveIn(Reg);
709         if (isKill)
710           MBB.addLiveIn(Reg);
711 
712         MIB.addReg(Reg, getKillRegState(isKill));
713       }
714     }
715     MIB.setMIFlags(MachineInstr::FrameSetup);
716   }
717 
718   // Push the high registers. There are no store instructions that can access
719   // these registers directly, so we have to move them to low registers, and
720   // push them. This might take multiple pushes, as it is possible for there to
721   // be fewer low registers available than high registers which need saving.
722 
723   // These are in reverse order so that in the case where we need to use
724   // multiple PUSH instructions, the order of the registers on the stack still
725   // matches the unwind info. They need to be swicthed back to ascending order
726   // before adding to the PUSH instruction.
727   static const unsigned AllCopyRegs[] = {ARM::LR, ARM::R7, ARM::R6,
728                                          ARM::R5, ARM::R4, ARM::R3,
729                                          ARM::R2, ARM::R1, ARM::R0};
730   static const unsigned AllHighRegs[] = {ARM::R11, ARM::R10, ARM::R9, ARM::R8};
731 
732   const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs);
733   const unsigned *AllHighRegsEnd = std::end(AllHighRegs);
734 
735   // Find the first register to save.
736   const unsigned *HiRegToSave = findNextOrderedReg(
737       std::begin(AllHighRegs), HiRegsToSave, AllHighRegsEnd);
738 
739   while (HiRegToSave != AllHighRegsEnd) {
740     // Find the first low register to use.
741     const unsigned *CopyReg =
742         findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd);
743 
744     // Create the PUSH, but don't insert it yet (the MOVs need to come first).
745     MachineInstrBuilder PushMIB =
746         BuildMI(MF, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
747 
748     SmallVector<unsigned, 4> RegsToPush;
749     while (HiRegToSave != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) {
750       if (HiRegsToSave.count(*HiRegToSave)) {
751         bool isKill = !MF.getRegInfo().isLiveIn(*HiRegToSave);
752         if (isKill)
753           MBB.addLiveIn(*HiRegToSave);
754 
755         // Emit a MOV from the high reg to the low reg.
756         BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr))
757             .addReg(*CopyReg, RegState::Define)
758             .addReg(*HiRegToSave, getKillRegState(isKill))
759             .add(predOps(ARMCC::AL));
760 
761         // Record the register that must be added to the PUSH.
762         RegsToPush.push_back(*CopyReg);
763 
764         CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd);
765         HiRegToSave =
766             findNextOrderedReg(++HiRegToSave, HiRegsToSave, AllHighRegsEnd);
767       }
768     }
769 
770     // Add the low registers to the PUSH, in ascending order.
771     for (unsigned Reg : llvm::reverse(RegsToPush))
772       PushMIB.addReg(Reg, RegState::Kill);
773 
774     // Insert the PUSH instruction after the MOVs.
775     MBB.insert(MI, PushMIB);
776   }
777 
778   return true;
779 }
780 
781 bool Thumb1FrameLowering::
782 restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
783                             MachineBasicBlock::iterator MI,
784                             const std::vector<CalleeSavedInfo> &CSI,
785                             const TargetRegisterInfo *TRI) const {
786   if (CSI.empty())
787     return false;
788 
789   MachineFunction &MF = *MBB.getParent();
790   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
791   const TargetInstrInfo &TII = *STI.getInstrInfo();
792   const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
793       MF.getSubtarget().getRegisterInfo());
794 
795   bool isVarArg = AFI->getArgRegsSaveSize() > 0;
796   DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
797 
798   SmallSet<unsigned, 9> LoRegsToRestore;
799   SmallSet<unsigned, 4> HiRegsToRestore;
800   // Low registers (r0-r7) which can be used to restore the high registers.
801   SmallSet<unsigned, 9> CopyRegs;
802 
803   for (CalleeSavedInfo I : CSI) {
804     unsigned Reg = I.getReg();
805 
806     if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) {
807       LoRegsToRestore.insert(Reg);
808     } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) {
809       HiRegsToRestore.insert(Reg);
810     } else {
811       llvm_unreachable("callee-saved register of unexpected class");
812     }
813 
814     // If this is a low register not used as the frame pointer, we may want to
815     // use it for restoring the high registers.
816     if ((ARM::tGPRRegClass.contains(Reg)) &&
817         !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF)))
818       CopyRegs.insert(Reg);
819   }
820 
821   // If this is a return block, we may be able to use some unused return value
822   // registers for restoring the high regs.
823   auto Terminator = MBB.getFirstTerminator();
824   if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) {
825     CopyRegs.insert(ARM::R0);
826     CopyRegs.insert(ARM::R1);
827     CopyRegs.insert(ARM::R2);
828     CopyRegs.insert(ARM::R3);
829     for (auto Op : Terminator->implicit_operands()) {
830       if (Op.isReg())
831         CopyRegs.erase(Op.getReg());
832     }
833   }
834 
835   static const unsigned AllCopyRegs[] = {ARM::R0, ARM::R1, ARM::R2, ARM::R3,
836                                          ARM::R4, ARM::R5, ARM::R6, ARM::R7};
837   static const unsigned AllHighRegs[] = {ARM::R8, ARM::R9, ARM::R10, ARM::R11};
838 
839   const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs);
840   const unsigned *AllHighRegsEnd = std::end(AllHighRegs);
841 
842   // Find the first register to restore.
843   auto HiRegToRestore = findNextOrderedReg(std::begin(AllHighRegs),
844                                            HiRegsToRestore, AllHighRegsEnd);
845 
846   while (HiRegToRestore != AllHighRegsEnd) {
847     assert(!CopyRegs.empty());
848     // Find the first low register to use.
849     auto CopyReg =
850         findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd);
851 
852     // Create the POP instruction.
853     MachineInstrBuilder PopMIB =
854         BuildMI(MBB, MI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
855 
856     while (HiRegToRestore != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) {
857       // Add the low register to the POP.
858       PopMIB.addReg(*CopyReg, RegState::Define);
859 
860       // Create the MOV from low to high register.
861       BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr))
862           .addReg(*HiRegToRestore, RegState::Define)
863           .addReg(*CopyReg, RegState::Kill)
864           .add(predOps(ARMCC::AL));
865 
866       CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd);
867       HiRegToRestore =
868           findNextOrderedReg(++HiRegToRestore, HiRegsToRestore, AllHighRegsEnd);
869     }
870   }
871 
872   MachineInstrBuilder MIB =
873       BuildMI(MF, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
874 
875   bool NeedsPop = false;
876   for (unsigned i = CSI.size(); i != 0; --i) {
877     unsigned Reg = CSI[i-1].getReg();
878 
879     // High registers (excluding lr) have already been dealt with
880     if (!(ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR))
881       continue;
882 
883     if (Reg == ARM::LR) {
884       if (MBB.succ_empty()) {
885         // Special epilogue for vararg functions. See emitEpilogue
886         if (isVarArg)
887           continue;
888         // ARMv4T requires BX, see emitEpilogue
889         if (!STI.hasV5TOps())
890           continue;
891         Reg = ARM::PC;
892         (*MIB).setDesc(TII.get(ARM::tPOP_RET));
893         if (MI != MBB.end())
894           MIB.copyImplicitOps(*MI);
895         MI = MBB.erase(MI);
896       } else
897         // LR may only be popped into PC, as part of return sequence.
898         // If this isn't the return sequence, we'll need emitPopSpecialFixUp
899         // to restore LR the hard way.
900         continue;
901     }
902     MIB.addReg(Reg, getDefRegState(true));
903     NeedsPop = true;
904   }
905 
906   // It's illegal to emit pop instruction without operands.
907   if (NeedsPop)
908     MBB.insert(MI, &*MIB);
909   else
910     MF.DeleteMachineInstr(MIB);
911 
912   return true;
913 }
914