1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the PPC implementation of TargetFrameLowering class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "PPCFrameLowering.h"
15 #include "PPCInstrBuilder.h"
16 #include "PPCInstrInfo.h"
17 #include "PPCMachineFunctionInfo.h"
18 #include "PPCSubtarget.h"
19 #include "PPCTargetMachine.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineModuleInfo.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/CodeGen/RegisterScavenging.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/Target/TargetOptions.h"
28 
29 using namespace llvm;
30 
31 /// VRRegNo - Map from a numbered VR register to its enum value.
32 ///
33 static const MCPhysReg VRRegNo[] = {
34  PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
35  PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
36  PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
37  PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
38 };
39 
40 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
41   if (STI.isDarwinABI())
42     return STI.isPPC64() ? 16 : 8;
43   // SVR4 ABI:
44   return STI.isPPC64() ? 16 : 4;
45 }
46 
47 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
48   return STI.isELFv2ABI() ? 24 : 40;
49 }
50 
51 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
52   // For the Darwin ABI:
53   // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area
54   // for saving the frame pointer (if needed.)  While the published ABI has
55   // not used this slot since at least MacOSX 10.2, there is older code
56   // around that does use it, and that needs to continue to work.
57   if (STI.isDarwinABI())
58     return STI.isPPC64() ? -8U : -4U;
59 
60   // SVR4 ABI: First slot in the general register save area.
61   return STI.isPPC64() ? -8U : -4U;
62 }
63 
64 static unsigned computeLinkageSize(const PPCSubtarget &STI) {
65   if (STI.isDarwinABI() || STI.isPPC64())
66     return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
67 
68   // SVR4 ABI:
69   return 8;
70 }
71 
72 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
73   if (STI.isDarwinABI())
74     return STI.isPPC64() ? -16U : -8U;
75 
76   // SVR4 ABI: First slot in the general register save area.
77   return STI.isPPC64()
78              ? -16U
79              : STI.getTargetMachine().isPositionIndependent() ? -12U : -8U;
80 }
81 
82 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
83     : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
84                           STI.getPlatformStackAlignment(), 0),
85       Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
86       TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
87       FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
88       LinkageSize(computeLinkageSize(Subtarget)),
89       BasePointerSaveOffset(computeBasePointerSaveOffset(STI)) {}
90 
91 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
92 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
93     unsigned &NumEntries) const {
94   if (Subtarget.isDarwinABI()) {
95     NumEntries = 1;
96     if (Subtarget.isPPC64()) {
97       static const SpillSlot darwin64Offsets = {PPC::X31, -8};
98       return &darwin64Offsets;
99     } else {
100       static const SpillSlot darwinOffsets = {PPC::R31, -4};
101       return &darwinOffsets;
102     }
103   }
104 
105   // Early exit if not using the SVR4 ABI.
106   if (!Subtarget.isSVR4ABI()) {
107     NumEntries = 0;
108     return nullptr;
109   }
110 
111   // Note that the offsets here overlap, but this is fixed up in
112   // processFunctionBeforeFrameFinalized.
113 
114   static const SpillSlot Offsets[] = {
115       // Floating-point register save area offsets.
116       {PPC::F31, -8},
117       {PPC::F30, -16},
118       {PPC::F29, -24},
119       {PPC::F28, -32},
120       {PPC::F27, -40},
121       {PPC::F26, -48},
122       {PPC::F25, -56},
123       {PPC::F24, -64},
124       {PPC::F23, -72},
125       {PPC::F22, -80},
126       {PPC::F21, -88},
127       {PPC::F20, -96},
128       {PPC::F19, -104},
129       {PPC::F18, -112},
130       {PPC::F17, -120},
131       {PPC::F16, -128},
132       {PPC::F15, -136},
133       {PPC::F14, -144},
134 
135       // General register save area offsets.
136       {PPC::R31, -4},
137       {PPC::R30, -8},
138       {PPC::R29, -12},
139       {PPC::R28, -16},
140       {PPC::R27, -20},
141       {PPC::R26, -24},
142       {PPC::R25, -28},
143       {PPC::R24, -32},
144       {PPC::R23, -36},
145       {PPC::R22, -40},
146       {PPC::R21, -44},
147       {PPC::R20, -48},
148       {PPC::R19, -52},
149       {PPC::R18, -56},
150       {PPC::R17, -60},
151       {PPC::R16, -64},
152       {PPC::R15, -68},
153       {PPC::R14, -72},
154 
155       // CR save area offset.  We map each of the nonvolatile CR fields
156       // to the slot for CR2, which is the first of the nonvolatile CR
157       // fields to be assigned, so that we only allocate one save slot.
158       // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
159       {PPC::CR2, -4},
160 
161       // VRSAVE save area offset.
162       {PPC::VRSAVE, -4},
163 
164       // Vector register save area
165       {PPC::V31, -16},
166       {PPC::V30, -32},
167       {PPC::V29, -48},
168       {PPC::V28, -64},
169       {PPC::V27, -80},
170       {PPC::V26, -96},
171       {PPC::V25, -112},
172       {PPC::V24, -128},
173       {PPC::V23, -144},
174       {PPC::V22, -160},
175       {PPC::V21, -176},
176       {PPC::V20, -192}};
177 
178   static const SpillSlot Offsets64[] = {
179       // Floating-point register save area offsets.
180       {PPC::F31, -8},
181       {PPC::F30, -16},
182       {PPC::F29, -24},
183       {PPC::F28, -32},
184       {PPC::F27, -40},
185       {PPC::F26, -48},
186       {PPC::F25, -56},
187       {PPC::F24, -64},
188       {PPC::F23, -72},
189       {PPC::F22, -80},
190       {PPC::F21, -88},
191       {PPC::F20, -96},
192       {PPC::F19, -104},
193       {PPC::F18, -112},
194       {PPC::F17, -120},
195       {PPC::F16, -128},
196       {PPC::F15, -136},
197       {PPC::F14, -144},
198 
199       // General register save area offsets.
200       {PPC::X31, -8},
201       {PPC::X30, -16},
202       {PPC::X29, -24},
203       {PPC::X28, -32},
204       {PPC::X27, -40},
205       {PPC::X26, -48},
206       {PPC::X25, -56},
207       {PPC::X24, -64},
208       {PPC::X23, -72},
209       {PPC::X22, -80},
210       {PPC::X21, -88},
211       {PPC::X20, -96},
212       {PPC::X19, -104},
213       {PPC::X18, -112},
214       {PPC::X17, -120},
215       {PPC::X16, -128},
216       {PPC::X15, -136},
217       {PPC::X14, -144},
218 
219       // VRSAVE save area offset.
220       {PPC::VRSAVE, -4},
221 
222       // Vector register save area
223       {PPC::V31, -16},
224       {PPC::V30, -32},
225       {PPC::V29, -48},
226       {PPC::V28, -64},
227       {PPC::V27, -80},
228       {PPC::V26, -96},
229       {PPC::V25, -112},
230       {PPC::V24, -128},
231       {PPC::V23, -144},
232       {PPC::V22, -160},
233       {PPC::V21, -176},
234       {PPC::V20, -192}};
235 
236   if (Subtarget.isPPC64()) {
237     NumEntries = array_lengthof(Offsets64);
238 
239     return Offsets64;
240   } else {
241     NumEntries = array_lengthof(Offsets);
242 
243     return Offsets;
244   }
245 }
246 
247 /// RemoveVRSaveCode - We have found that this function does not need any code
248 /// to manipulate the VRSAVE register, even though it uses vector registers.
249 /// This can happen when the only registers used are known to be live in or out
250 /// of the function.  Remove all of the VRSAVE related code from the function.
251 /// FIXME: The removal of the code results in a compile failure at -O0 when the
252 /// function contains a function call, as the GPR containing original VRSAVE
253 /// contents is spilled and reloaded around the call.  Without the prolog code,
254 /// the spill instruction refers to an undefined register.  This code needs
255 /// to account for all uses of that GPR.
256 static void RemoveVRSaveCode(MachineInstr *MI) {
257   MachineBasicBlock *Entry = MI->getParent();
258   MachineFunction *MF = Entry->getParent();
259 
260   // We know that the MTVRSAVE instruction immediately follows MI.  Remove it.
261   MachineBasicBlock::iterator MBBI = MI;
262   ++MBBI;
263   assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE);
264   MBBI->eraseFromParent();
265 
266   bool RemovedAllMTVRSAVEs = true;
267   // See if we can find and remove the MTVRSAVE instruction from all of the
268   // epilog blocks.
269   for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
270     // If last instruction is a return instruction, add an epilogue
271     if (I->isReturnBlock()) {
272       bool FoundIt = false;
273       for (MBBI = I->end(); MBBI != I->begin(); ) {
274         --MBBI;
275         if (MBBI->getOpcode() == PPC::MTVRSAVE) {
276           MBBI->eraseFromParent();  // remove it.
277           FoundIt = true;
278           break;
279         }
280       }
281       RemovedAllMTVRSAVEs &= FoundIt;
282     }
283   }
284 
285   // If we found and removed all MTVRSAVE instructions, remove the read of
286   // VRSAVE as well.
287   if (RemovedAllMTVRSAVEs) {
288     MBBI = MI;
289     assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?");
290     --MBBI;
291     assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?");
292     MBBI->eraseFromParent();
293   }
294 
295   // Finally, nuke the UPDATE_VRSAVE.
296   MI->eraseFromParent();
297 }
298 
299 // HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
300 // instruction selector.  Based on the vector registers that have been used,
301 // transform this into the appropriate ORI instruction.
302 static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
303   MachineFunction *MF = MI->getParent()->getParent();
304   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
305   DebugLoc dl = MI->getDebugLoc();
306 
307   const MachineRegisterInfo &MRI = MF->getRegInfo();
308   unsigned UsedRegMask = 0;
309   for (unsigned i = 0; i != 32; ++i)
310     if (MRI.isPhysRegModified(VRRegNo[i]))
311       UsedRegMask |= 1 << (31-i);
312 
313   // Live in and live out values already must be in the mask, so don't bother
314   // marking them.
315   for (MachineRegisterInfo::livein_iterator
316        I = MF->getRegInfo().livein_begin(),
317        E = MF->getRegInfo().livein_end(); I != E; ++I) {
318     unsigned RegNo = TRI->getEncodingValue(I->first);
319     if (VRRegNo[RegNo] == I->first)        // If this really is a vector reg.
320       UsedRegMask &= ~(1 << (31-RegNo));   // Doesn't need to be marked.
321   }
322 
323   // Live out registers appear as use operands on return instructions.
324   for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end();
325        UsedRegMask != 0 && BI != BE; ++BI) {
326     const MachineBasicBlock &MBB = *BI;
327     if (!MBB.isReturnBlock())
328       continue;
329     const MachineInstr &Ret = MBB.back();
330     for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) {
331       const MachineOperand &MO = Ret.getOperand(I);
332       if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg()))
333         continue;
334       unsigned RegNo = TRI->getEncodingValue(MO.getReg());
335       UsedRegMask &= ~(1 << (31-RegNo));
336     }
337   }
338 
339   // If no registers are used, turn this into a copy.
340   if (UsedRegMask == 0) {
341     // Remove all VRSAVE code.
342     RemoveVRSaveCode(MI);
343     return;
344   }
345 
346   unsigned SrcReg = MI->getOperand(1).getReg();
347   unsigned DstReg = MI->getOperand(0).getReg();
348 
349   if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
350     if (DstReg != SrcReg)
351       BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
352         .addReg(SrcReg)
353         .addImm(UsedRegMask);
354     else
355       BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
356         .addReg(SrcReg, RegState::Kill)
357         .addImm(UsedRegMask);
358   } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
359     if (DstReg != SrcReg)
360       BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
361         .addReg(SrcReg)
362         .addImm(UsedRegMask >> 16);
363     else
364       BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
365         .addReg(SrcReg, RegState::Kill)
366         .addImm(UsedRegMask >> 16);
367   } else {
368     if (DstReg != SrcReg)
369       BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
370         .addReg(SrcReg)
371         .addImm(UsedRegMask >> 16);
372     else
373       BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
374         .addReg(SrcReg, RegState::Kill)
375         .addImm(UsedRegMask >> 16);
376 
377     BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
378       .addReg(DstReg, RegState::Kill)
379       .addImm(UsedRegMask & 0xFFFF);
380   }
381 
382   // Remove the old UPDATE_VRSAVE instruction.
383   MI->eraseFromParent();
384 }
385 
386 static bool spillsCR(const MachineFunction &MF) {
387   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
388   return FuncInfo->isCRSpilled();
389 }
390 
391 static bool spillsVRSAVE(const MachineFunction &MF) {
392   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
393   return FuncInfo->isVRSAVESpilled();
394 }
395 
396 static bool hasSpills(const MachineFunction &MF) {
397   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
398   return FuncInfo->hasSpills();
399 }
400 
401 static bool hasNonRISpills(const MachineFunction &MF) {
402   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
403   return FuncInfo->hasNonRISpills();
404 }
405 
406 /// MustSaveLR - Return true if this function requires that we save the LR
407 /// register onto the stack in the prolog and restore it in the epilog of the
408 /// function.
409 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
410   const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
411 
412   // We need a save/restore of LR if there is any def of LR (which is
413   // defined by calls, including the PIC setup sequence), or if there is
414   // some use of the LR stack slot (e.g. for builtin_return_address).
415   // (LR comes in 32 and 64 bit versions.)
416   MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
417   return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
418 }
419 
420 /// determineFrameLayout - Determine the size of the frame and maximum call
421 /// frame size.
422 unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
423                                                 bool UpdateMF,
424                                                 bool UseEstimate) const {
425   MachineFrameInfo *MFI = MF.getFrameInfo();
426 
427   // Get the number of bytes to allocate from the FrameInfo
428   unsigned FrameSize =
429     UseEstimate ? MFI->estimateStackSize(MF) : MFI->getStackSize();
430 
431   // Get stack alignments. The frame must be aligned to the greatest of these:
432   unsigned TargetAlign = getStackAlignment(); // alignment required per the ABI
433   unsigned MaxAlign = MFI->getMaxAlignment(); // algmt required by data in frame
434   unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1;
435 
436   const PPCRegisterInfo *RegInfo =
437       static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
438 
439   // If we are a leaf function, and use up to 224 bytes of stack space,
440   // don't have a frame pointer, calls, or dynamic alloca then we do not need
441   // to adjust the stack pointer (we fit in the Red Zone).
442   // The 32-bit SVR4 ABI has no Red Zone. However, it can still generate
443   // stackless code if all local vars are reg-allocated.
444   bool DisableRedZone = MF.getFunction()->hasFnAttribute(Attribute::NoRedZone);
445   unsigned LR = RegInfo->getRARegister();
446   if (!DisableRedZone &&
447       (Subtarget.isPPC64() ||                      // 32-bit SVR4, no stack-
448        !Subtarget.isSVR4ABI() ||                   //   allocated locals.
449         FrameSize == 0) &&
450       FrameSize <= 224 &&                          // Fits in red zone.
451       !MFI->hasVarSizedObjects() &&                // No dynamic alloca.
452       !MFI->adjustsStack() &&                      // No calls.
453       !MustSaveLR(MF, LR) &&
454       !RegInfo->hasBasePointer(MF)) { // No special alignment.
455     // No need for frame
456     if (UpdateMF)
457       MFI->setStackSize(0);
458     return 0;
459   }
460 
461   // Get the maximum call frame size of all the calls.
462   unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
463 
464   // Maximum call frame needs to be at least big enough for linkage area.
465   unsigned minCallFrameSize = getLinkageSize();
466   maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
467 
468   // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
469   // that allocations will be aligned.
470   if (MFI->hasVarSizedObjects())
471     maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
472 
473   // Update maximum call frame size.
474   if (UpdateMF)
475     MFI->setMaxCallFrameSize(maxCallFrameSize);
476 
477   // Include call frame size in total.
478   FrameSize += maxCallFrameSize;
479 
480   // Make sure the frame is aligned.
481   FrameSize = (FrameSize + AlignMask) & ~AlignMask;
482 
483   // Update frame info.
484   if (UpdateMF)
485     MFI->setStackSize(FrameSize);
486 
487   return FrameSize;
488 }
489 
490 // hasFP - Return true if the specified function actually has a dedicated frame
491 // pointer register.
492 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
493   const MachineFrameInfo *MFI = MF.getFrameInfo();
494   // FIXME: This is pretty much broken by design: hasFP() might be called really
495   // early, before the stack layout was calculated and thus hasFP() might return
496   // true or false here depending on the time of call.
497   return (MFI->getStackSize()) && needsFP(MF);
498 }
499 
500 // needsFP - Return true if the specified function should have a dedicated frame
501 // pointer register.  This is true if the function has variable sized allocas or
502 // if frame pointer elimination is disabled.
503 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
504   const MachineFrameInfo *MFI = MF.getFrameInfo();
505 
506   // Naked functions have no stack frame pushed, so we don't have a frame
507   // pointer.
508   if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
509     return false;
510 
511   return MF.getTarget().Options.DisableFramePointerElim(MF) ||
512     MFI->hasVarSizedObjects() ||
513     MFI->hasStackMap() || MFI->hasPatchPoint() ||
514     (MF.getTarget().Options.GuaranteedTailCallOpt &&
515      MF.getInfo<PPCFunctionInfo>()->hasFastCall());
516 }
517 
518 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
519   bool is31 = needsFP(MF);
520   unsigned FPReg  = is31 ? PPC::R31 : PPC::R1;
521   unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
522 
523   const PPCRegisterInfo *RegInfo =
524       static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
525   bool HasBP = RegInfo->hasBasePointer(MF);
526   unsigned BPReg  = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
527   unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FPReg;
528 
529   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
530        BI != BE; ++BI)
531     for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
532       --MBBI;
533       for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
534         MachineOperand &MO = MBBI->getOperand(I);
535         if (!MO.isReg())
536           continue;
537 
538         switch (MO.getReg()) {
539         case PPC::FP:
540           MO.setReg(FPReg);
541           break;
542         case PPC::FP8:
543           MO.setReg(FP8Reg);
544           break;
545         case PPC::BP:
546           MO.setReg(BPReg);
547           break;
548         case PPC::BP8:
549           MO.setReg(BP8Reg);
550           break;
551 
552         }
553       }
554     }
555 }
556 
557 /*  This function will do the following:
558     - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
559       respectively (defaults recommended by the ABI) and return true
560     - If MBB is not an entry block, initialize the register scavenger and look
561       for available registers.
562     - If the defaults (R0/R12) are available, return true
563     - If TwoUniqueRegsRequired is set to true, it looks for two unique
564       registers. Otherwise, look for a single available register.
565       - If the required registers are found, set SR1 and SR2 and return true.
566       - If the required registers are not found, set SR2 or both SR1 and SR2 to
567         PPC::NoRegister and return false.
568 
569     Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
570     is not set, this function will attempt to find two different registers, but
571     still return true if only one register is available (and set SR1 == SR2).
572 */
573 bool
574 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
575                                       bool UseAtEnd,
576                                       bool TwoUniqueRegsRequired,
577                                       unsigned *SR1,
578                                       unsigned *SR2) const {
579   RegScavenger RS;
580   unsigned R0 =  Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
581   unsigned R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
582 
583   // Set the defaults for the two scratch registers.
584   if (SR1)
585     *SR1 = R0;
586 
587   if (SR2) {
588     assert (SR1 && "Asking for the second scratch register but not the first?");
589     *SR2 = R12;
590   }
591 
592   // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
593   if ((UseAtEnd && MBB->isReturnBlock()) ||
594       (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
595     return true;
596 
597   RS.enterBasicBlock(*MBB);
598 
599   if (UseAtEnd && !MBB->empty()) {
600     // The scratch register will be used at the end of the block, so must
601     // consider all registers used within the block
602 
603     MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
604     // If no terminator, back iterator up to previous instruction.
605     if (MBBI == MBB->end())
606       MBBI = std::prev(MBBI);
607 
608     if (MBBI != MBB->begin())
609       RS.forward(MBBI);
610   }
611 
612   // If the two registers are available, we're all good.
613   // Note that we only return here if both R0 and R12 are available because
614   // although the function may not require two unique registers, it may benefit
615   // from having two so we should try to provide them.
616   if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
617     return true;
618 
619   // Get the list of callee-saved registers for the target.
620   const PPCRegisterInfo *RegInfo =
621       static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
622   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
623 
624   // Get all the available registers in the block.
625   BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
626                                      &PPC::GPRCRegClass);
627 
628   // We shouldn't use callee-saved registers as scratch registers as they may be
629   // available when looking for a candidate block for shrink wrapping but not
630   // available when the actual prologue/epilogue is being emitted because they
631   // were added as live-in to the prologue block by PrologueEpilogueInserter.
632   for (int i = 0; CSRegs[i]; ++i)
633     BV.reset(CSRegs[i]);
634 
635   // Set the first scratch register to the first available one.
636   if (SR1) {
637     int FirstScratchReg = BV.find_first();
638     *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
639   }
640 
641   // If there is another one available, set the second scratch register to that.
642   // Otherwise, set it to either PPC::NoRegister if this function requires two
643   // or to whatever SR1 is set to if this function doesn't require two.
644   if (SR2) {
645     int SecondScratchReg = BV.find_next(*SR1);
646     if (SecondScratchReg != -1)
647       *SR2 = SecondScratchReg;
648     else
649       *SR2 = TwoUniqueRegsRequired ? (unsigned)PPC::NoRegister : *SR1;
650   }
651 
652   // Now that we've done our best to provide both registers, double check
653   // whether we were unable to provide enough.
654   if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
655     return false;
656 
657   return true;
658 }
659 
660 // We need a scratch register for spilling LR and for spilling CR. By default,
661 // we use two scratch registers to hide latency. However, if only one scratch
662 // register is available, we can adjust for that by not overlapping the spill
663 // code. However, if we need to realign the stack (i.e. have a base pointer)
664 // and the stack frame is large, we need two scratch registers.
665 bool
666 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
667   const PPCRegisterInfo *RegInfo =
668       static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
669   MachineFunction &MF = *(MBB->getParent());
670   bool HasBP = RegInfo->hasBasePointer(MF);
671   unsigned FrameSize = determineFrameLayout(MF, false);
672   int NegFrameSize = -FrameSize;
673   bool IsLargeFrame = !isInt<16>(NegFrameSize);
674   MachineFrameInfo *MFI = MF.getFrameInfo();
675   unsigned MaxAlign = MFI->getMaxAlignment();
676   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
677 
678   return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1;
679 }
680 
681 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
682   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
683 
684   return findScratchRegister(TmpMBB, false,
685                              twoUniqueScratchRegsRequired(TmpMBB));
686 }
687 
688 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
689   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
690 
691   return findScratchRegister(TmpMBB, true);
692 }
693 
694 void PPCFrameLowering::emitPrologue(MachineFunction &MF,
695                                     MachineBasicBlock &MBB) const {
696   MachineBasicBlock::iterator MBBI = MBB.begin();
697   MachineFrameInfo *MFI = MF.getFrameInfo();
698   const PPCInstrInfo &TII =
699       *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo());
700   const PPCRegisterInfo *RegInfo =
701       static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
702 
703   MachineModuleInfo &MMI = MF.getMMI();
704   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
705   DebugLoc dl;
706   bool needsCFI = MMI.hasDebugInfo() ||
707     MF.getFunction()->needsUnwindTableEntry();
708 
709   // Get processor type.
710   bool isPPC64 = Subtarget.isPPC64();
711   // Get the ABI.
712   bool isSVR4ABI = Subtarget.isSVR4ABI();
713   bool isELFv2ABI = Subtarget.isELFv2ABI();
714   assert((Subtarget.isDarwinABI() || isSVR4ABI) &&
715          "Currently only Darwin and SVR4 ABIs are supported for PowerPC.");
716 
717   // Scan the prolog, looking for an UPDATE_VRSAVE instruction.  If we find it,
718   // process it.
719   if (!isSVR4ABI)
720     for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
721       if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
722         HandleVRSaveUpdate(MBBI, TII);
723         break;
724       }
725     }
726 
727   // Move MBBI back to the beginning of the prologue block.
728   MBBI = MBB.begin();
729 
730   // Work out frame sizes.
731   unsigned FrameSize = determineFrameLayout(MF);
732   int NegFrameSize = -FrameSize;
733   if (!isInt<32>(NegFrameSize))
734     llvm_unreachable("Unhandled stack size!");
735 
736   if (MFI->isFrameAddressTaken())
737     replaceFPWithRealFP(MF);
738 
739   // Check if the link register (LR) must be saved.
740   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
741   bool MustSaveLR = FI->mustSaveLR();
742   const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
743   bool MustSaveCR = !MustSaveCRs.empty();
744   // Do we have a frame pointer and/or base pointer for this function?
745   bool HasFP = hasFP(MF);
746   bool HasBP = RegInfo->hasBasePointer(MF);
747   bool HasRedZone = isPPC64 || !isSVR4ABI;
748 
749   unsigned SPReg       = isPPC64 ? PPC::X1  : PPC::R1;
750   unsigned BPReg       = RegInfo->getBaseRegister(MF);
751   unsigned FPReg       = isPPC64 ? PPC::X31 : PPC::R31;
752   unsigned LRReg       = isPPC64 ? PPC::LR8 : PPC::LR;
753   unsigned ScratchReg  = 0;
754   unsigned TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
755   //  ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
756   const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
757                                                 : PPC::MFLR );
758   const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
759                                                  : PPC::STW );
760   const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
761                                                      : PPC::STWU );
762   const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
763                                                         : PPC::STWUX);
764   const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8
765                                                           : PPC::LIS );
766   const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8
767                                                  : PPC::ORI );
768   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
769                                               : PPC::OR );
770   const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
771                                                             : PPC::SUBFC);
772   const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
773                                                                : PPC::SUBFIC);
774 
775   // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
776   // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
777   // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
778   // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
779   assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
780          "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
781 
782   // Using the same bool variable as below to supress compiler warnings.
783   bool SingleScratchReg =
784     findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB),
785                         &ScratchReg, &TempReg);
786   assert(SingleScratchReg &&
787          "Required number of registers not available in this block");
788 
789   SingleScratchReg = ScratchReg == TempReg;
790 
791   int LROffset = getReturnSaveOffset();
792 
793   int FPOffset = 0;
794   if (HasFP) {
795     if (isSVR4ABI) {
796       MachineFrameInfo *FFI = MF.getFrameInfo();
797       int FPIndex = FI->getFramePointerSaveIndex();
798       assert(FPIndex && "No Frame Pointer Save Slot!");
799       FPOffset = FFI->getObjectOffset(FPIndex);
800     } else {
801       FPOffset = getFramePointerSaveOffset();
802     }
803   }
804 
805   int BPOffset = 0;
806   if (HasBP) {
807     if (isSVR4ABI) {
808       MachineFrameInfo *FFI = MF.getFrameInfo();
809       int BPIndex = FI->getBasePointerSaveIndex();
810       assert(BPIndex && "No Base Pointer Save Slot!");
811       BPOffset = FFI->getObjectOffset(BPIndex);
812     } else {
813       BPOffset = getBasePointerSaveOffset();
814     }
815   }
816 
817   int PBPOffset = 0;
818   if (FI->usesPICBase()) {
819     MachineFrameInfo *FFI = MF.getFrameInfo();
820     int PBPIndex = FI->getPICBasePointerSaveIndex();
821     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
822     PBPOffset = FFI->getObjectOffset(PBPIndex);
823   }
824 
825   // Get stack alignments.
826   unsigned MaxAlign = MFI->getMaxAlignment();
827   if (HasBP && MaxAlign > 1)
828     assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
829            "Invalid alignment!");
830 
831   // Frames of 32KB & larger require special handling because they cannot be
832   // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
833   bool isLargeFrame = !isInt<16>(NegFrameSize);
834 
835   assert((isPPC64 || !MustSaveCR) &&
836          "Prologue CR saving supported only in 64-bit mode");
837 
838   // If we need to spill the CR and the LR but we don't have two separate
839   // registers available, we must spill them one at a time
840   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
841     // In the ELFv2 ABI, we are not required to save all CR fields.
842     // If only one or two CR fields are clobbered, it is more efficient to use
    // mfocrf to selectively save just those fields, because mfocrf has a
    // shorter latency than mfcr.
845     unsigned MfcrOpcode = PPC::MFCR8;
846     unsigned CrState = RegState::ImplicitKill;
847     if (isELFv2ABI && MustSaveCRs.size() == 1) {
848       MfcrOpcode = PPC::MFOCRF8;
849       CrState = RegState::Kill;
850     }
851     MachineInstrBuilder MIB =
852       BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg);
853     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
854       MIB.addReg(MustSaveCRs[i], CrState);
855     BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
856       .addReg(TempReg, getKillRegState(true))
857       .addImm(8)
858       .addReg(SPReg);
859   }
860 
861   if (MustSaveLR)
862     BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
863 
864   if (MustSaveCR &&
865       !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64
866     // In the ELFv2 ABI, we are not required to save all CR fields.
867     // If only one or two CR fields are clobbered, it is more efficient to use
    // mfocrf to selectively save just those fields, because mfocrf has a
    // shorter latency than mfcr.
870     unsigned MfcrOpcode = PPC::MFCR8;
871     unsigned CrState = RegState::ImplicitKill;
872     if (isELFv2ABI && MustSaveCRs.size() == 1) {
873       MfcrOpcode = PPC::MFOCRF8;
874       CrState = RegState::Kill;
875     }
876     MachineInstrBuilder MIB =
877       BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg);
878     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
879       MIB.addReg(MustSaveCRs[i], CrState);
880   }
881 
882   if (HasRedZone) {
883     if (HasFP)
884       BuildMI(MBB, MBBI, dl, StoreInst)
885         .addReg(FPReg)
886         .addImm(FPOffset)
887         .addReg(SPReg);
888     if (FI->usesPICBase())
889       BuildMI(MBB, MBBI, dl, StoreInst)
890         .addReg(PPC::R30)
891         .addImm(PBPOffset)
892         .addReg(SPReg);
893     if (HasBP)
894       BuildMI(MBB, MBBI, dl, StoreInst)
895         .addReg(BPReg)
896         .addImm(BPOffset)
897         .addReg(SPReg);
898   }
899 
900   if (MustSaveLR)
901     BuildMI(MBB, MBBI, dl, StoreInst)
902       .addReg(ScratchReg, getKillRegState(true))
903       .addImm(LROffset)
904       .addReg(SPReg);
905 
906   if (MustSaveCR &&
907       !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64
908     assert(HasRedZone && "A red zone is always available on PPC64");
909     BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
910       .addReg(TempReg, getKillRegState(true))
911       .addImm(8)
912       .addReg(SPReg);
913   }
914 
915   // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
916   if (!FrameSize)
917     return;
918 
919   // Adjust stack pointer: r1 += NegFrameSize.
920   // If there is a preferred stack alignment, align R1 now
921 
922   if (HasBP && HasRedZone) {
923     // Save a copy of r1 as the base pointer.
924     BuildMI(MBB, MBBI, dl, OrInst, BPReg)
925       .addReg(SPReg)
926       .addReg(SPReg);
927   }
928 
929   // Have we generated a STUX instruction to claim stack frame? If so,
930   // the frame size will be placed in ScratchReg.
931   bool HasSTUX = false;
932 
933   // This condition must be kept in sync with canUseAsPrologue.
934   if (HasBP && MaxAlign > 1) {
935     if (isPPC64)
936       BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
937         .addReg(SPReg)
938         .addImm(0)
939         .addImm(64 - Log2_32(MaxAlign));
940     else // PPC32...
941       BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
942         .addReg(SPReg)
943         .addImm(0)
944         .addImm(32 - Log2_32(MaxAlign))
945         .addImm(31);
946     if (!isLargeFrame) {
947       BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
948         .addReg(ScratchReg, RegState::Kill)
949         .addImm(NegFrameSize);
950     } else {
951       assert(!SingleScratchReg && "Only a single scratch reg available");
952       BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
953         .addImm(NegFrameSize >> 16);
954       BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
955         .addReg(TempReg, RegState::Kill)
956         .addImm(NegFrameSize & 0xFFFF);
957       BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
958         .addReg(ScratchReg, RegState::Kill)
959         .addReg(TempReg, RegState::Kill);
960     }
961 
962     BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
963       .addReg(SPReg, RegState::Kill)
964       .addReg(SPReg)
965       .addReg(ScratchReg);
966     HasSTUX = true;
967 
968   } else if (!isLargeFrame) {
969     BuildMI(MBB, MBBI, dl, StoreUpdtInst, SPReg)
970       .addReg(SPReg)
971       .addImm(NegFrameSize)
972       .addReg(SPReg);
973 
974   } else {
975     BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
976       .addImm(NegFrameSize >> 16);
977     BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
978       .addReg(ScratchReg, RegState::Kill)
979       .addImm(NegFrameSize & 0xFFFF);
980     BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
981       .addReg(SPReg, RegState::Kill)
982       .addReg(SPReg)
983       .addReg(ScratchReg);
984     HasSTUX = true;
985   }
986 
987   if (!HasRedZone) {
988     assert(!isPPC64 && "A red zone is always available on PPC64");
989     if (HasSTUX) {
990       // The frame size is in ScratchReg, and the SPReg has been advanced
991       // (downwards) by the frame size: SPReg = old SPReg + ScratchReg.
992       // Set ScratchReg to the original SPReg: ScratchReg = SPReg - ScratchReg.
993       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
994         .addReg(ScratchReg, RegState::Kill)
995         .addReg(SPReg);
996 
997       // Now that the stack frame has been allocated, save all the necessary
998       // registers using ScratchReg as the base address.
999       if (HasFP)
1000         BuildMI(MBB, MBBI, dl, StoreInst)
1001           .addReg(FPReg)
1002           .addImm(FPOffset)
1003           .addReg(ScratchReg);
1004       if (FI->usesPICBase())
1005         BuildMI(MBB, MBBI, dl, StoreInst)
1006           .addReg(PPC::R30)
1007           .addImm(PBPOffset)
1008           .addReg(ScratchReg);
1009       if (HasBP) {
1010         BuildMI(MBB, MBBI, dl, StoreInst)
1011           .addReg(BPReg)
1012           .addImm(BPOffset)
1013           .addReg(ScratchReg);
1014         BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1015           .addReg(ScratchReg, RegState::Kill)
1016           .addReg(ScratchReg);
1017       }
1018     } else {
1019       // The frame size is a known 16-bit constant (fitting in the immediate
1020       // field of STWU). To be here we have to be compiling for PPC32.
1021       // Since the SPReg has been decreased by FrameSize, add it back to each
1022       // offset.
1023       if (HasFP)
1024         BuildMI(MBB, MBBI, dl, StoreInst)
1025           .addReg(FPReg)
1026           .addImm(FrameSize + FPOffset)
1027           .addReg(SPReg);
1028       if (FI->usesPICBase())
1029         BuildMI(MBB, MBBI, dl, StoreInst)
1030           .addReg(PPC::R30)
1031           .addImm(FrameSize + PBPOffset)
1032           .addReg(SPReg);
1033       if (HasBP) {
1034         BuildMI(MBB, MBBI, dl, StoreInst)
1035           .addReg(BPReg)
1036           .addImm(FrameSize + BPOffset)
1037           .addReg(SPReg);
1038         BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1039           .addReg(SPReg)
1040           .addImm(FrameSize);
1041       }
1042     }
1043   }
1044 
1045   // Add Call Frame Information for the instructions we generated above.
1046   if (needsCFI) {
1047     unsigned CFIIndex;
1048 
1049     if (HasBP) {
1050       // Define CFA in terms of BP. Do this in preference to using FP/SP,
1051       // because if the stack needed aligning then CFA won't be at a fixed
1052       // offset from FP/SP.
1053       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1054       CFIIndex = MMI.addFrameInst(
1055           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1056     } else {
1057       // Adjust the definition of CFA to account for the change in SP.
1058       assert(NegFrameSize);
1059       CFIIndex = MMI.addFrameInst(
1060           MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize));
1061     }
1062     BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1063         .addCFIIndex(CFIIndex);
1064 
1065     if (HasFP) {
1066       // Describe where FP was saved, at a fixed offset from CFA.
1067       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1068       CFIIndex = MMI.addFrameInst(
1069           MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1070       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1071           .addCFIIndex(CFIIndex);
1072     }
1073 
1074     if (FI->usesPICBase()) {
      // Describe where the PIC base register (R30) was saved, at a fixed
      // offset from CFA.
1076       unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1077       CFIIndex = MMI.addFrameInst(
1078           MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1079       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1080           .addCFIIndex(CFIIndex);
1081     }
1082 
1083     if (HasBP) {
1084       // Describe where BP was saved, at a fixed offset from CFA.
1085       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1086       CFIIndex = MMI.addFrameInst(
1087           MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1088       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1089           .addCFIIndex(CFIIndex);
1090     }
1091 
1092     if (MustSaveLR) {
1093       // Describe where LR was saved, at a fixed offset from CFA.
1094       unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1095       CFIIndex = MMI.addFrameInst(
1096           MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1097       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1098           .addCFIIndex(CFIIndex);
1099     }
1100   }
1101 
1102   // If there is a frame pointer, copy R1 into R31
1103   if (HasFP) {
1104     BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1105       .addReg(SPReg)
1106       .addReg(SPReg);
1107 
1108     if (!HasBP && needsCFI) {
1109       // Change the definition of CFA from SP+offset to FP+offset, because SP
1110       // will change at every alloca.
1111       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1112       unsigned CFIIndex = MMI.addFrameInst(
1113           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1114 
1115       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1116           .addCFIIndex(CFIIndex);
1117     }
1118   }
1119 
1120   if (needsCFI) {
1121     // Describe where callee saved registers were saved, at fixed offsets from
1122     // CFA.
1123     const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
1124     for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
1125       unsigned Reg = CSI[I].getReg();
1126       if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1127 
1128       // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1129       // subregisters of CR2. We just need to emit a move of CR2.
1130       if (PPC::CRBITRCRegClass.contains(Reg))
1131         continue;
1132 
1133       // For SVR4, don't emit a move for the CR spill slot if we haven't
1134       // spilled CRs.
1135       if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
1136           && !MustSaveCR)
1137         continue;
1138 
1139       // For 64-bit SVR4 when we have spilled CRs, the spill location
1140       // is SP+8, not a frame-relative slot.
1141       if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1142         // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1143         // the whole CR word.  In the ELFv2 ABI, every CR that was
1144         // actually saved gets its own CFI record.
1145         unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
1146         unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
1147             nullptr, MRI->getDwarfRegNum(CRReg, true), 8));
1148         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1149             .addCFIIndex(CFIIndex);
1150         continue;
1151       }
1152 
1153       int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
1154       unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
1155           nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1156       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1157           .addCFIIndex(CFIIndex);
1158     }
1159   }
1160 }
1161 
1162 void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1163                                     MachineBasicBlock &MBB) const {
1164   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1165   DebugLoc dl;
1166 
1167   if (MBBI != MBB.end())
1168     dl = MBBI->getDebugLoc();
1169 
1170   const PPCInstrInfo &TII =
1171       *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo());
1172   const PPCRegisterInfo *RegInfo =
1173       static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
1174 
1175   // Get alignment info so we know how to restore the SP.
1176   const MachineFrameInfo *MFI = MF.getFrameInfo();
1177 
1178   // Get the number of bytes allocated from the FrameInfo.
1179   int FrameSize = MFI->getStackSize();
1180 
1181   // Get processor type.
1182   bool isPPC64 = Subtarget.isPPC64();
1183   // Get the ABI.
1184   bool isSVR4ABI = Subtarget.isSVR4ABI();
1185 
1186   // Check if the link register (LR) has been saved.
1187   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1188   bool MustSaveLR = FI->mustSaveLR();
1189   const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
1190   bool MustSaveCR = !MustSaveCRs.empty();
1191   // Do we have a frame pointer and/or base pointer for this function?
1192   bool HasFP = hasFP(MF);
1193   bool HasBP = RegInfo->hasBasePointer(MF);
1194 
1195   unsigned SPReg      = isPPC64 ? PPC::X1  : PPC::R1;
1196   unsigned BPReg      = RegInfo->getBaseRegister(MF);
1197   unsigned FPReg      = isPPC64 ? PPC::X31 : PPC::R31;
1198   unsigned ScratchReg = 0;
1199   unsigned TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
1200   const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1201                                                  : PPC::MTLR );
1202   const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1203                                                  : PPC::LWZ );
1204   const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1205                                                            : PPC::LIS );
1206   const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1207                                                   : PPC::ORI );
1208   const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1209                                                    : PPC::ADDI );
1210   const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1211                                                 : PPC::ADD4 );
1212 
1213   int LROffset = getReturnSaveOffset();
1214 
1215   int FPOffset = 0;
1216 
1217   // Using the same bool variable as below to supress compiler warnings.
1218   bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1219                                               &TempReg);
1220   assert(SingleScratchReg &&
1221          "Could not find an available scratch register");
1222 
1223   SingleScratchReg = ScratchReg == TempReg;
1224 
1225   if (HasFP) {
1226     if (isSVR4ABI) {
1227       MachineFrameInfo *FFI = MF.getFrameInfo();
1228       int FPIndex = FI->getFramePointerSaveIndex();
1229       assert(FPIndex && "No Frame Pointer Save Slot!");
1230       FPOffset = FFI->getObjectOffset(FPIndex);
1231     } else {
1232       FPOffset = getFramePointerSaveOffset();
1233     }
1234   }
1235 
1236   int BPOffset = 0;
1237   if (HasBP) {
1238     if (isSVR4ABI) {
1239       MachineFrameInfo *FFI = MF.getFrameInfo();
1240       int BPIndex = FI->getBasePointerSaveIndex();
1241       assert(BPIndex && "No Base Pointer Save Slot!");
1242       BPOffset = FFI->getObjectOffset(BPIndex);
1243     } else {
1244       BPOffset = getBasePointerSaveOffset();
1245     }
1246   }
1247 
1248   int PBPOffset = 0;
1249   if (FI->usesPICBase()) {
1250     MachineFrameInfo *FFI = MF.getFrameInfo();
1251     int PBPIndex = FI->getPICBasePointerSaveIndex();
1252     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1253     PBPOffset = FFI->getObjectOffset(PBPIndex);
1254   }
1255 
1256   bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1257 
1258   if (IsReturnBlock) {
1259     unsigned RetOpcode = MBBI->getOpcode();
1260     bool UsesTCRet =  RetOpcode == PPC::TCRETURNri ||
1261                       RetOpcode == PPC::TCRETURNdi ||
1262                       RetOpcode == PPC::TCRETURNai ||
1263                       RetOpcode == PPC::TCRETURNri8 ||
1264                       RetOpcode == PPC::TCRETURNdi8 ||
1265                       RetOpcode == PPC::TCRETURNai8;
1266 
1267     if (UsesTCRet) {
1268       int MaxTCRetDelta = FI->getTailCallSPDelta();
1269       MachineOperand &StackAdjust = MBBI->getOperand(1);
1270       assert(StackAdjust.isImm() && "Expecting immediate value.");
1271       // Adjust stack pointer.
1272       int StackAdj = StackAdjust.getImm();
1273       int Delta = StackAdj - MaxTCRetDelta;
1274       assert((Delta >= 0) && "Delta must be positive");
1275       if (MaxTCRetDelta>0)
1276         FrameSize += (StackAdj +Delta);
1277       else
1278         FrameSize += StackAdj;
1279     }
1280   }
1281 
1282   // Frames of 32KB & larger require special handling because they cannot be
1283   // indexed into with a simple LD/LWZ immediate offset operand.
1284   bool isLargeFrame = !isInt<16>(FrameSize);
1285 
1286   if (FrameSize) {
1287     // In the prologue, the loaded (or persistent) stack pointer value is offset
1288     // by the STDU/STDUX/STWU/STWUX instruction.  Add this offset back now.
1289 
1290     // If this function contained a fastcc call and GuaranteedTailCallOpt is
1291     // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
1292     // call which invalidates the stack pointer value in SP(0). So we use the
1293     // value of R31 in this case.
1294     if (FI->hasFastCall()) {
1295       assert(HasFP && "Expecting a valid frame pointer.");
1296       if (!isLargeFrame) {
1297         BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1298           .addReg(FPReg).addImm(FrameSize);
1299       } else {
1300         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1301           .addImm(FrameSize >> 16);
1302         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1303           .addReg(ScratchReg, RegState::Kill)
1304           .addImm(FrameSize & 0xFFFF);
1305         BuildMI(MBB, MBBI, dl, AddInst)
1306           .addReg(SPReg)
1307           .addReg(FPReg)
1308           .addReg(ScratchReg);
1309       }
1310     } else if (!isLargeFrame && !HasBP && !MFI->hasVarSizedObjects()) {
1311       BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1312         .addReg(SPReg)
1313         .addImm(FrameSize);
1314     } else {
1315       BuildMI(MBB, MBBI, dl, LoadInst, SPReg)
1316         .addImm(0)
1317         .addReg(SPReg);
1318     }
1319   }
1320 
1321   assert((isPPC64 || !MustSaveCR) &&
1322          "Epilogue CR restoring supported only in 64-bit mode");
1323 
1324   // If we need to save both the LR and the CR and we only have one available
1325   // scratch register, we must do them one at a time.
1326   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1327     BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
1328       .addImm(8)
1329       .addReg(SPReg);
1330     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1331       BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
1332         .addReg(TempReg, getKillRegState(i == e-1));
1333   }
1334 
1335   if (MustSaveLR)
1336     BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1337       .addImm(LROffset)
1338       .addReg(SPReg);
1339 
1340   if (MustSaveCR &&
1341       !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64
1342     BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
1343       .addImm(8)
1344       .addReg(SPReg);
1345 
1346   if (HasFP)
1347     BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1348       .addImm(FPOffset)
1349       .addReg(SPReg);
1350 
1351   if (FI->usesPICBase())
1352     // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe.
1353     BuildMI(MBB, MBBI, dl, LoadInst)
1354       .addReg(PPC::R30)
1355       .addImm(PBPOffset)
1356       .addReg(SPReg);
1357 
1358   if (HasBP)
1359     BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1360       .addImm(BPOffset)
1361       .addReg(SPReg);
1362 
1363   if (MustSaveCR &&
1364       !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64
1365     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1366       BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
1367         .addReg(TempReg, getKillRegState(i == e-1));
1368 
1369   if (MustSaveLR)
1370     BuildMI(MBB, MBBI, dl, MTLRInst).addReg(ScratchReg);
1371 
1372   // Callee pop calling convention. Pop parameter/linkage area. Used for tail
1373   // call optimization
1374   if (IsReturnBlock) {
1375     unsigned RetOpcode = MBBI->getOpcode();
1376     if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1377         (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1378         MF.getFunction()->getCallingConv() == CallingConv::Fast) {
1379       PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1380       unsigned CallerAllocatedAmt = FI->getMinReservedArea();
1381 
1382       if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1383         BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1384           .addReg(SPReg).addImm(CallerAllocatedAmt);
1385       } else {
1386         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1387           .addImm(CallerAllocatedAmt >> 16);
1388         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1389           .addReg(ScratchReg, RegState::Kill)
1390           .addImm(CallerAllocatedAmt & 0xFFFF);
1391         BuildMI(MBB, MBBI, dl, AddInst)
1392           .addReg(SPReg)
1393           .addReg(FPReg)
1394           .addReg(ScratchReg);
1395       }
1396     } else {
1397       createTailCallBranchInstr(MBB);
1398     }
1399   }
1400 }
1401 
1402 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
1403   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1404   DebugLoc dl;
1405 
1406   if (MBBI != MBB.end())
1407     dl = MBBI->getDebugLoc();
1408 
1409   const PPCInstrInfo &TII =
1410       *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo());
1411 
1412   // Create branch instruction for pseudo tail call return instruction
1413   unsigned RetOpcode = MBBI->getOpcode();
1414   if (RetOpcode == PPC::TCRETURNdi) {
1415     MBBI = MBB.getLastNonDebugInstr();
1416     MachineOperand &JumpTarget = MBBI->getOperand(0);
1417     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1418       addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1419   } else if (RetOpcode == PPC::TCRETURNri) {
1420     MBBI = MBB.getLastNonDebugInstr();
1421     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1422     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
1423   } else if (RetOpcode == PPC::TCRETURNai) {
1424     MBBI = MBB.getLastNonDebugInstr();
1425     MachineOperand &JumpTarget = MBBI->getOperand(0);
1426     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
1427   } else if (RetOpcode == PPC::TCRETURNdi8) {
1428     MBBI = MBB.getLastNonDebugInstr();
1429     MachineOperand &JumpTarget = MBBI->getOperand(0);
1430     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1431       addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1432   } else if (RetOpcode == PPC::TCRETURNri8) {
1433     MBBI = MBB.getLastNonDebugInstr();
1434     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1435     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
1436   } else if (RetOpcode == PPC::TCRETURNai8) {
1437     MBBI = MBB.getLastNonDebugInstr();
1438     MachineOperand &JumpTarget = MBBI->getOperand(0);
1439     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
1440   }
1441 }
1442 
1443 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
1444                                             BitVector &SavedRegs,
1445                                             RegScavenger *RS) const {
1446   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1447 
1448   const PPCRegisterInfo *RegInfo =
1449       static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
1450 
1451   //  Save and clear the LR state.
1452   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1453   unsigned LR = RegInfo->getRARegister();
1454   FI->setMustSaveLR(MustSaveLR(MF, LR));
1455   SavedRegs.reset(LR);
1456 
1457   //  Save R31 if necessary
1458   int FPSI = FI->getFramePointerSaveIndex();
1459   bool isPPC64 = Subtarget.isPPC64();
1460   bool isDarwinABI  = Subtarget.isDarwinABI();
1461   MachineFrameInfo *MFI = MF.getFrameInfo();
1462 
1463   // If the frame pointer save index hasn't been defined yet.
1464   if (!FPSI && needsFP(MF)) {
1465     // Find out what the fix offset of the frame pointer save area.
1466     int FPOffset = getFramePointerSaveOffset();
1467     // Allocate the frame index for frame pointer save area.
1468     FPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
1469     // Save the result.
1470     FI->setFramePointerSaveIndex(FPSI);
1471   }
1472 
1473   int BPSI = FI->getBasePointerSaveIndex();
1474   if (!BPSI && RegInfo->hasBasePointer(MF)) {
1475     int BPOffset = getBasePointerSaveOffset();
1476     // Allocate the frame index for the base pointer save area.
1477     BPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
1478     // Save the result.
1479     FI->setBasePointerSaveIndex(BPSI);
1480   }
1481 
1482   // Reserve stack space for the PIC Base register (R30).
1483   // Only used in SVR4 32-bit.
1484   if (FI->usesPICBase()) {
1485     int PBPSI = MFI->CreateFixedObject(4, -8, true);
1486     FI->setPICBasePointerSaveIndex(PBPSI);
1487   }
1488 
1489   // Make sure we don't explicitly spill r31, because, for example, we have
1490   // some inline asm which explicity clobbers it, when we otherwise have a
1491   // frame pointer and are using r31's spill slot for the prologue/epilogue
1492   // code. Same goes for the base pointer and the PIC base register.
1493   if (needsFP(MF))
1494     SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
1495   if (RegInfo->hasBasePointer(MF))
1496     SavedRegs.reset(RegInfo->getBaseRegister(MF));
1497   if (FI->usesPICBase())
1498     SavedRegs.reset(PPC::R30);
1499 
1500   // Reserve stack space to move the linkage area to in case of a tail call.
1501   int TCSPDelta = 0;
1502   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1503       (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
1504     MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
1505   }
1506 
1507   // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the
1508   // function uses CR 2, 3, or 4.
1509   if (!isPPC64 && !isDarwinABI &&
1510       (SavedRegs.test(PPC::CR2) ||
1511        SavedRegs.test(PPC::CR3) ||
1512        SavedRegs.test(PPC::CR4))) {
1513     int FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true);
1514     FI->setCRSpillFrameIndex(FrameIdx);
1515   }
1516 }
1517 
1518 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
1519                                                        RegScavenger *RS) const {
1520   // Early exit if not using the SVR4 ABI.
1521   if (!Subtarget.isSVR4ABI()) {
1522     addScavengingSpillSlot(MF, RS);
1523     return;
1524   }
1525 
1526   // Get callee saved register information.
1527   MachineFrameInfo *FFI = MF.getFrameInfo();
1528   const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo();
1529 
1530   // If the function is shrink-wrapped, and if the function has a tail call, the
1531   // tail call might not be in the new RestoreBlock, so real branch instruction
1532   // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
1533   // RestoreBlock. So we handle this case here.
1534   if (FFI->getSavePoint() && FFI->hasTailCall()) {
1535     MachineBasicBlock *RestoreBlock = FFI->getRestorePoint();
1536     for (MachineBasicBlock &MBB : MF) {
1537       if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
1538         createTailCallBranchInstr(MBB);
1539     }
1540   }
1541 
1542   // Early exit if no callee saved registers are modified!
1543   if (CSI.empty() && !needsFP(MF)) {
1544     addScavengingSpillSlot(MF, RS);
1545     return;
1546   }
1547 
1548   unsigned MinGPR = PPC::R31;
1549   unsigned MinG8R = PPC::X31;
1550   unsigned MinFPR = PPC::F31;
1551   unsigned MinVR = PPC::V31;
1552 
1553   bool HasGPSaveArea = false;
1554   bool HasG8SaveArea = false;
1555   bool HasFPSaveArea = false;
1556   bool HasVRSAVESaveArea = false;
1557   bool HasVRSaveArea = false;
1558 
1559   SmallVector<CalleeSavedInfo, 18> GPRegs;
1560   SmallVector<CalleeSavedInfo, 18> G8Regs;
1561   SmallVector<CalleeSavedInfo, 18> FPRegs;
1562   SmallVector<CalleeSavedInfo, 18> VRegs;
1563 
1564   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1565     unsigned Reg = CSI[i].getReg();
1566     if (PPC::GPRCRegClass.contains(Reg)) {
1567       HasGPSaveArea = true;
1568 
1569       GPRegs.push_back(CSI[i]);
1570 
1571       if (Reg < MinGPR) {
1572         MinGPR = Reg;
1573       }
1574     } else if (PPC::G8RCRegClass.contains(Reg)) {
1575       HasG8SaveArea = true;
1576 
1577       G8Regs.push_back(CSI[i]);
1578 
1579       if (Reg < MinG8R) {
1580         MinG8R = Reg;
1581       }
1582     } else if (PPC::F8RCRegClass.contains(Reg)) {
1583       HasFPSaveArea = true;
1584 
1585       FPRegs.push_back(CSI[i]);
1586 
1587       if (Reg < MinFPR) {
1588         MinFPR = Reg;
1589       }
1590     } else if (PPC::CRBITRCRegClass.contains(Reg) ||
1591                PPC::CRRCRegClass.contains(Reg)) {
1592       ; // do nothing, as we already know whether CRs are spilled
1593     } else if (PPC::VRSAVERCRegClass.contains(Reg)) {
1594       HasVRSAVESaveArea = true;
1595     } else if (PPC::VRRCRegClass.contains(Reg)) {
1596       HasVRSaveArea = true;
1597 
1598       VRegs.push_back(CSI[i]);
1599 
1600       if (Reg < MinVR) {
1601         MinVR = Reg;
1602       }
1603     } else {
1604       llvm_unreachable("Unknown RegisterClass!");
1605     }
1606   }
1607 
1608   PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
1609   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1610 
1611   int64_t LowerBound = 0;
1612 
1613   // Take into account stack space reserved for tail calls.
1614   int TCSPDelta = 0;
1615   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1616       (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
1617     LowerBound = TCSPDelta;
1618   }
1619 
1620   // The Floating-point register save area is right below the back chain word
1621   // of the previous stack frame.
1622   if (HasFPSaveArea) {
1623     for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
1624       int FI = FPRegs[i].getFrameIdx();
1625 
1626       FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
1627     }
1628 
1629     LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
1630   }
1631 
1632   // Check whether the frame pointer register is allocated. If so, make sure it
1633   // is spilled to the correct offset.
1634   if (needsFP(MF)) {
1635     HasGPSaveArea = true;
1636 
1637     int FI = PFI->getFramePointerSaveIndex();
1638     assert(FI && "No Frame Pointer Save Slot!");
1639 
1640     FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
1641   }
1642 
1643   if (PFI->usesPICBase()) {
1644     HasGPSaveArea = true;
1645 
1646     int FI = PFI->getPICBasePointerSaveIndex();
1647     assert(FI && "No PIC Base Pointer Save Slot!");
1648 
1649     FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
1650   }
1651 
1652   const PPCRegisterInfo *RegInfo =
1653       static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
1654   if (RegInfo->hasBasePointer(MF)) {
1655     HasGPSaveArea = true;
1656 
1657     int FI = PFI->getBasePointerSaveIndex();
1658     assert(FI && "No Base Pointer Save Slot!");
1659 
1660     FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
1661   }
1662 
1663   // General register save area starts right below the Floating-point
1664   // register save area.
1665   if (HasGPSaveArea || HasG8SaveArea) {
1666     // Move general register save area spill slots down, taking into account
1667     // the size of the Floating-point register save area.
1668     for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
1669       int FI = GPRegs[i].getFrameIdx();
1670 
1671       FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
1672     }
1673 
1674     // Move general register save area spill slots down, taking into account
1675     // the size of the Floating-point register save area.
1676     for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
1677       int FI = G8Regs[i].getFrameIdx();
1678 
1679       FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
1680     }
1681 
1682     unsigned MinReg =
1683       std::min<unsigned>(TRI->getEncodingValue(MinGPR),
1684                          TRI->getEncodingValue(MinG8R));
1685 
1686     if (Subtarget.isPPC64()) {
1687       LowerBound -= (31 - MinReg + 1) * 8;
1688     } else {
1689       LowerBound -= (31 - MinReg + 1) * 4;
1690     }
1691   }
1692 
1693   // For 32-bit only, the CR save area is below the general register
1694   // save area.  For 64-bit SVR4, the CR save area is addressed relative
1695   // to the stack pointer and hence does not need an adjustment here.
1696   // Only CR2 (the first nonvolatile spilled) has an associated frame
1697   // index so that we have a single uniform save area.
1698   if (spillsCR(MF) && !(Subtarget.isPPC64() && Subtarget.isSVR4ABI())) {
1699     // Adjust the frame index of the CR spill slot.
1700     for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1701       unsigned Reg = CSI[i].getReg();
1702 
1703       if ((Subtarget.isSVR4ABI() && Reg == PPC::CR2)
1704           // Leave Darwin logic as-is.
1705           || (!Subtarget.isSVR4ABI() &&
1706               (PPC::CRBITRCRegClass.contains(Reg) ||
1707                PPC::CRRCRegClass.contains(Reg)))) {
1708         int FI = CSI[i].getFrameIdx();
1709 
1710         FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
1711       }
1712     }
1713 
1714     LowerBound -= 4; // The CR save area is always 4 bytes long.
1715   }
1716 
1717   if (HasVRSAVESaveArea) {
1718     // FIXME SVR4: Is it actually possible to have multiple elements in CSI
1719     //             which have the VRSAVE register class?
1720     // Adjust the frame index of the VRSAVE spill slot.
1721     for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1722       unsigned Reg = CSI[i].getReg();
1723 
1724       if (PPC::VRSAVERCRegClass.contains(Reg)) {
1725         int FI = CSI[i].getFrameIdx();
1726 
1727         FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
1728       }
1729     }
1730 
1731     LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
1732   }
1733 
1734   if (HasVRSaveArea) {
1735     // Insert alignment padding, we need 16-byte alignment.
1736     LowerBound = (LowerBound - 15) & ~(15);
1737 
1738     for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
1739       int FI = VRegs[i].getFrameIdx();
1740 
1741       FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
1742     }
1743   }
1744 
1745   addScavengingSpillSlot(MF, RS);
1746 }
1747 
1748 void
1749 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
1750                                          RegScavenger *RS) const {
1751   // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
1752   // a large stack, which will require scavenging a register to materialize a
1753   // large offset.
1754 
1755   // We need to have a scavenger spill slot for spills if the frame size is
1756   // large. In case there is no free register for large-offset addressing,
1757   // this slot is used for the necessary emergency spill. Also, we need the
1758   // slot for dynamic stack allocations.
1759 
1760   // The scavenger might be invoked if the frame offset does not fit into
1761   // the 16-bit immediate. We don't know the complete frame size here
1762   // because we've not yet computed callee-saved register spills or the
1763   // needed alignment padding.
1764   unsigned StackSize = determineFrameLayout(MF, false, true);
1765   MachineFrameInfo *MFI = MF.getFrameInfo();
1766   if (MFI->hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
1767       hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
1768     const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
1769     const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
1770     const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC;
1771     RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
1772                                                        RC->getAlignment(),
1773                                                        false));
1774 
1775     // Might we have over-aligned allocas?
1776     bool HasAlVars = MFI->hasVarSizedObjects() &&
1777                      MFI->getMaxAlignment() > getStackAlignment();
1778 
1779     // These kinds of spills might need two registers.
1780     if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars)
1781       RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
1782                                                          RC->getAlignment(),
1783                                                          false));
1784 
1785   }
1786 }
1787 
1788 bool
1789 PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
1790                                      MachineBasicBlock::iterator MI,
1791                                      const std::vector<CalleeSavedInfo> &CSI,
1792                                      const TargetRegisterInfo *TRI) const {
1793 
1794   // Currently, this function only handles SVR4 32- and 64-bit ABIs.
1795   // Return false otherwise to maintain pre-existing behavior.
1796   if (!Subtarget.isSVR4ABI())
1797     return false;
1798 
1799   MachineFunction *MF = MBB.getParent();
1800   const PPCInstrInfo &TII =
1801       *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo());
1802   DebugLoc DL;
1803   bool CRSpilled = false;
1804   MachineInstrBuilder CRMIB;
1805 
1806   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1807     unsigned Reg = CSI[i].getReg();
1808     // Only Darwin actually uses the VRSAVE register, but it can still appear
1809     // here if, for example, @llvm.eh.unwind.init() is used.  If we're not on
1810     // Darwin, ignore it.
1811     if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
1812       continue;
1813 
1814     // CR2 through CR4 are the nonvolatile CR fields.
1815     bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
1816 
1817     // Add the callee-saved register as live-in; it's killed at the spill.
1818     MBB.addLiveIn(Reg);
1819 
1820     if (CRSpilled && IsCRField) {
1821       CRMIB.addReg(Reg, RegState::ImplicitKill);
1822       continue;
1823     }
1824 
1825     // Insert the spill to the stack frame.
1826     if (IsCRField) {
1827       PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
1828       if (Subtarget.isPPC64()) {
1829         // The actual spill will happen at the start of the prologue.
1830         FuncInfo->addMustSaveCR(Reg);
1831       } else {
1832         CRSpilled = true;
1833         FuncInfo->setSpillsCR();
1834 
1835         // 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
1836         // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
1837         CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
1838                   .addReg(Reg, RegState::ImplicitKill);
1839 
1840         MBB.insert(MI, CRMIB);
1841         MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
1842                                          .addReg(PPC::R12,
1843                                                  getKillRegState(true)),
1844                                          CSI[i].getFrameIdx()));
1845       }
1846     } else {
1847       const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
1848       TII.storeRegToStackSlot(MBB, MI, Reg, true,
1849                               CSI[i].getFrameIdx(), RC, TRI);
1850     }
1851   }
1852   return true;
1853 }
1854 
1855 static void
1856 restoreCRs(bool isPPC64, bool is31,
1857            bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
1858            MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1859            const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) {
1860 
1861   MachineFunction *MF = MBB.getParent();
1862   const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
1863   DebugLoc DL;
1864   unsigned RestoreOp, MoveReg;
1865 
1866   if (isPPC64)
1867     // This is handled during epilogue generation.
1868     return;
1869   else {
1870     // 32-bit:  FP-relative
1871     MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ),
1872                                              PPC::R12),
1873                                      CSI[CSIIndex].getFrameIdx()));
1874     RestoreOp = PPC::MTOCRF;
1875     MoveReg = PPC::R12;
1876   }
1877 
1878   if (CR2Spilled)
1879     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
1880                .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
1881 
1882   if (CR3Spilled)
1883     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
1884                .addReg(MoveReg, getKillRegState(!CR4Spilled)));
1885 
1886   if (CR4Spilled)
1887     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
1888                .addReg(MoveReg, getKillRegState(true)));
1889 }
1890 
1891 MachineBasicBlock::iterator PPCFrameLowering::
1892 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
1893                               MachineBasicBlock::iterator I) const {
1894   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
1895   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1896       I->getOpcode() == PPC::ADJCALLSTACKUP) {
1897     // Add (actually subtract) back the amount the callee popped on return.
1898     if (int CalleeAmt =  I->getOperand(1).getImm()) {
1899       bool is64Bit = Subtarget.isPPC64();
1900       CalleeAmt *= -1;
1901       unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
1902       unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
1903       unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
1904       unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
1905       unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
1906       unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
1907       MachineInstr *MI = I;
1908       const DebugLoc &dl = MI->getDebugLoc();
1909 
1910       if (isInt<16>(CalleeAmt)) {
1911         BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
1912           .addReg(StackReg, RegState::Kill)
1913           .addImm(CalleeAmt);
1914       } else {
1915         MachineBasicBlock::iterator MBBI = I;
1916         BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
1917           .addImm(CalleeAmt >> 16);
1918         BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
1919           .addReg(TmpReg, RegState::Kill)
1920           .addImm(CalleeAmt & 0xFFFF);
1921         BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
1922           .addReg(StackReg, RegState::Kill)
1923           .addReg(TmpReg);
1924       }
1925     }
1926   }
1927   // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
1928   return MBB.erase(I);
1929 }
1930 
1931 bool
1932 PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
1933                                         MachineBasicBlock::iterator MI,
1934                                         const std::vector<CalleeSavedInfo> &CSI,
1935                                         const TargetRegisterInfo *TRI) const {
1936 
1937   // Currently, this function only handles SVR4 32- and 64-bit ABIs.
1938   // Return false otherwise to maintain pre-existing behavior.
1939   if (!Subtarget.isSVR4ABI())
1940     return false;
1941 
1942   MachineFunction *MF = MBB.getParent();
1943   const PPCInstrInfo &TII =
1944       *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo());
1945   bool CR2Spilled = false;
1946   bool CR3Spilled = false;
1947   bool CR4Spilled = false;
1948   unsigned CSIIndex = 0;
1949 
1950   // Initialize insertion-point logic; we will be restoring in reverse
1951   // order of spill.
1952   MachineBasicBlock::iterator I = MI, BeforeI = I;
1953   bool AtStart = I == MBB.begin();
1954 
1955   if (!AtStart)
1956     --BeforeI;
1957 
1958   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1959     unsigned Reg = CSI[i].getReg();
1960 
1961     // Only Darwin actually uses the VRSAVE register, but it can still appear
1962     // here if, for example, @llvm.eh.unwind.init() is used.  If we're not on
1963     // Darwin, ignore it.
1964     if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
1965       continue;
1966 
1967     if (Reg == PPC::CR2) {
1968       CR2Spilled = true;
1969       // The spill slot is associated only with CR2, which is the
1970       // first nonvolatile spilled.  Save it here.
1971       CSIIndex = i;
1972       continue;
1973     } else if (Reg == PPC::CR3) {
1974       CR3Spilled = true;
1975       continue;
1976     } else if (Reg == PPC::CR4) {
1977       CR4Spilled = true;
1978       continue;
1979     } else {
1980       // When we first encounter a non-CR register after seeing at
1981       // least one CR register, restore all spilled CRs together.
1982       if ((CR2Spilled || CR3Spilled || CR4Spilled)
1983           && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1984         bool is31 = needsFP(*MF);
1985         restoreCRs(Subtarget.isPPC64(), is31,
1986                    CR2Spilled, CR3Spilled, CR4Spilled,
1987                    MBB, I, CSI, CSIIndex);
1988         CR2Spilled = CR3Spilled = CR4Spilled = false;
1989       }
1990 
1991       // Default behavior for non-CR saves.
1992       const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
1993       TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(),
1994                                RC, TRI);
1995       assert(I != MBB.begin() &&
1996              "loadRegFromStackSlot didn't insert any code!");
1997       }
1998 
1999     // Insert in reverse order.
2000     if (AtStart)
2001       I = MBB.begin();
2002     else {
2003       I = BeforeI;
2004       ++I;
2005     }
2006   }
2007 
2008   // If we haven't yet spilled the CRs, do so now.
2009   if (CR2Spilled || CR3Spilled || CR4Spilled) {
2010     bool is31 = needsFP(*MF);
2011     restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled,
2012                MBB, I, CSI, CSIIndex);
2013   }
2014 
2015   return true;
2016 }
2017 
2018 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2019   return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() &&
2020           MF.getSubtarget<PPCSubtarget>().isPPC64());
2021 }
2022