1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the PPC implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "MCTargetDesc/PPCPredicates.h"
14 #include "PPCFrameLowering.h"
15 #include "PPCInstrBuilder.h"
16 #include "PPCInstrInfo.h"
17 #include "PPCMachineFunctionInfo.h"
18 #include "PPCSubtarget.h"
19 #include "PPCTargetMachine.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/MachineModuleInfo.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/RegisterScavenging.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/Target/TargetOptions.h"
29 
30 using namespace llvm;
31 
32 #define DEBUG_TYPE "framelowering"
33 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
34 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
35 STATISTIC(NumPrologProbed, "Number of prologues probed");
36 
37 static cl::opt<bool>
38 EnablePEVectorSpills("ppc-enable-pe-vector-spills",
39                      cl::desc("Enable spills in prologue to vector registers."),
40                      cl::init(false), cl::Hidden);
41 
42 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
43   if (STI.isAIXABI())
44     return STI.isPPC64() ? 16 : 8;
45   // SVR4 ABI:
46   return STI.isPPC64() ? 16 : 4;
47 }
48 
49 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
50   if (STI.isAIXABI())
51     return STI.isPPC64() ? 40 : 20;
52   return STI.isELFv2ABI() ? 24 : 40;
53 }
54 
55 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
56   // First slot in the general register save area.
57   return STI.isPPC64() ? -8U : -4U;
58 }
59 
60 static unsigned computeLinkageSize(const PPCSubtarget &STI) {
61   if (STI.isAIXABI() || STI.isPPC64())
62     return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
63 
64   // 32-bit SVR4 ABI:
65   return 8;
66 }
67 
68 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
69   // Third slot in the general purpose register save area.
70   if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent())
71     return -12U;
72 
73   // Second slot in the general purpose register save area.
74   return STI.isPPC64() ? -16U : -8U;
75 }
76 
77 static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
78   return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
79 }
80 
81 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
82     : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
83                           STI.getPlatformStackAlignment(), 0),
84       Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
85       TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
86       FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
87       LinkageSize(computeLinkageSize(Subtarget)),
88       BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
89       CRSaveOffset(computeCRSaveOffset(Subtarget)) {}
90 
91 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
92 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
93     unsigned &NumEntries) const {
94 
95 // Floating-point register save area offsets.
96 #define CALLEE_SAVED_FPRS \
97       {PPC::F31, -8},     \
98       {PPC::F30, -16},    \
99       {PPC::F29, -24},    \
100       {PPC::F28, -32},    \
101       {PPC::F27, -40},    \
102       {PPC::F26, -48},    \
103       {PPC::F25, -56},    \
104       {PPC::F24, -64},    \
105       {PPC::F23, -72},    \
106       {PPC::F22, -80},    \
107       {PPC::F21, -88},    \
108       {PPC::F20, -96},    \
109       {PPC::F19, -104},   \
110       {PPC::F18, -112},   \
111       {PPC::F17, -120},   \
112       {PPC::F16, -128},   \
113       {PPC::F15, -136},   \
114       {PPC::F14, -144}
115 
116 // 32-bit general purpose register save area offsets shared by ELF and
117 // AIX. AIX has an extra CSR with r13.
118 #define CALLEE_SAVED_GPRS32 \
119       {PPC::R31, -4},       \
120       {PPC::R30, -8},       \
121       {PPC::R29, -12},      \
122       {PPC::R28, -16},      \
123       {PPC::R27, -20},      \
124       {PPC::R26, -24},      \
125       {PPC::R25, -28},      \
126       {PPC::R24, -32},      \
127       {PPC::R23, -36},      \
128       {PPC::R22, -40},      \
129       {PPC::R21, -44},      \
130       {PPC::R20, -48},      \
131       {PPC::R19, -52},      \
132       {PPC::R18, -56},      \
133       {PPC::R17, -60},      \
134       {PPC::R16, -64},      \
135       {PPC::R15, -68},      \
136       {PPC::R14, -72}
137 
138 // 64-bit general purpose register save area offsets.
139 #define CALLEE_SAVED_GPRS64 \
140       {PPC::X31, -8},       \
141       {PPC::X30, -16},      \
142       {PPC::X29, -24},      \
143       {PPC::X28, -32},      \
144       {PPC::X27, -40},      \
145       {PPC::X26, -48},      \
146       {PPC::X25, -56},      \
147       {PPC::X24, -64},      \
148       {PPC::X23, -72},      \
149       {PPC::X22, -80},      \
150       {PPC::X21, -88},      \
151       {PPC::X20, -96},      \
152       {PPC::X19, -104},     \
153       {PPC::X18, -112},     \
154       {PPC::X17, -120},     \
155       {PPC::X16, -128},     \
156       {PPC::X15, -136},     \
157       {PPC::X14, -144}
158 
159 // Vector register save area offsets.
160 #define CALLEE_SAVED_VRS \
161       {PPC::V31, -16},   \
162       {PPC::V30, -32},   \
163       {PPC::V29, -48},   \
164       {PPC::V28, -64},   \
165       {PPC::V27, -80},   \
166       {PPC::V26, -96},   \
167       {PPC::V25, -112},  \
168       {PPC::V24, -128},  \
169       {PPC::V23, -144},  \
170       {PPC::V22, -160},  \
171       {PPC::V21, -176},  \
172       {PPC::V20, -192}
173 
174   // Note that the offsets here overlap, but this is fixed up in
175   // processFunctionBeforeFrameFinalized.
176 
177   static const SpillSlot ELFOffsets32[] = {
178       CALLEE_SAVED_FPRS,
179       CALLEE_SAVED_GPRS32,
180 
181       // CR save area offset.  We map each of the nonvolatile CR fields
182       // to the slot for CR2, which is the first of the nonvolatile CR
183       // fields to be assigned, so that we only allocate one save slot.
184       // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
185       {PPC::CR2, -4},
186 
187       // VRSAVE save area offset.
188       {PPC::VRSAVE, -4},
189 
190       CALLEE_SAVED_VRS,
191 
192       // SPE register save area (overlaps Vector save area).
193       {PPC::S31, -8},
194       {PPC::S30, -16},
195       {PPC::S29, -24},
196       {PPC::S28, -32},
197       {PPC::S27, -40},
198       {PPC::S26, -48},
199       {PPC::S25, -56},
200       {PPC::S24, -64},
201       {PPC::S23, -72},
202       {PPC::S22, -80},
203       {PPC::S21, -88},
204       {PPC::S20, -96},
205       {PPC::S19, -104},
206       {PPC::S18, -112},
207       {PPC::S17, -120},
208       {PPC::S16, -128},
209       {PPC::S15, -136},
210       {PPC::S14, -144}};
211 
212   static const SpillSlot ELFOffsets64[] = {
213       CALLEE_SAVED_FPRS,
214       CALLEE_SAVED_GPRS64,
215 
216       // VRSAVE save area offset.
217       {PPC::VRSAVE, -4},
218       CALLEE_SAVED_VRS
219   };
220 
221   static const SpillSlot AIXOffsets32[] = {CALLEE_SAVED_FPRS,
222                                            CALLEE_SAVED_GPRS32,
223                                            // Add AIX's extra CSR.
224                                            {PPC::R13, -76},
225                                            CALLEE_SAVED_VRS};
226 
227   static const SpillSlot AIXOffsets64[] = {
228       CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS64, CALLEE_SAVED_VRS};
229 
230   if (Subtarget.is64BitELFABI()) {
231     NumEntries = array_lengthof(ELFOffsets64);
232     return ELFOffsets64;
233   }
234 
235   if (Subtarget.is32BitELFABI()) {
236     NumEntries = array_lengthof(ELFOffsets32);
237     return ELFOffsets32;
238   }
239 
240   assert(Subtarget.isAIXABI() && "Unexpected ABI.");
241 
242   if (Subtarget.isPPC64()) {
243     NumEntries = array_lengthof(AIXOffsets64);
244     return AIXOffsets64;
245   }
246 
247   NumEntries = array_lengthof(AIXOffsets32);
248   return AIXOffsets32;
249 }
250 
251 static bool spillsCR(const MachineFunction &MF) {
252   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
253   return FuncInfo->isCRSpilled();
254 }
255 
256 static bool hasSpills(const MachineFunction &MF) {
257   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
258   return FuncInfo->hasSpills();
259 }
260 
261 static bool hasNonRISpills(const MachineFunction &MF) {
262   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
263   return FuncInfo->hasNonRISpills();
264 }
265 
266 /// MustSaveLR - Return true if this function requires that we save the LR
267 /// register onto the stack in the prolog and restore it in the epilog of the
268 /// function.
269 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
270   const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
271 
272   // We need a save/restore of LR if there is any def of LR (which is
273   // defined by calls, including the PIC setup sequence), or if there is
274   // some use of the LR stack slot (e.g. for builtin_return_address).
275   // (LR comes in 32 and 64 bit versions.)
276   MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
277   return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
278 }
279 
280 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
281 /// call frame size. Update the MachineFunction object with the stack size.
282 unsigned
283 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
284                                                 bool UseEstimate) const {
285   unsigned NewMaxCallFrameSize = 0;
286   unsigned FrameSize = determineFrameLayout(MF, UseEstimate,
287                                             &NewMaxCallFrameSize);
288   MF.getFrameInfo().setStackSize(FrameSize);
289   MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
290   return FrameSize;
291 }
292 
293 /// determineFrameLayout - Determine the size of the frame and maximum call
294 /// frame size.
295 unsigned
296 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
297                                        bool UseEstimate,
298                                        unsigned *NewMaxCallFrameSize) const {
299   const MachineFrameInfo &MFI = MF.getFrameInfo();
300   const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
301 
302   // Get the number of bytes to allocate from the FrameInfo
303   unsigned FrameSize =
304     UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
305 
306   // Get stack alignments. The frame must be aligned to the greatest of these:
307   Align TargetAlign = getStackAlign(); // alignment required per the ABI
308   Align MaxAlign = MFI.getMaxAlign();  // algmt required by data in frame
309   Align Alignment = std::max(TargetAlign, MaxAlign);
310 
311   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
312 
313   unsigned LR = RegInfo->getRARegister();
314   bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
315   bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
316                        !MFI.adjustsStack() &&       // No calls.
317                        !MustSaveLR(MF, LR) &&       // No need to save LR.
318                        !FI->mustSaveTOC() &&        // No need to save TOC.
319                        !RegInfo->hasBasePointer(MF); // No special alignment.
320 
321   // Note: for PPC32 SVR4ABI, we can still generate stackless
322   // code if all local vars are reg-allocated.
323   bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
324 
325   // Check whether we can skip adjusting the stack pointer (by using red zone)
326   if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
327     // No need for frame
328     return 0;
329   }
330 
331   // Get the maximum call frame size of all the calls.
332   unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
333 
334   // Maximum call frame needs to be at least big enough for linkage area.
335   unsigned minCallFrameSize = getLinkageSize();
336   maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
337 
338   // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
339   // that allocations will be aligned.
340   if (MFI.hasVarSizedObjects())
341     maxCallFrameSize = alignTo(maxCallFrameSize, Alignment);
342 
343   // Update the new max call frame size if the caller passes in a valid pointer.
344   if (NewMaxCallFrameSize)
345     *NewMaxCallFrameSize = maxCallFrameSize;
346 
347   // Include call frame size in total.
348   FrameSize += maxCallFrameSize;
349 
350   // Make sure the frame is aligned.
351   FrameSize = alignTo(FrameSize, Alignment);
352 
353   return FrameSize;
354 }
355 
356 // hasFP - Return true if the specified function actually has a dedicated frame
357 // pointer register.
358 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
359   const MachineFrameInfo &MFI = MF.getFrameInfo();
360   // FIXME: This is pretty much broken by design: hasFP() might be called really
361   // early, before the stack layout was calculated and thus hasFP() might return
362   // true or false here depending on the time of call.
363   return (MFI.getStackSize()) && needsFP(MF);
364 }
365 
366 // needsFP - Return true if the specified function should have a dedicated frame
367 // pointer register.  This is true if the function has variable sized allocas or
368 // if frame pointer elimination is disabled.
369 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
370   const MachineFrameInfo &MFI = MF.getFrameInfo();
371 
372   // Naked functions have no stack frame pushed, so we don't have a frame
373   // pointer.
374   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
375     return false;
376 
377   return MF.getTarget().Options.DisableFramePointerElim(MF) ||
378          MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
379          MF.exposesReturnsTwice() ||
380          (MF.getTarget().Options.GuaranteedTailCallOpt &&
381           MF.getInfo<PPCFunctionInfo>()->hasFastCall());
382 }
383 
384 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
385   bool is31 = needsFP(MF);
386   unsigned FPReg  = is31 ? PPC::R31 : PPC::R1;
387   unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
388 
389   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
390   bool HasBP = RegInfo->hasBasePointer(MF);
391   unsigned BPReg  = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
392   unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
393 
394   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
395        BI != BE; ++BI)
396     for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
397       --MBBI;
398       for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
399         MachineOperand &MO = MBBI->getOperand(I);
400         if (!MO.isReg())
401           continue;
402 
403         switch (MO.getReg()) {
404         case PPC::FP:
405           MO.setReg(FPReg);
406           break;
407         case PPC::FP8:
408           MO.setReg(FP8Reg);
409           break;
410         case PPC::BP:
411           MO.setReg(BPReg);
412           break;
413         case PPC::BP8:
414           MO.setReg(BP8Reg);
415           break;
416 
417         }
418       }
419     }
420 }
421 
422 /*  This function will do the following:
423     - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
424       respectively (defaults recommended by the ABI) and return true
425     - If MBB is not an entry block, initialize the register scavenger and look
426       for available registers.
427     - If the defaults (R0/R12) are available, return true
428     - If TwoUniqueRegsRequired is set to true, it looks for two unique
429       registers. Otherwise, look for a single available register.
430       - If the required registers are found, set SR1 and SR2 and return true.
431       - If the required registers are not found, set SR2 or both SR1 and SR2 to
432         PPC::NoRegister and return false.
433 
434     Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
435     is not set, this function will attempt to find two different registers, but
436     still return true if only one register is available (and set SR1 == SR2).
437 */
438 bool
439 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
440                                       bool UseAtEnd,
441                                       bool TwoUniqueRegsRequired,
442                                       Register *SR1,
443                                       Register *SR2) const {
444   RegScavenger RS;
445   Register R0 =  Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
446   Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
447 
448   // Set the defaults for the two scratch registers.
449   if (SR1)
450     *SR1 = R0;
451 
452   if (SR2) {
453     assert (SR1 && "Asking for the second scratch register but not the first?");
454     *SR2 = R12;
455   }
456 
457   // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
458   if ((UseAtEnd && MBB->isReturnBlock()) ||
459       (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
460     return true;
461 
462   RS.enterBasicBlock(*MBB);
463 
464   if (UseAtEnd && !MBB->empty()) {
465     // The scratch register will be used at the end of the block, so must
466     // consider all registers used within the block
467 
468     MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
469     // If no terminator, back iterator up to previous instruction.
470     if (MBBI == MBB->end())
471       MBBI = std::prev(MBBI);
472 
473     if (MBBI != MBB->begin())
474       RS.forward(MBBI);
475   }
476 
477   // If the two registers are available, we're all good.
478   // Note that we only return here if both R0 and R12 are available because
479   // although the function may not require two unique registers, it may benefit
480   // from having two so we should try to provide them.
481   if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
482     return true;
483 
484   // Get the list of callee-saved registers for the target.
485   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
486   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
487 
488   // Get all the available registers in the block.
489   BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
490                                      &PPC::GPRCRegClass);
491 
492   // We shouldn't use callee-saved registers as scratch registers as they may be
493   // available when looking for a candidate block for shrink wrapping but not
494   // available when the actual prologue/epilogue is being emitted because they
495   // were added as live-in to the prologue block by PrologueEpilogueInserter.
496   for (int i = 0; CSRegs[i]; ++i)
497     BV.reset(CSRegs[i]);
498 
499   // Set the first scratch register to the first available one.
500   if (SR1) {
501     int FirstScratchReg = BV.find_first();
502     *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
503   }
504 
505   // If there is another one available, set the second scratch register to that.
506   // Otherwise, set it to either PPC::NoRegister if this function requires two
507   // or to whatever SR1 is set to if this function doesn't require two.
508   if (SR2) {
509     int SecondScratchReg = BV.find_next(*SR1);
510     if (SecondScratchReg != -1)
511       *SR2 = SecondScratchReg;
512     else
513       *SR2 = TwoUniqueRegsRequired ? Register() : *SR1;
514   }
515 
516   // Now that we've done our best to provide both registers, double check
517   // whether we were unable to provide enough.
518   if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
519     return false;
520 
521   return true;
522 }
523 
524 // We need a scratch register for spilling LR and for spilling CR. By default,
525 // we use two scratch registers to hide latency. However, if only one scratch
526 // register is available, we can adjust for that by not overlapping the spill
527 // code. However, if we need to realign the stack (i.e. have a base pointer)
528 // and the stack frame is large, we need two scratch registers.
529 // Also, stack probe requires two scratch registers, one for old sp, one for
530 // large frame and large probe size.
531 bool
532 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
533   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
534   MachineFunction &MF = *(MBB->getParent());
535   bool HasBP = RegInfo->hasBasePointer(MF);
536   unsigned FrameSize = determineFrameLayout(MF);
537   int NegFrameSize = -FrameSize;
538   bool IsLargeFrame = !isInt<16>(NegFrameSize);
539   MachineFrameInfo &MFI = MF.getFrameInfo();
540   Align MaxAlign = MFI.getMaxAlign();
541   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
542   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
543 
544   return ((IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1) ||
545          TLI.hasInlineStackProbe(MF);
546 }
547 
548 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
549   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
550 
551   return findScratchRegister(TmpMBB, false,
552                              twoUniqueScratchRegsRequired(TmpMBB));
553 }
554 
555 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
556   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
557 
558   return findScratchRegister(TmpMBB, true);
559 }
560 
561 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
562   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
563   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
564 
565   // Abort if there is no register info or function info.
566   if (!RegInfo || !FI)
567     return false;
568 
569   // Only move the stack update on ELFv2 ABI and PPC64.
570   if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
571     return false;
572 
573   // Check the frame size first and return false if it does not fit the
574   // requirements.
575   // We need a non-zero frame size as well as a frame that will fit in the red
576   // zone. This is because by moving the stack pointer update we are now storing
577   // to the red zone until the stack pointer is updated. If we get an interrupt
578   // inside the prologue but before the stack update we now have a number of
579   // stores to the red zone and those stores must all fit.
580   MachineFrameInfo &MFI = MF.getFrameInfo();
581   unsigned FrameSize = MFI.getStackSize();
582   if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
583     return false;
584 
585   // Frame pointers and base pointers complicate matters so don't do anything
586   // if we have them. For example having a frame pointer will sometimes require
587   // a copy of r1 into r31 and that makes keeping track of updates to r1 more
588   // difficult. Similar situation exists with setjmp.
589   if (hasFP(MF) || RegInfo->hasBasePointer(MF) || MF.exposesReturnsTwice())
590     return false;
591 
592   // Calls to fast_cc functions use different rules for passing parameters on
593   // the stack from the ABI and using PIC base in the function imposes
594   // similar restrictions to using the base pointer. It is not generally safe
595   // to move the stack pointer update in these situations.
596   if (FI->hasFastCall() || FI->usesPICBase())
597     return false;
598 
599   // Finally we can move the stack update if we do not require register
600   // scavenging. Register scavenging can introduce more spills and so
601   // may make the frame size larger than we have computed.
602   return !RegInfo->requiresFrameIndexScavenging(MF);
603 }
604 
605 void PPCFrameLowering::emitPrologue(MachineFunction &MF,
606                                     MachineBasicBlock &MBB) const {
607   MachineBasicBlock::iterator MBBI = MBB.begin();
608   MachineFrameInfo &MFI = MF.getFrameInfo();
609   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
610   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
611   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
612 
613   MachineModuleInfo &MMI = MF.getMMI();
614   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
615   DebugLoc dl;
616   // AIX assembler does not support cfi directives.
617   const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
618 
619   // Get processor type.
620   bool isPPC64 = Subtarget.isPPC64();
621   // Get the ABI.
622   bool isSVR4ABI = Subtarget.isSVR4ABI();
623   bool isELFv2ABI = Subtarget.isELFv2ABI();
624   assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI.");
625 
626   // Work out frame sizes.
627   unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
628   int NegFrameSize = -FrameSize;
629   if (!isInt<32>(NegFrameSize))
630     llvm_unreachable("Unhandled stack size!");
631 
632   if (MFI.isFrameAddressTaken())
633     replaceFPWithRealFP(MF);
634 
635   // Check if the link register (LR) must be saved.
636   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
637   bool MustSaveLR = FI->mustSaveLR();
638   bool MustSaveTOC = FI->mustSaveTOC();
639   const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
640   bool MustSaveCR = !MustSaveCRs.empty();
641   // Do we have a frame pointer and/or base pointer for this function?
642   bool HasFP = hasFP(MF);
643   bool HasBP = RegInfo->hasBasePointer(MF);
644   bool HasRedZone = isPPC64 || !isSVR4ABI;
645 
646   Register SPReg       = isPPC64 ? PPC::X1  : PPC::R1;
647   Register BPReg = RegInfo->getBaseRegister(MF);
648   Register FPReg       = isPPC64 ? PPC::X31 : PPC::R31;
649   Register LRReg       = isPPC64 ? PPC::LR8 : PPC::LR;
650   Register TOCReg      = isPPC64 ? PPC::X2 :  PPC::R2;
651   Register ScratchReg;
652   Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
653   //  ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
654   const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
655                                                 : PPC::MFLR );
656   const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
657                                                  : PPC::STW );
658   const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
659                                                      : PPC::STWU );
660   const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
661                                                         : PPC::STWUX);
662   const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8
663                                                           : PPC::LIS );
664   const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8
665                                                  : PPC::ORI );
666   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
667                                               : PPC::OR );
668   const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
669                                                             : PPC::SUBFC);
670   const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
671                                                                : PPC::SUBFIC);
672   const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8
673                                                            : PPC::MFCR);
674   const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW);
675 
676   // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
677   // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
678   // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
679   // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
680   assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
681          "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
682 
683   // Using the same bool variable as below to suppress compiler warnings.
684   bool SingleScratchReg = findScratchRegister(
685       &MBB, false, twoUniqueScratchRegsRequired(&MBB), &ScratchReg, &TempReg);
686   assert(SingleScratchReg &&
687          "Required number of registers not available in this block");
688 
689   SingleScratchReg = ScratchReg == TempReg;
690 
691   int LROffset = getReturnSaveOffset();
692 
693   int FPOffset = 0;
694   if (HasFP) {
695     MachineFrameInfo &MFI = MF.getFrameInfo();
696     int FPIndex = FI->getFramePointerSaveIndex();
697     assert(FPIndex && "No Frame Pointer Save Slot!");
698     FPOffset = MFI.getObjectOffset(FPIndex);
699   }
700 
701   int BPOffset = 0;
702   if (HasBP) {
703     MachineFrameInfo &MFI = MF.getFrameInfo();
704     int BPIndex = FI->getBasePointerSaveIndex();
705     assert(BPIndex && "No Base Pointer Save Slot!");
706     BPOffset = MFI.getObjectOffset(BPIndex);
707   }
708 
709   int PBPOffset = 0;
710   if (FI->usesPICBase()) {
711     MachineFrameInfo &MFI = MF.getFrameInfo();
712     int PBPIndex = FI->getPICBasePointerSaveIndex();
713     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
714     PBPOffset = MFI.getObjectOffset(PBPIndex);
715   }
716 
717   // Get stack alignments.
718   Align MaxAlign = MFI.getMaxAlign();
719   if (HasBP && MaxAlign > 1)
720     assert(Log2(MaxAlign) < 16 && "Invalid alignment!");
721 
722   // Frames of 32KB & larger require special handling because they cannot be
723   // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
724   bool isLargeFrame = !isInt<16>(NegFrameSize);
725 
726   // Check if we can move the stack update instruction (stdu) down the prologue
727   // past the callee saves. Hopefully this will avoid the situation where the
728   // saves are waiting for the update on the store with update to complete.
729   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
730   bool MovingStackUpdateDown = false;
731 
732   // Check if we can move the stack update.
733   if (stackUpdateCanBeMoved(MF)) {
734     const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
735     for (CalleeSavedInfo CSI : Info) {
736       // If the callee saved register is spilled to a register instead of the
737       // stack then the spill no longer uses the stack pointer.
738       // This can lead to two consequences:
739       // 1) We no longer need to update the stack because the function does not
740       //    spill any callee saved registers to stack.
741       // 2) We have a situation where we still have to update the stack pointer
742       //    even though some registers are spilled to other registers. In
743       //    this case the current code moves the stack update to an incorrect
744       //    position.
745       // In either case we should abort moving the stack update operation.
746       if (CSI.isSpilledToReg()) {
747         StackUpdateLoc = MBBI;
748         MovingStackUpdateDown = false;
749         break;
750       }
751 
752       int FrIdx = CSI.getFrameIdx();
753       // If the frame index is not negative the callee saved info belongs to a
754       // stack object that is not a fixed stack object. We ignore non-fixed
755       // stack objects because we won't move the stack update pointer past them.
756       if (FrIdx >= 0)
757         continue;
758 
759       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
760         StackUpdateLoc++;
761         MovingStackUpdateDown = true;
762       } else {
763         // We need all of the Frame Indices to meet these conditions.
764         // If they do not, abort the whole operation.
765         StackUpdateLoc = MBBI;
766         MovingStackUpdateDown = false;
767         break;
768       }
769     }
770 
771     // If the operation was not aborted then update the object offset.
772     if (MovingStackUpdateDown) {
773       for (CalleeSavedInfo CSI : Info) {
774         int FrIdx = CSI.getFrameIdx();
775         if (FrIdx < 0)
776           MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
777       }
778     }
779   }
780 
781   // Where in the prologue we move the CR fields depends on how many scratch
782   // registers we have, and if we need to save the link register or not. This
783   // lambda is to avoid duplicating the logic in 2 places.
784   auto BuildMoveFromCR = [&]() {
785     if (isELFv2ABI && MustSaveCRs.size() == 1) {
786     // In the ELFv2 ABI, we are not required to save all CR fields.
787     // If only one CR field is clobbered, it is more efficient to use
788     // mfocrf to selectively save just that field, because mfocrf has shorter
789     // latency compared to mfcr.
790       assert(isPPC64 && "V2 ABI is 64-bit only.");
791       MachineInstrBuilder MIB =
792           BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg);
793       MIB.addReg(MustSaveCRs[0], RegState::Kill);
794     } else {
795       MachineInstrBuilder MIB =
796           BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg);
797       for (unsigned CRfield : MustSaveCRs)
798         MIB.addReg(CRfield, RegState::ImplicitKill);
799     }
800   };
801 
802   // If we need to spill the CR and the LR but we don't have two separate
803   // registers available, we must spill them one at a time
804   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
805     BuildMoveFromCR();
806     BuildMI(MBB, MBBI, dl, StoreWordInst)
807         .addReg(TempReg, getKillRegState(true))
808         .addImm(CRSaveOffset)
809         .addReg(SPReg);
810   }
811 
812   if (MustSaveLR)
813     BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
814 
815   if (MustSaveCR && !(SingleScratchReg && MustSaveLR))
816     BuildMoveFromCR();
817 
818   if (HasRedZone) {
819     if (HasFP)
820       BuildMI(MBB, MBBI, dl, StoreInst)
821         .addReg(FPReg)
822         .addImm(FPOffset)
823         .addReg(SPReg);
824     if (FI->usesPICBase())
825       BuildMI(MBB, MBBI, dl, StoreInst)
826         .addReg(PPC::R30)
827         .addImm(PBPOffset)
828         .addReg(SPReg);
829     if (HasBP)
830       BuildMI(MBB, MBBI, dl, StoreInst)
831         .addReg(BPReg)
832         .addImm(BPOffset)
833         .addReg(SPReg);
834   }
835 
836   if (MustSaveLR)
837     BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
838       .addReg(ScratchReg, getKillRegState(true))
839       .addImm(LROffset)
840       .addReg(SPReg);
841 
842   if (MustSaveCR &&
843       !(SingleScratchReg && MustSaveLR)) {
844     assert(HasRedZone && "A red zone is always available on PPC64");
845     BuildMI(MBB, MBBI, dl, StoreWordInst)
846       .addReg(TempReg, getKillRegState(true))
847       .addImm(CRSaveOffset)
848       .addReg(SPReg);
849   }
850 
851   // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
852   if (!FrameSize)
853     return;
854 
855   // Adjust stack pointer: r1 += NegFrameSize.
856   // If there is a preferred stack alignment, align R1 now
857 
858   if (HasBP && HasRedZone) {
859     // Save a copy of r1 as the base pointer.
860     BuildMI(MBB, MBBI, dl, OrInst, BPReg)
861       .addReg(SPReg)
862       .addReg(SPReg);
863   }
864 
865   // Have we generated a STUX instruction to claim stack frame? If so,
866   // the negated frame size will be placed in ScratchReg.
867   bool HasSTUX = false;
868 
869   // If FrameSize <= TLI.getStackProbeSize(MF), as POWER ABI requires backchain
870   // pointer is always stored at SP, we will get a free probe due to an essential
871   // STU(X) instruction.
872   if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) {
873     // To be consistent with other targets, a pseudo instruction is emitted and
874     // will be later expanded in `inlineStackProbe`.
875     BuildMI(MBB, MBBI, dl,
876             TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
877                             : PPC::PROBED_STACKALLOC_32))
878         .addDef(ScratchReg)
879         .addDef(TempReg) // TempReg stores the old sp.
880         .addImm(NegFrameSize);
881     // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we
882     // update the ScratchReg to meet the assumption that ScratchReg contains
883     // the NegFrameSize. This solution is rather tricky.
884     if (!HasRedZone) {
885       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
886           .addReg(TempReg)
887           .addReg(SPReg);
888       HasSTUX = true;
889     }
890   } else {
891     // This condition must be kept in sync with canUseAsPrologue.
892     if (HasBP && MaxAlign > 1) {
893       if (isPPC64)
894         BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
895             .addReg(SPReg)
896             .addImm(0)
897             .addImm(64 - Log2(MaxAlign));
898       else // PPC32...
899         BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
900             .addReg(SPReg)
901             .addImm(0)
902             .addImm(32 - Log2(MaxAlign))
903             .addImm(31);
904       if (!isLargeFrame) {
905         BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
906             .addReg(ScratchReg, RegState::Kill)
907             .addImm(NegFrameSize);
908       } else {
909         assert(!SingleScratchReg && "Only a single scratch reg available");
910         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
911             .addImm(NegFrameSize >> 16);
912         BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
913             .addReg(TempReg, RegState::Kill)
914             .addImm(NegFrameSize & 0xFFFF);
915         BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
916             .addReg(ScratchReg, RegState::Kill)
917             .addReg(TempReg, RegState::Kill);
918       }
919 
920       BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
921           .addReg(SPReg, RegState::Kill)
922           .addReg(SPReg)
923           .addReg(ScratchReg);
924       HasSTUX = true;
925 
926     } else if (!isLargeFrame) {
927       BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
928           .addReg(SPReg)
929           .addImm(NegFrameSize)
930           .addReg(SPReg);
931 
932     } else {
933       BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
934           .addImm(NegFrameSize >> 16);
935       BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
936           .addReg(ScratchReg, RegState::Kill)
937           .addImm(NegFrameSize & 0xFFFF);
938       BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
939           .addReg(SPReg, RegState::Kill)
940           .addReg(SPReg)
941           .addReg(ScratchReg);
942       HasSTUX = true;
943     }
944   }
945 
946   // Save the TOC register after the stack pointer update if a prologue TOC
947   // save is required for the function.
948   if (MustSaveTOC) {
949     assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
950     BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
951       .addReg(TOCReg, getKillRegState(true))
952       .addImm(TOCSaveOffset)
953       .addReg(SPReg);
954   }
955 
956   if (!HasRedZone) {
957     assert(!isPPC64 && "A red zone is always available on PPC64");
958     if (HasSTUX) {
959       // The negated frame size is in ScratchReg, and the SPReg has been
960       // decremented by the frame size: SPReg = old SPReg + ScratchReg.
961       // Since FPOffset, PBPOffset, etc. are relative to the beginning of
962       // the stack frame (i.e. the old SP), ideally, we would put the old
963       // SP into a register and use it as the base for the stores. The
964       // problem is that the only available register may be ScratchReg,
965       // which could be R0, and R0 cannot be used as a base address.
966 
967       // First, set ScratchReg to the old SP. This may need to be modified
968       // later.
969       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
970         .addReg(ScratchReg, RegState::Kill)
971         .addReg(SPReg);
972 
973       if (ScratchReg == PPC::R0) {
974         // R0 cannot be used as a base register, but it can be used as an
975         // index in a store-indexed.
976         int LastOffset = 0;
977         if (HasFP)  {
978           // R0 += (FPOffset-LastOffset).
979           // Need addic, since addi treats R0 as 0.
980           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
981             .addReg(ScratchReg)
982             .addImm(FPOffset-LastOffset);
983           LastOffset = FPOffset;
984           // Store FP into *R0.
985           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
986             .addReg(FPReg, RegState::Kill)  // Save FP.
987             .addReg(PPC::ZERO)
988             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
989         }
990         if (FI->usesPICBase()) {
991           // R0 += (PBPOffset-LastOffset).
992           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
993             .addReg(ScratchReg)
994             .addImm(PBPOffset-LastOffset);
995           LastOffset = PBPOffset;
996           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
997             .addReg(PPC::R30, RegState::Kill)  // Save PIC base pointer.
998             .addReg(PPC::ZERO)
999             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1000         }
1001         if (HasBP) {
1002           // R0 += (BPOffset-LastOffset).
1003           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1004             .addReg(ScratchReg)
1005             .addImm(BPOffset-LastOffset);
1006           LastOffset = BPOffset;
1007           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1008             .addReg(BPReg, RegState::Kill)  // Save BP.
1009             .addReg(PPC::ZERO)
1010             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1011           // BP = R0-LastOffset
1012           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
1013             .addReg(ScratchReg, RegState::Kill)
1014             .addImm(-LastOffset);
1015         }
1016       } else {
1017         // ScratchReg is not R0, so use it as the base register. It is
1018         // already set to the old SP, so we can use the offsets directly.
1019 
1020         // Now that the stack frame has been allocated, save all the necessary
1021         // registers using ScratchReg as the base address.
1022         if (HasFP)
1023           BuildMI(MBB, MBBI, dl, StoreInst)
1024             .addReg(FPReg)
1025             .addImm(FPOffset)
1026             .addReg(ScratchReg);
1027         if (FI->usesPICBase())
1028           BuildMI(MBB, MBBI, dl, StoreInst)
1029             .addReg(PPC::R30)
1030             .addImm(PBPOffset)
1031             .addReg(ScratchReg);
1032         if (HasBP) {
1033           BuildMI(MBB, MBBI, dl, StoreInst)
1034             .addReg(BPReg)
1035             .addImm(BPOffset)
1036             .addReg(ScratchReg);
1037           BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1038             .addReg(ScratchReg, RegState::Kill)
1039             .addReg(ScratchReg);
1040         }
1041       }
1042     } else {
1043       // The frame size is a known 16-bit constant (fitting in the immediate
1044       // field of STWU). To be here we have to be compiling for PPC32.
1045       // Since the SPReg has been decreased by FrameSize, add it back to each
1046       // offset.
1047       if (HasFP)
1048         BuildMI(MBB, MBBI, dl, StoreInst)
1049           .addReg(FPReg)
1050           .addImm(FrameSize + FPOffset)
1051           .addReg(SPReg);
1052       if (FI->usesPICBase())
1053         BuildMI(MBB, MBBI, dl, StoreInst)
1054           .addReg(PPC::R30)
1055           .addImm(FrameSize + PBPOffset)
1056           .addReg(SPReg);
1057       if (HasBP) {
1058         BuildMI(MBB, MBBI, dl, StoreInst)
1059           .addReg(BPReg)
1060           .addImm(FrameSize + BPOffset)
1061           .addReg(SPReg);
1062         BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1063           .addReg(SPReg)
1064           .addImm(FrameSize);
1065       }
1066     }
1067   }
1068 
1069   // Add Call Frame Information for the instructions we generated above.
1070   if (needsCFI) {
1071     unsigned CFIIndex;
1072 
1073     if (HasBP) {
1074       // Define CFA in terms of BP. Do this in preference to using FP/SP,
1075       // because if the stack needed aligning then CFA won't be at a fixed
1076       // offset from FP/SP.
1077       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1078       CFIIndex = MF.addFrameInst(
1079           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1080     } else {
1081       // Adjust the definition of CFA to account for the change in SP.
1082       assert(NegFrameSize);
1083       CFIIndex = MF.addFrameInst(
1084           MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize));
1085     }
1086     BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1087         .addCFIIndex(CFIIndex);
1088 
1089     if (HasFP) {
1090       // Describe where FP was saved, at a fixed offset from CFA.
1091       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1092       CFIIndex = MF.addFrameInst(
1093           MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1094       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1095           .addCFIIndex(CFIIndex);
1096     }
1097 
1098     if (FI->usesPICBase()) {
1099       // Describe where the PIC base register (R30) was saved, at a fixed offset from CFA.
1100       unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1101       CFIIndex = MF.addFrameInst(
1102           MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1103       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1104           .addCFIIndex(CFIIndex);
1105     }
1106 
1107     if (HasBP) {
1108       // Describe where BP was saved, at a fixed offset from CFA.
1109       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1110       CFIIndex = MF.addFrameInst(
1111           MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1112       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1113           .addCFIIndex(CFIIndex);
1114     }
1115 
1116     if (MustSaveLR) {
1117       // Describe where LR was saved, at a fixed offset from CFA.
1118       unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1119       CFIIndex = MF.addFrameInst(
1120           MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1121       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1122           .addCFIIndex(CFIIndex);
1123     }
1124   }
1125 
1126   // If there is a frame pointer, copy R1 into R31
1127   if (HasFP) {
1128     BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1129       .addReg(SPReg)
1130       .addReg(SPReg);
1131 
1132     if (!HasBP && needsCFI) {
1133       // Change the definition of CFA from SP+offset to FP+offset, because SP
1134       // will change at every alloca.
1135       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1136       unsigned CFIIndex = MF.addFrameInst(
1137           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1138 
1139       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1140           .addCFIIndex(CFIIndex);
1141     }
1142   }
1143 
1144   if (needsCFI) {
1145     // Describe where callee saved registers were saved, at fixed offsets from
1146     // CFA.
1147     const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1148     for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
1149       unsigned Reg = CSI[I].getReg();
1150       if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1151 
1152       // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1153       // subregisters of CR2. We just need to emit a move of CR2.
1154       if (PPC::CRBITRCRegClass.contains(Reg))
1155         continue;
1156 
1157       if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
1158         continue;
1159 
1160       // For SVR4, don't emit a move for the CR spill slot if we haven't
1161       // spilled CRs.
1162       if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
1163           && !MustSaveCR)
1164         continue;
1165 
1166       // For 64-bit SVR4 when we have spilled CRs, the spill location
1167       // is SP+8, not a frame-relative slot.
1168       if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1169         // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1170         // the whole CR word.  In the ELFv2 ABI, every CR that was
1171         // actually saved gets its own CFI record.
1172         unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
1173         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1174             nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset));
1175         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1176             .addCFIIndex(CFIIndex);
1177         continue;
1178       }
1179 
1180       if (CSI[I].isSpilledToReg()) {
1181         unsigned SpilledReg = CSI[I].getDstReg();
1182         unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
1183             nullptr, MRI->getDwarfRegNum(Reg, true),
1184             MRI->getDwarfRegNum(SpilledReg, true)));
1185         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1186           .addCFIIndex(CFIRegister);
1187       } else {
1188         int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
1189         // We have changed the object offset above but we do not want to change
1190         // the actual offsets in the CFI instruction so we have to undo the
1191         // offset change here.
1192         if (MovingStackUpdateDown)
1193           Offset -= NegFrameSize;
1194 
1195         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1196             nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1197         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1198             .addCFIIndex(CFIIndex);
1199       }
1200     }
1201   }
1202 }
1203 
1204 void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
1205                                         MachineBasicBlock &PrologMBB) const {
       // Expands the PROBED_STACKALLOC_32 / PROBED_STACKALLOC_64 pseudo found in
       // PrologMBB into real instructions. The frame is allocated as one residual
       // chunk (NegFrameSize % NegProbeSize) followed by NumBlocks chunks of
       // exactly ProbeSize bytes; every chunk is allocated by a store-with-update
       // (STDU/STWU or STDUX/STWUX) on the stack pointer, which doubles as the
       // probe.
       //
       // \param MF         Function being lowered. New basic blocks are inserted
       //                   into it when a probing loop has to be synthesized.
       // \param PrologMBB  Block searched for the pseudo. If it contains none, the
       //                   function returns without changing anything.
       //
       // Pseudo operands as read below: operand 0 is a scratch register, operand 1
       // is the register that receives a copy of the incoming stack pointer (the
       // value written by each probing store), operand 2 is the negated frame
       // size. On exit the pseudo has been erased and NumPrologProbed incremented.
1206   // TODO: Generate CFI instructions.
       // NOTE(review): CFA-related CFI is emitted below whenever needsCFI is set
       // (see buildDefCFA / buildDefCFAReg), so the TODO above may be stale or
       // may refer to CFI that is still missing elsewhere -- confirm.
1207   bool isPPC64 = Subtarget.isPPC64();
1208   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
1209   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1210   MachineFrameInfo &MFI = MF.getFrameInfo();
1211   MachineModuleInfo &MMI = MF.getMMI();
1212   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
1213   // AIX assembler does not support cfi directives.
1214   const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
       // Locate the first probed-allocation pseudo in the prologue block.
1215   auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
1216     int Opc = MI.getOpcode();
1217     return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32;
1218   });
       // Nothing to expand.
1219   if (StackAllocMIPos == PrologMBB.end())
1220     return;
1221   const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
       // CurrentMBB tracks the block that currently contains the pseudo; it is
       // replaced by the exit block returned from dynamicProbe when that helper
       // splits the block.
1222   MachineBasicBlock *CurrentMBB = &PrologMBB;
1223   DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
1224   MachineInstr &MI = *StackAllocMIPos;
1225   int64_t NegFrameSize = MI.getOperand(2).getImm();
1226   unsigned ProbeSize = TLI.getStackProbeSize(MF);
1227   int64_t NegProbeSize = -(int64_t)ProbeSize;
       // MaterializeImm below only handles 32-bit immediates.
1228   assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
       // Split the allocation into whole probe-sized blocks plus a residual.
       // Assumes NegFrameSize is non-positive (TODO confirm against the emitter
       // of the pseudo); then NumBlocks >= 0 and NegResidualSize lies in
       // (NegProbeSize, 0] because C++ integer division truncates toward zero.
1229   int64_t NumBlocks = NegFrameSize / NegProbeSize;
1230   int64_t NegResidualSize = NegFrameSize % NegProbeSize;
1231   Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
1232   Register ScratchReg = MI.getOperand(0).getReg();
       // Despite its name, FPReg here is just operand 1 of the pseudo. It is
       // loaded with a copy of the stack pointer (or of BPReg) below and is the
       // value stored by each probe.
1233   Register FPReg = MI.getOperand(1).getReg();
1234   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1235   bool HasBP = RegInfo->hasBasePointer(MF);
1236   Register BPReg = RegInfo->getBaseRegister(MF);
1237   Align MaxAlign = MFI.getMaxAlign();
       // Register-to-register copy is expressed as "or dst, src, src".
1238   const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
1239   // Subroutines to generate .cfi_* directives.
       // buildDefCFAReg: emit a CFI instruction that switches the CFA register
       // to Reg (createDefCfaRegister), inserted before MBBI.
1240   auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
1241                             MachineBasicBlock::iterator MBBI, Register Reg) {
1242     unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1243     unsigned CFIIndex = MF.addFrameInst(
1244         MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
1245     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1246         .addCFIIndex(CFIIndex);
1247   };
       // buildDefCFA: emit a CFI instruction that defines the CFA as Reg + Offset
       // (cfiDefCfa), inserted before MBBI.
1248   auto buildDefCFA = [&](MachineBasicBlock &MBB,
1249                          MachineBasicBlock::iterator MBBI, Register Reg,
1250                          int Offset) {
1251     unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1252     unsigned CFIIndex = MBB.getParent()->addFrameInst(
1253         MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset));
1254     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1255         .addCFIIndex(CFIIndex);
1256   };
1257   // Subroutine to determine if we can use the Imm as part of d-form.
       // Requires a signed 16-bit value that is also a multiple of 4.
1258   auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; };
1259   // Subroutine to materialize the Imm into TempReg.
       // Emits a single LI/LI8 when Imm fits in 16 signed bits, otherwise a
       // LIS/LIS8 of the high half followed by ORI/ORI8 of the low 16 bits.
       // TempReg is taken by reference but is not modified here.
1260   auto MaterializeImm = [&](MachineBasicBlock &MBB,
1261                             MachineBasicBlock::iterator MBBI, int64_t Imm,
1262                             Register &TempReg) {
1263     assert(isInt<32>(Imm) && "Unhandled imm");
1264     if (isInt<16>(Imm))
1265       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg)
1266           .addImm(Imm);
1267     else {
1268       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
1269           .addImm(Imm >> 16);
1270       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg)
1271           .addReg(TempReg)
1272           .addImm(Imm & 0xFFFF);
1273     }
1274   };
1275   // Subroutine to store frame pointer and decrease stack pointer by probe size.
       // With UseDForm the displacement is the immediate NegSize (STDU/STWU) and
       // NegSizeReg is ignored; otherwise the displacement comes from NegSizeReg
       // (STDUX/STWUX) and NegSize is ignored. StoreReg is the value stored.
1276   auto allocateAndProbe = [&](MachineBasicBlock &MBB,
1277                               MachineBasicBlock::iterator MBBI, int64_t NegSize,
1278                               Register NegSizeReg, bool UseDForm,
1279                               Register StoreReg) {
1280     if (UseDForm)
1281       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
1282           .addReg(StoreReg)
1283           .addImm(NegSize)
1284           .addReg(SPReg);
1285     else
1286       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
1287           .addReg(StoreReg)
1288           .addReg(SPReg)
1289           .addReg(NegSizeReg);
1290   };
1291   // Used to probe realignment gap [stackptr - (stackptr % align), stackptr)
1292   // when HasBP && isPPC64. In such scenario, normally we have r0, r1, r12, r30
1293   // available and r1 is already copied to r30 which is BPReg. So BPReg stores
1294   // the value of stackptr.
1295   // First we have to probe tail interval whose size is less than probesize,
1296   // i.e., [stackptr - (stackptr % align) % probesize, stackptr). At this stage,
1297   // ScratchReg stores the value of ((stackptr % align) % probesize). Then we
1298   // probe each block sized probesize until stackptr meets
1299   // (stackptr - (stackptr % align)). At this stage, ScratchReg is materialized
1300   // as negprobesize. At both stages, TempReg stores the value of
1301   // (stackptr - (stackptr % align)).
       // Note: the ScratchReg parameter shadows the outer ScratchReg local.
       // Everything from MBBI to the end of MBB is moved into a new exit block,
       // which is returned so the caller can keep emitting code there.
1302   auto dynamicProbe = [&](MachineBasicBlock &MBB,
1303                           MachineBasicBlock::iterator MBBI, Register ScratchReg,
1304                           Register TempReg) {
1305     assert(HasBP && isPPC64 && "Probe alignment part not available");
1306     assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2");
1307     // ScratchReg = stackptr % align
1308     BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg)
1309         .addReg(BPReg)
1310         .addImm(0)
1311         .addImm(64 - Log2(MaxAlign));
1312     // TempReg = stackptr - (stackptr % align)
1313     BuildMI(MBB, MBBI, DL, TII.get(PPC::SUBFC8), TempReg)
1314         .addReg(ScratchReg)
1315         .addReg(BPReg);
1316     // ScratchReg = (stackptr % align) % probesize
1317     BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg)
1318         .addReg(ScratchReg)
1319         .addImm(0)
1320         .addImm(64 - Log2(ProbeSize));
1321     Register CRReg = PPC::CR0;
1322     // If (stackptr % align) % probesize == 0, we should not generate probe
1323     // code. Layout of output assembly kinda like:
1324     // bb.0:
1325     //   ...
1326     //   cmpdi $scratchreg, 0
1327     //   beq bb.2
1328     // bb.1: # Probe tail interval
1329     //   neg $scratchreg, $scratchreg
1330     //   stdux $bpreg, r1, $scratchreg
1331     // bb.2:
1332     //   <materialize negprobesize into $scratchreg>
1333     //   cmpd r1, $tempreg
1334     //   beq bb.4
1335     // bb.3: # Loop to probe each block
1336     //   stdux $bpreg, r1, $scratchreg
1337     //   cmpd r1, $tempreg
1338     //   bne bb.3
1339     // bb.4:
1340     //   ...
         // Create bb.1 .. bb.4 and place them, in order, right after MBB.
1341     MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
1342     MachineBasicBlock *ProbeResidualMBB = MF.CreateMachineBasicBlock(ProbedBB);
1343     MF.insert(MBBInsertPoint, ProbeResidualMBB);
1344     MachineBasicBlock *ProbeLoopPreHeaderMBB =
1345         MF.CreateMachineBasicBlock(ProbedBB);
1346     MF.insert(MBBInsertPoint, ProbeLoopPreHeaderMBB);
1347     MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB);
1348     MF.insert(MBBInsertPoint, ProbeLoopBodyMBB);
1349     MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
1350     MF.insert(MBBInsertPoint, ProbeExitMBB);
1351     // bb.4
         // Move the remainder of MBB (starting at MBBI) and MBB's successors into
         // the exit block.
1352     ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
1353     ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
1354     // bb.0
         // Skip the tail probe when the tail interval is empty.
1355     BuildMI(&MBB, DL, TII.get(PPC::CMPDI), CRReg).addReg(ScratchReg).addImm(0);
1356     BuildMI(&MBB, DL, TII.get(PPC::BCC))
1357         .addImm(PPC::PRED_EQ)
1358         .addReg(CRReg)
1359         .addMBB(ProbeLoopPreHeaderMBB);
1360     MBB.addSuccessor(ProbeResidualMBB);
1361     MBB.addSuccessor(ProbeLoopPreHeaderMBB);
1362     // bb.1
         // Negate the tail size so it can be used as the (negative) index of the
         // store-with-update. The NegSize argument (0) is unused in X-form.
1363     BuildMI(ProbeResidualMBB, DL, TII.get(PPC::NEG8), ScratchReg)
1364         .addReg(ScratchReg);
1365     allocateAndProbe(*ProbeResidualMBB, ProbeResidualMBB->end(), 0, ScratchReg,
1366                      false, BPReg);
1367     ProbeResidualMBB->addSuccessor(ProbeLoopPreHeaderMBB);
1368     // bb.2
         // Load -probesize and skip the loop if SP already equals the aligned
         // target held in TempReg.
1369     MaterializeImm(*ProbeLoopPreHeaderMBB, ProbeLoopPreHeaderMBB->end(),
1370                    NegProbeSize, ScratchReg);
1371     BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::CMPD), CRReg)
1372         .addReg(SPReg)
1373         .addReg(TempReg);
1374     BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::BCC))
1375         .addImm(PPC::PRED_EQ)
1376         .addReg(CRReg)
1377         .addMBB(ProbeExitMBB);
1378     ProbeLoopPreHeaderMBB->addSuccessor(ProbeLoopBodyMBB);
1379     ProbeLoopPreHeaderMBB->addSuccessor(ProbeExitMBB);
1380     // bb.3
         // Probe one block per iteration until SP equals TempReg.
1381     allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), 0, ScratchReg,
1382                      false, BPReg);
1383     BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::CMPD), CRReg)
1384         .addReg(SPReg)
1385         .addReg(TempReg);
1386     BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
1387         .addImm(PPC::PRED_NE)
1388         .addReg(CRReg)
1389         .addMBB(ProbeLoopBodyMBB);
1390     ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
1391     ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
1392     // Update liveins.
1393     recomputeLiveIns(*ProbeResidualMBB);
1394     recomputeLiveIns(*ProbeLoopPreHeaderMBB);
1395     recomputeLiveIns(*ProbeLoopBodyMBB);
1396     recomputeLiveIns(*ProbeExitMBB);
1397     return ProbeExitMBB;
1398   };
       // From here on, `{MI}` denotes an insertion point immediately before the
       // pseudo, so emitted instructions appear in program order ahead of it.
1399   // For case HasBP && MaxAlign > 1, we have to realign the SP by performing
1400   // SP = SP - SP % MaxAlign.
1401   if (HasBP && MaxAlign > 1) {
1402     // FIXME: Currently only probe the gap [stackptr & alignmask, stackptr) in
1403     // 64-bit mode.
1404     if (isPPC64) {
1405       // Use BPReg to calculate CFA.
1406       if (needsCFI)
1407         buildDefCFA(*CurrentMBB, {MI}, BPReg, 0);
1408       // Since we have SPReg copied to BPReg at the moment, FPReg can be used as
1409       // TempReg.
1410       Register TempReg = FPReg;
           // dynamicProbe splits the block; continue in the exit block, which now
           // holds the pseudo.
1411       CurrentMBB = dynamicProbe(*CurrentMBB, {MI}, ScratchReg, TempReg);
1412       // Copy BPReg to FPReg to meet the definition of PROBED_STACKALLOC_64.
1413       BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg)
1414           .addReg(BPReg)
1415           .addReg(BPReg);
1416     } else {
1417       // Initialize current frame pointer.
1418       BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg)
1419           .addReg(SPReg)
1420           .addReg(SPReg);
1421       // Use FPReg to calculate CFA.
1422       if (needsCFI)
1423         buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
           // ScratchReg = SP % MaxAlign, then SP -= ScratchReg. This path adjusts
           // SP with a plain subtract; the realignment gap is not probed (see the
           // FIXME above).
1424       BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
1425           .addReg(FPReg)
1426           .addImm(0)
1427           .addImm(32 - Log2(MaxAlign))
1428           .addImm(31);
1429       BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::SUBFC), SPReg)
1430           .addReg(ScratchReg)
1431           .addReg(SPReg);
1432     }
1433   } else {
1434     // Initialize current frame pointer.
1435     BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
1436     // Use FPReg to calculate CFA.
1437     if (needsCFI)
1438       buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
1439   }
1440   // Probe residual part.
       // The residual is allocated first; its size is only materialized into
       // ScratchReg when it cannot be encoded as a D-form displacement.
1441   if (NegResidualSize) {
1442     bool ResidualUseDForm = CanUseDForm(NegResidualSize);
1443     if (!ResidualUseDForm)
1444       MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg);
1445     allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg,
1446                      ResidualUseDForm, FPReg);
1447   }
1448   bool UseDForm = CanUseDForm(NegProbeSize);
1449   // If number of blocks is small, just probe them directly.
       // Fewer than three blocks: emit the probing stores back to back.
1450   if (NumBlocks < 3) {
1451     if (!UseDForm)
1452       MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
1453     for (int i = 0; i < NumBlocks; ++i)
1454       allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm,
1455                        FPReg);
1456     if (needsCFI) {
1457       // Restore using SPReg to calculate CFA.
1458       buildDefCFAReg(*CurrentMBB, {MI}, SPReg);
1459     }
1460   } else {
1461     // Since CTR is a volatile register and current shrinkwrap implementation
1462     // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a
1463     // CTR loop to probe.
1464     // Calculate the trip count and move it into CTR (MTCTR/MTCTR8).
1465     MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg);
1466     BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
1467         .addReg(ScratchReg, RegState::Kill);
         // ScratchReg is free again after the move to CTR, so it can hold the
         // negated probe size for the X-form store if needed.
1468     if (!UseDForm)
1469       MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
1470     // Create MBBs of the loop.
1471     MachineFunction::iterator MBBInsertPoint =
1472         std::next(CurrentMBB->getIterator());
1473     MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
1474     MF.insert(MBBInsertPoint, LoopMBB);
1475     MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
1476     MF.insert(MBBInsertPoint, ExitMBB);
1477     // Synthesize the loop body.
         // One probing store per iteration; BDNZ/BDNZ8 branches back to LoopMBB.
1478     allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
1479                      UseDForm, FPReg);
1480     BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
1481         .addMBB(LoopMBB);
1482     LoopMBB->addSuccessor(ExitMBB);
1483     LoopMBB->addSuccessor(LoopMBB);
1484     // Synthesize the exit MBB.
         // Everything after the pseudo moves to ExitMBB; the pseudo itself stays
         // at the end of CurrentMBB until it is erased below.
1485     ExitMBB->splice(ExitMBB->end(), CurrentMBB,
1486                     std::next(MachineBasicBlock::iterator(MI)),
1487                     CurrentMBB->end());
1488     ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB);
1489     CurrentMBB->addSuccessor(LoopMBB);
1490     if (needsCFI) {
1491       // Restore using SPReg to calculate CFA.
1492       buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
1493     }
1494     // Update liveins.
1495     recomputeLiveIns(*LoopMBB);
1496     recomputeLiveIns(*ExitMBB);
1497   }
       // Count the expansion and drop the now fully expanded pseudo.
1498   ++NumPrologProbed;
1499   MI.eraseFromParent();
1500 }
1501 
1502 void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1503                                     MachineBasicBlock &MBB) const {
1504   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1505   DebugLoc dl;
1506 
1507   if (MBBI != MBB.end())
1508     dl = MBBI->getDebugLoc();
1509 
1510   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1511   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1512 
1513   // Get alignment info so we know how to restore the SP.
1514   const MachineFrameInfo &MFI = MF.getFrameInfo();
1515 
1516   // Get the number of bytes allocated from the FrameInfo.
1517   int FrameSize = MFI.getStackSize();
1518 
1519   // Get processor type.
1520   bool isPPC64 = Subtarget.isPPC64();
1521 
1522   // Check if the link register (LR) has been saved.
1523   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1524   bool MustSaveLR = FI->mustSaveLR();
1525   const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
1526   bool MustSaveCR = !MustSaveCRs.empty();
1527   // Do we have a frame pointer and/or base pointer for this function?
1528   bool HasFP = hasFP(MF);
1529   bool HasBP = RegInfo->hasBasePointer(MF);
1530   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1531 
1532   Register SPReg      = isPPC64 ? PPC::X1  : PPC::R1;
1533   Register BPReg = RegInfo->getBaseRegister(MF);
1534   Register FPReg      = isPPC64 ? PPC::X31 : PPC::R31;
1535   Register ScratchReg;
1536   Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
1537   const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1538                                                  : PPC::MTLR );
1539   const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1540                                                  : PPC::LWZ );
1541   const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1542                                                            : PPC::LIS );
1543   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
1544                                               : PPC::OR );
1545   const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1546                                                   : PPC::ORI );
1547   const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1548                                                    : PPC::ADDI );
1549   const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1550                                                 : PPC::ADD4 );
1551   const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8
1552                                                      : PPC::LWZ);
1553   const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8
1554                                                      : PPC::MTOCRF);
1555   int LROffset = getReturnSaveOffset();
1556 
1557   int FPOffset = 0;
1558 
1559   // Using the same bool variable as below to suppress compiler warnings.
1560   bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1561                                               &TempReg);
1562   assert(SingleScratchReg &&
1563          "Could not find an available scratch register");
1564 
1565   SingleScratchReg = ScratchReg == TempReg;
1566 
1567   if (HasFP) {
1568     int FPIndex = FI->getFramePointerSaveIndex();
1569     assert(FPIndex && "No Frame Pointer Save Slot!");
1570     FPOffset = MFI.getObjectOffset(FPIndex);
1571   }
1572 
1573   int BPOffset = 0;
1574   if (HasBP) {
1575       int BPIndex = FI->getBasePointerSaveIndex();
1576       assert(BPIndex && "No Base Pointer Save Slot!");
1577       BPOffset = MFI.getObjectOffset(BPIndex);
1578   }
1579 
1580   int PBPOffset = 0;
1581   if (FI->usesPICBase()) {
1582     int PBPIndex = FI->getPICBasePointerSaveIndex();
1583     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1584     PBPOffset = MFI.getObjectOffset(PBPIndex);
1585   }
1586 
1587   bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1588 
1589   if (IsReturnBlock) {
1590     unsigned RetOpcode = MBBI->getOpcode();
1591     bool UsesTCRet =  RetOpcode == PPC::TCRETURNri ||
1592                       RetOpcode == PPC::TCRETURNdi ||
1593                       RetOpcode == PPC::TCRETURNai ||
1594                       RetOpcode == PPC::TCRETURNri8 ||
1595                       RetOpcode == PPC::TCRETURNdi8 ||
1596                       RetOpcode == PPC::TCRETURNai8;
1597 
1598     if (UsesTCRet) {
1599       int MaxTCRetDelta = FI->getTailCallSPDelta();
1600       MachineOperand &StackAdjust = MBBI->getOperand(1);
1601       assert(StackAdjust.isImm() && "Expecting immediate value.");
1602       // Adjust stack pointer.
1603       int StackAdj = StackAdjust.getImm();
1604       int Delta = StackAdj - MaxTCRetDelta;
1605       assert((Delta >= 0) && "Delta must be positive");
1606       if (MaxTCRetDelta>0)
1607         FrameSize += (StackAdj +Delta);
1608       else
1609         FrameSize += StackAdj;
1610     }
1611   }
1612 
1613   // Frames of 32KB & larger require special handling because they cannot be
1614   // indexed into with a simple LD/LWZ immediate offset operand.
1615   bool isLargeFrame = !isInt<16>(FrameSize);
1616 
1617   // On targets without red zone, the SP needs to be restored last, so that
1618   // all live contents of the stack frame are upwards of the SP. This means
1619   // that we cannot restore SP just now, since there may be more registers
1620   // to restore from the stack frame (e.g. R31). If the frame size is not
1621   // a simple immediate value, we will need a spare register to hold the
1622   // restored SP. If the frame size is known and small, we can simply adjust
1623   // the offsets of the registers to be restored, and still use SP to restore
1624   // them. In such case, the final update of SP will be to add the frame
1625   // size to it.
1626   // To simplify the code, set RBReg to the base register used to restore
1627   // values from the stack, and set SPAdd to the value that needs to be added
1628   // to the SP at the end. The default values are as if red zone was present.
1629   unsigned RBReg = SPReg;
1630   unsigned SPAdd = 0;
1631 
1632   // Check if we can move the stack update instruction up the epilogue
1633   // past the callee saves. This will allow the move to LR instruction
1634   // to be executed before the restores of the callee saves which means
1635   // that the callee saves can hide the latency from the MTLR instrcution.
1636   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
1637   if (stackUpdateCanBeMoved(MF)) {
1638     const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
1639     for (CalleeSavedInfo CSI : Info) {
1640       // If the callee saved register is spilled to another register abort the
1641       // stack update movement.
1642       if (CSI.isSpilledToReg()) {
1643         StackUpdateLoc = MBBI;
1644         break;
1645       }
1646       int FrIdx = CSI.getFrameIdx();
1647       // If the frame index is not negative the callee saved info belongs to a
1648       // stack object that is not a fixed stack object. We ignore non-fixed
1649       // stack objects because we won't move the update of the stack pointer
1650       // past them.
1651       if (FrIdx >= 0)
1652         continue;
1653 
1654       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
1655         StackUpdateLoc--;
1656       else {
1657         // Abort the operation as we can't update all CSR restores.
1658         StackUpdateLoc = MBBI;
1659         break;
1660       }
1661     }
1662   }
1663 
1664   if (FrameSize) {
1665     // In the prologue, the loaded (or persistent) stack pointer value is
1666     // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
1667     // zone add this offset back now.
1668 
1669     // If the function has a base pointer, the stack pointer has been copied
1670     // to it so we can restore it by copying in the other direction.
1671     if (HasRedZone && HasBP) {
1672       BuildMI(MBB, MBBI, dl, OrInst, RBReg).
1673         addReg(BPReg).
1674         addReg(BPReg);
1675     }
1676     // If this function contained a fastcc call and GuaranteedTailCallOpt is
1677     // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
1678     // call which invalidates the stack pointer value in SP(0). So we use the
1679     // value of R31 in this case. Similar situation exists with setjmp.
1680     else if (FI->hasFastCall() || MF.exposesReturnsTwice()) {
1681       assert(HasFP && "Expecting a valid frame pointer.");
1682       if (!HasRedZone)
1683         RBReg = FPReg;
1684       if (!isLargeFrame) {
1685         BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
1686           .addReg(FPReg).addImm(FrameSize);
1687       } else {
1688         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1689           .addImm(FrameSize >> 16);
1690         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1691           .addReg(ScratchReg, RegState::Kill)
1692           .addImm(FrameSize & 0xFFFF);
1693         BuildMI(MBB, MBBI, dl, AddInst)
1694           .addReg(RBReg)
1695           .addReg(FPReg)
1696           .addReg(ScratchReg);
1697       }
1698     } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
1699       if (HasRedZone) {
1700         BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
1701           .addReg(SPReg)
1702           .addImm(FrameSize);
1703       } else {
1704         // Make sure that adding FrameSize will not overflow the max offset
1705         // size.
1706         assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
1707                "Local offsets should be negative");
1708         SPAdd = FrameSize;
1709         FPOffset += FrameSize;
1710         BPOffset += FrameSize;
1711         PBPOffset += FrameSize;
1712       }
1713     } else {
1714       // We don't want to use ScratchReg as a base register, because it
1715       // could happen to be R0. Use FP instead, but make sure to preserve it.
1716       if (!HasRedZone) {
1717         // If FP is not saved, copy it to ScratchReg.
1718         if (!HasFP)
1719           BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
1720             .addReg(FPReg)
1721             .addReg(FPReg);
1722         RBReg = FPReg;
1723       }
1724       BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
1725         .addImm(0)
1726         .addReg(SPReg);
1727     }
1728   }
1729   assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
1730   // If there is no red zone, ScratchReg may be needed for holding a useful
1731   // value (although not the base register). Make sure it is not overwritten
1732   // too early.
1733 
1734   // If we need to restore both the LR and the CR and we only have one
1735   // available scratch register, we must do them one at a time.
1736   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1737     // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
1738     // is live here.
1739     assert(HasRedZone && "Expecting red zone");
1740     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1741       .addImm(CRSaveOffset)
1742       .addReg(SPReg);
1743     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1744       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1745         .addReg(TempReg, getKillRegState(i == e-1));
1746   }
1747 
1748   // Delay restoring of the LR if ScratchReg is needed. This is ok, since
1749   // LR is stored in the caller's stack frame. ScratchReg will be needed
1750   // if RBReg is anything other than SP. We shouldn't use ScratchReg as
1751   // a base register anyway, because it may happen to be R0.
1752   bool LoadedLR = false;
1753   if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
1754     BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
1755       .addImm(LROffset+SPAdd)
1756       .addReg(RBReg);
1757     LoadedLR = true;
1758   }
1759 
1760   if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
1761     assert(RBReg == SPReg && "Should be using SP as a base register");
1762     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1763       .addImm(CRSaveOffset)
1764       .addReg(RBReg);
1765   }
1766 
1767   if (HasFP) {
1768     // If there is red zone, restore FP directly, since SP has already been
1769     // restored. Otherwise, restore the value of FP into ScratchReg.
1770     if (HasRedZone || RBReg == SPReg)
1771       BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1772         .addImm(FPOffset)
1773         .addReg(SPReg);
1774     else
1775       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1776         .addImm(FPOffset)
1777         .addReg(RBReg);
1778   }
1779 
1780   if (FI->usesPICBase())
1781     BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
1782       .addImm(PBPOffset)
1783       .addReg(RBReg);
1784 
1785   if (HasBP)
1786     BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1787       .addImm(BPOffset)
1788       .addReg(RBReg);
1789 
1790   // There is nothing more to be loaded from the stack, so now we can
1791   // restore SP: SP = RBReg + SPAdd.
1792   if (RBReg != SPReg || SPAdd != 0) {
1793     assert(!HasRedZone && "This should not happen with red zone");
1794     // If SPAdd is 0, generate a copy.
1795     if (SPAdd == 0)
1796       BuildMI(MBB, MBBI, dl, OrInst, SPReg)
1797         .addReg(RBReg)
1798         .addReg(RBReg);
1799     else
1800       BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1801         .addReg(RBReg)
1802         .addImm(SPAdd);
1803 
1804     assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
1805     if (RBReg == FPReg)
1806       BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1807         .addReg(ScratchReg)
1808         .addReg(ScratchReg);
1809 
1810     // Now load the LR from the caller's stack frame.
1811     if (MustSaveLR && !LoadedLR)
1812       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1813         .addImm(LROffset)
1814         .addReg(SPReg);
1815   }
1816 
1817   if (MustSaveCR &&
1818       !(SingleScratchReg && MustSaveLR))
1819     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1820       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1821         .addReg(TempReg, getKillRegState(i == e-1));
1822 
1823   if (MustSaveLR)
1824     BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
1825 
1826   // Callee pop calling convention. Pop parameter/linkage area. Used for tail
1827   // call optimization
1828   if (IsReturnBlock) {
1829     unsigned RetOpcode = MBBI->getOpcode();
1830     if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1831         (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1832         MF.getFunction().getCallingConv() == CallingConv::Fast) {
1833       PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1834       unsigned CallerAllocatedAmt = FI->getMinReservedArea();
1835 
1836       if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1837         BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1838           .addReg(SPReg).addImm(CallerAllocatedAmt);
1839       } else {
1840         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1841           .addImm(CallerAllocatedAmt >> 16);
1842         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1843           .addReg(ScratchReg, RegState::Kill)
1844           .addImm(CallerAllocatedAmt & 0xFFFF);
1845         BuildMI(MBB, MBBI, dl, AddInst)
1846           .addReg(SPReg)
1847           .addReg(FPReg)
1848           .addReg(ScratchReg);
1849       }
1850     } else {
1851       createTailCallBranchInstr(MBB);
1852     }
1853   }
1854 }
1855 
1856 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
1857   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1858 
1859   // If we got this far a first terminator should exist.
1860   assert(MBBI != MBB.end() && "Failed to find the first terminator.");
1861 
1862   DebugLoc dl = MBBI->getDebugLoc();
1863   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1864 
1865   // Create branch instruction for pseudo tail call return instruction.
1866   // The TCRETURNdi variants are direct calls. Valid targets for those are
1867   // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel
1868   // since we can tail call external functions with PC-Rel (i.e. we don't need
1869   // to worry about different TOC pointers). Some of the external functions will
1870   // be MO_GlobalAddress while others like memcpy for example, are going to
1871   // be MO_ExternalSymbol.
1872   unsigned RetOpcode = MBBI->getOpcode();
1873   if (RetOpcode == PPC::TCRETURNdi) {
1874     MBBI = MBB.getLastNonDebugInstr();
1875     MachineOperand &JumpTarget = MBBI->getOperand(0);
1876     if (JumpTarget.isGlobal())
1877       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1878         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1879     else if (JumpTarget.isSymbol())
1880       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1881         addExternalSymbol(JumpTarget.getSymbolName());
1882     else
1883       llvm_unreachable("Expecting Global or External Symbol");
1884   } else if (RetOpcode == PPC::TCRETURNri) {
1885     MBBI = MBB.getLastNonDebugInstr();
1886     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1887     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
1888   } else if (RetOpcode == PPC::TCRETURNai) {
1889     MBBI = MBB.getLastNonDebugInstr();
1890     MachineOperand &JumpTarget = MBBI->getOperand(0);
1891     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
1892   } else if (RetOpcode == PPC::TCRETURNdi8) {
1893     MBBI = MBB.getLastNonDebugInstr();
1894     MachineOperand &JumpTarget = MBBI->getOperand(0);
1895     if (JumpTarget.isGlobal())
1896       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1897         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1898     else if (JumpTarget.isSymbol())
1899       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1900         addExternalSymbol(JumpTarget.getSymbolName());
1901     else
1902       llvm_unreachable("Expecting Global or External Symbol");
1903   } else if (RetOpcode == PPC::TCRETURNri8) {
1904     MBBI = MBB.getLastNonDebugInstr();
1905     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1906     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
1907   } else if (RetOpcode == PPC::TCRETURNai8) {
1908     MBBI = MBB.getLastNonDebugInstr();
1909     MachineOperand &JumpTarget = MBBI->getOperand(0);
1910     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
1911   }
1912 }
1913 
1914 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
1915                                             BitVector &SavedRegs,
1916                                             RegScavenger *RS) const {
1917   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1918 
1919   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1920 
1921   //  Save and clear the LR state.
1922   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1923   unsigned LR = RegInfo->getRARegister();
1924   FI->setMustSaveLR(MustSaveLR(MF, LR));
1925   SavedRegs.reset(LR);
1926 
1927   //  Save R31 if necessary
1928   int FPSI = FI->getFramePointerSaveIndex();
1929   const bool isPPC64 = Subtarget.isPPC64();
1930   MachineFrameInfo &MFI = MF.getFrameInfo();
1931 
1932   // If the frame pointer save index hasn't been defined yet.
1933   if (!FPSI && needsFP(MF)) {
1934     // Find out what the fix offset of the frame pointer save area.
1935     int FPOffset = getFramePointerSaveOffset();
1936     // Allocate the frame index for frame pointer save area.
1937     FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
1938     // Save the result.
1939     FI->setFramePointerSaveIndex(FPSI);
1940   }
1941 
1942   int BPSI = FI->getBasePointerSaveIndex();
1943   if (!BPSI && RegInfo->hasBasePointer(MF)) {
1944     int BPOffset = getBasePointerSaveOffset();
1945     // Allocate the frame index for the base pointer save area.
1946     BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
1947     // Save the result.
1948     FI->setBasePointerSaveIndex(BPSI);
1949   }
1950 
1951   // Reserve stack space for the PIC Base register (R30).
1952   // Only used in SVR4 32-bit.
1953   if (FI->usesPICBase()) {
1954     int PBPSI = MFI.CreateFixedObject(4, -8, true);
1955     FI->setPICBasePointerSaveIndex(PBPSI);
1956   }
1957 
1958   // Make sure we don't explicitly spill r31, because, for example, we have
1959   // some inline asm which explicitly clobbers it, when we otherwise have a
1960   // frame pointer and are using r31's spill slot for the prologue/epilogue
1961   // code. Same goes for the base pointer and the PIC base register.
1962   if (needsFP(MF))
1963     SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
1964   if (RegInfo->hasBasePointer(MF))
1965     SavedRegs.reset(RegInfo->getBaseRegister(MF));
1966   if (FI->usesPICBase())
1967     SavedRegs.reset(PPC::R30);
1968 
1969   // Reserve stack space to move the linkage area to in case of a tail call.
1970   int TCSPDelta = 0;
1971   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1972       (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
1973     MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
1974   }
1975 
1976   // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
1977   // For 64-bit SVR4, and all flavors of AIX we create a FixedStack
1978   // object at the offset of the CR-save slot in the linkage area. The actual
1979   // save and restore of the condition register will be created as part of the
1980   // prologue and epilogue insertion, but the FixedStack object is needed to
1981   // keep the CalleSavedInfo valid.
1982   if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
1983        SavedRegs.test(PPC::CR4))) {
1984     const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
1985     const int64_t SpillOffset =
1986         Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
1987     int FrameIdx =
1988         MFI.CreateFixedObject(SpillSize, SpillOffset,
1989                               /* IsImmutable */ true, /* IsAliased */ false);
1990     FI->setCRSpillFrameIndex(FrameIdx);
1991   }
1992 }
1993 
1994 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
1995                                                        RegScavenger *RS) const {
1996   // Get callee saved register information.
1997   MachineFrameInfo &MFI = MF.getFrameInfo();
1998   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1999 
2000   // If the function is shrink-wrapped, and if the function has a tail call, the
2001   // tail call might not be in the new RestoreBlock, so real branch instruction
2002   // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
2003   // RestoreBlock. So we handle this case here.
2004   if (MFI.getSavePoint() && MFI.hasTailCall()) {
2005     MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
2006     for (MachineBasicBlock &MBB : MF) {
2007       if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
2008         createTailCallBranchInstr(MBB);
2009     }
2010   }
2011 
2012   // Early exit if no callee saved registers are modified!
2013   if (CSI.empty() && !needsFP(MF)) {
2014     addScavengingSpillSlot(MF, RS);
2015     return;
2016   }
2017 
2018   unsigned MinGPR = PPC::R31;
2019   unsigned MinG8R = PPC::X31;
2020   unsigned MinFPR = PPC::F31;
2021   unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
2022 
2023   bool HasGPSaveArea = false;
2024   bool HasG8SaveArea = false;
2025   bool HasFPSaveArea = false;
2026   bool HasVRSaveArea = false;
2027 
2028   SmallVector<CalleeSavedInfo, 18> GPRegs;
2029   SmallVector<CalleeSavedInfo, 18> G8Regs;
2030   SmallVector<CalleeSavedInfo, 18> FPRegs;
2031   SmallVector<CalleeSavedInfo, 18> VRegs;
2032 
2033   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2034     unsigned Reg = CSI[i].getReg();
2035     assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
2036             (Reg != PPC::X2 && Reg != PPC::R2)) &&
2037            "Not expecting to try to spill R2 in a function that must save TOC");
2038     if (PPC::GPRCRegClass.contains(Reg)) {
2039       HasGPSaveArea = true;
2040 
2041       GPRegs.push_back(CSI[i]);
2042 
2043       if (Reg < MinGPR) {
2044         MinGPR = Reg;
2045       }
2046     } else if (PPC::G8RCRegClass.contains(Reg)) {
2047       HasG8SaveArea = true;
2048 
2049       G8Regs.push_back(CSI[i]);
2050 
2051       if (Reg < MinG8R) {
2052         MinG8R = Reg;
2053       }
2054     } else if (PPC::F8RCRegClass.contains(Reg)) {
2055       HasFPSaveArea = true;
2056 
2057       FPRegs.push_back(CSI[i]);
2058 
2059       if (Reg < MinFPR) {
2060         MinFPR = Reg;
2061       }
2062     } else if (PPC::CRBITRCRegClass.contains(Reg) ||
2063                PPC::CRRCRegClass.contains(Reg)) {
2064       ; // do nothing, as we already know whether CRs are spilled
2065     } else if (PPC::VRRCRegClass.contains(Reg) ||
2066                PPC::SPERCRegClass.contains(Reg)) {
2067       // Altivec and SPE are mutually exclusive, but have the same stack
2068       // alignment requirements, so overload the save area for both cases.
2069       HasVRSaveArea = true;
2070 
2071       VRegs.push_back(CSI[i]);
2072 
2073       if (Reg < MinVR) {
2074         MinVR = Reg;
2075       }
2076     } else {
2077       llvm_unreachable("Unknown RegisterClass!");
2078     }
2079   }
2080 
2081   PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
2082   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2083 
2084   int64_t LowerBound = 0;
2085 
2086   // Take into account stack space reserved for tail calls.
2087   int TCSPDelta = 0;
2088   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2089       (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
2090     LowerBound = TCSPDelta;
2091   }
2092 
2093   // The Floating-point register save area is right below the back chain word
2094   // of the previous stack frame.
2095   if (HasFPSaveArea) {
2096     for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
2097       int FI = FPRegs[i].getFrameIdx();
2098 
2099       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2100     }
2101 
2102     LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
2103   }
2104 
2105   // Check whether the frame pointer register is allocated. If so, make sure it
2106   // is spilled to the correct offset.
2107   if (needsFP(MF)) {
2108     int FI = PFI->getFramePointerSaveIndex();
2109     assert(FI && "No Frame Pointer Save Slot!");
2110     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2111     // FP is R31/X31, so no need to update MinGPR/MinG8R.
2112     HasGPSaveArea = true;
2113   }
2114 
2115   if (PFI->usesPICBase()) {
2116     int FI = PFI->getPICBasePointerSaveIndex();
2117     assert(FI && "No PIC Base Pointer Save Slot!");
2118     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2119 
2120     MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
2121     HasGPSaveArea = true;
2122   }
2123 
2124   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2125   if (RegInfo->hasBasePointer(MF)) {
2126     int FI = PFI->getBasePointerSaveIndex();
2127     assert(FI && "No Base Pointer Save Slot!");
2128     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2129 
2130     Register BP = RegInfo->getBaseRegister(MF);
2131     if (PPC::G8RCRegClass.contains(BP)) {
2132       MinG8R = std::min<unsigned>(MinG8R, BP);
2133       HasG8SaveArea = true;
2134     } else if (PPC::GPRCRegClass.contains(BP)) {
2135       MinGPR = std::min<unsigned>(MinGPR, BP);
2136       HasGPSaveArea = true;
2137     }
2138   }
2139 
2140   // General register save area starts right below the Floating-point
2141   // register save area.
2142   if (HasGPSaveArea || HasG8SaveArea) {
2143     // Move general register save area spill slots down, taking into account
2144     // the size of the Floating-point register save area.
2145     for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
2146       if (!GPRegs[i].isSpilledToReg()) {
2147         int FI = GPRegs[i].getFrameIdx();
2148         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2149       }
2150     }
2151 
2152     // Move general register save area spill slots down, taking into account
2153     // the size of the Floating-point register save area.
2154     for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
2155       if (!G8Regs[i].isSpilledToReg()) {
2156         int FI = G8Regs[i].getFrameIdx();
2157         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2158       }
2159     }
2160 
2161     unsigned MinReg =
2162       std::min<unsigned>(TRI->getEncodingValue(MinGPR),
2163                          TRI->getEncodingValue(MinG8R));
2164 
2165     const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4;
2166     LowerBound -= (31 - MinReg + 1) * GPRegSize;
2167   }
2168 
2169   // For 32-bit only, the CR save area is below the general register
2170   // save area.  For 64-bit SVR4, the CR save area is addressed relative
2171   // to the stack pointer and hence does not need an adjustment here.
2172   // Only CR2 (the first nonvolatile spilled) has an associated frame
2173   // index so that we have a single uniform save area.
2174   if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
2175     // Adjust the frame index of the CR spill slot.
2176     for (const auto &CSInfo : CSI) {
2177       if (CSInfo.getReg() == PPC::CR2) {
2178         int FI = CSInfo.getFrameIdx();
2179         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2180         break;
2181       }
2182     }
2183 
2184     LowerBound -= 4; // The CR save area is always 4 bytes long.
2185   }
2186 
2187   // Both Altivec and SPE have the same alignment and padding requirements
2188   // within the stack frame.
2189   if (HasVRSaveArea) {
2190     // Insert alignment padding, we need 16-byte alignment. Note: for positive
2191     // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since
2192     // we are using negative number here (the stack grows downward). We should
2193     // use formula : y = x & (~(n-1)). Where x is the size before aligning, n
2194     // is the alignment size ( n = 16 here) and y is the size after aligning.
2195     assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
2196     LowerBound &= ~(15);
2197 
2198     for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
2199       int FI = VRegs[i].getFrameIdx();
2200 
2201       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2202     }
2203   }
2204 
2205   addScavengingSpillSlot(MF, RS);
2206 }
2207 
2208 void
2209 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
2210                                          RegScavenger *RS) const {
2211   // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
2212   // a large stack, which will require scavenging a register to materialize a
2213   // large offset.
2214 
2215   // We need to have a scavenger spill slot for spills if the frame size is
2216   // large. In case there is no free register for large-offset addressing,
2217   // this slot is used for the necessary emergency spill. Also, we need the
2218   // slot for dynamic stack allocations.
2219 
2220   // The scavenger might be invoked if the frame offset does not fit into
2221   // the 16-bit immediate. We don't know the complete frame size here
2222   // because we've not yet computed callee-saved register spills or the
2223   // needed alignment padding.
2224   unsigned StackSize = determineFrameLayout(MF, true);
2225   MachineFrameInfo &MFI = MF.getFrameInfo();
2226   if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) ||
2227       (hasSpills(MF) && !isInt<16>(StackSize))) {
2228     const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
2229     const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
2230     const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
2231     const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
2232     unsigned Size = TRI.getSpillSize(RC);
2233     Align Alignment = TRI.getSpillAlign(RC);
2234     RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false));
2235 
2236     // Might we have over-aligned allocas?
2237     bool HasAlVars =
2238         MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign();
2239 
2240     // These kinds of spills might need two registers.
2241     if (spillsCR(MF) || HasAlVars)
2242       RS->addScavengingFrameIndex(
2243           MFI.CreateStackObject(Size, Alignment, false));
2244   }
2245 }
2246 
2247 // This function checks if a callee saved gpr can be spilled to a volatile
2248 // vector register. This occurs for leaf functions when the option
2249 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
2250 // which were not spilled to vectors, return false so the target independent
2251 // code can handle them by assigning a FrameIdx to a stack slot.
2252 bool PPCFrameLowering::assignCalleeSavedSpillSlots(
2253     MachineFunction &MF, const TargetRegisterInfo *TRI,
2254     std::vector<CalleeSavedInfo> &CSI) const {
2255 
2256   if (CSI.empty())
2257     return true; // Early exit if no callee saved registers are modified!
2258 
2259   // Early exit if cannot spill gprs to volatile vector registers.
2260   MachineFrameInfo &MFI = MF.getFrameInfo();
2261   if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
2262     return false;
2263 
2264   // Build a BitVector of VSRs that can be used for spilling GPRs.
2265   BitVector BVAllocatable = TRI->getAllocatableSet(MF);
2266   BitVector BVCalleeSaved(TRI->getNumRegs());
2267   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2268   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2269   for (unsigned i = 0; CSRegs[i]; ++i)
2270     BVCalleeSaved.set(CSRegs[i]);
2271 
2272   for (unsigned Reg : BVAllocatable.set_bits()) {
2273     // Set to 0 if the register is not a volatile VSX register, or if it is
2274     // used in the function.
2275     if (BVCalleeSaved[Reg] || !PPC::VSRCRegClass.contains(Reg) ||
2276         MF.getRegInfo().isPhysRegUsed(Reg))
2277       BVAllocatable.reset(Reg);
2278   }
2279 
2280   bool AllSpilledToReg = true;
2281   unsigned LastVSRUsedForSpill = 0;
2282   for (auto &CS : CSI) {
2283     if (BVAllocatable.none())
2284       return false;
2285 
2286     unsigned Reg = CS.getReg();
2287 
2288     if (!PPC::G8RCRegClass.contains(Reg)) {
2289       AllSpilledToReg = false;
2290       continue;
2291     }
2292 
2293     // For P9, we can reuse LastVSRUsedForSpill to spill two GPRs
2294     // into one VSR using the mtvsrdd instruction.
2295     if (LastVSRUsedForSpill != 0) {
2296       CS.setDstReg(LastVSRUsedForSpill);
2297       BVAllocatable.reset(LastVSRUsedForSpill);
2298       LastVSRUsedForSpill = 0;
2299       continue;
2300     }
2301 
2302     unsigned VolatileVFReg = BVAllocatable.find_first();
2303     if (VolatileVFReg < BVAllocatable.size()) {
2304       CS.setDstReg(VolatileVFReg);
2305       LastVSRUsedForSpill = VolatileVFReg;
2306     } else {
2307       AllSpilledToReg = false;
2308     }
2309   }
2310   return AllSpilledToReg;
2311 }
2312 
2313 bool PPCFrameLowering::spillCalleeSavedRegisters(
2314     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2315     ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2316 
2317   MachineFunction *MF = MBB.getParent();
2318   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2319   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2320   bool MustSaveTOC = FI->mustSaveTOC();
2321   DebugLoc DL;
2322   bool CRSpilled = false;
2323   MachineInstrBuilder CRMIB;
2324   BitVector Spilled(TRI->getNumRegs());
2325 
2326   VSRContainingGPRs.clear();
2327 
2328   // Map each VSR to GPRs to be spilled with into it. Single VSR can contain one
2329   // or two GPRs, so we need table to record information for later save/restore.
2330   llvm::for_each(CSI, [&](const CalleeSavedInfo &Info) {
2331     if (Info.isSpilledToReg()) {
2332       auto &SpilledVSR =
2333           VSRContainingGPRs.FindAndConstruct(Info.getDstReg()).second;
2334       assert(SpilledVSR.second == 0 &&
2335              "Can't spill more than two GPRs into VSR!");
2336       if (SpilledVSR.first == 0)
2337         SpilledVSR.first = Info.getReg();
2338       else
2339         SpilledVSR.second = Info.getReg();
2340     }
2341   });
2342 
2343   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2344     unsigned Reg = CSI[i].getReg();
2345 
2346     // CR2 through CR4 are the nonvolatile CR fields.
2347     bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
2348 
2349     // Add the callee-saved register as live-in; it's killed at the spill.
2350     // Do not do this for callee-saved registers that are live-in to the
2351     // function because they will already be marked live-in and this will be
2352     // adding it for a second time. It is an error to add the same register
2353     // to the set more than once.
2354     const MachineRegisterInfo &MRI = MF->getRegInfo();
2355     bool IsLiveIn = MRI.isLiveIn(Reg);
2356     if (!IsLiveIn)
2357        MBB.addLiveIn(Reg);
2358 
2359     if (CRSpilled && IsCRField) {
2360       CRMIB.addReg(Reg, RegState::ImplicitKill);
2361       continue;
2362     }
2363 
2364     // The actual spill will happen in the prologue.
2365     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2366       continue;
2367 
2368     // Insert the spill to the stack frame.
2369     if (IsCRField) {
2370       PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
2371       if (!Subtarget.is32BitELFABI()) {
2372         // The actual spill will happen at the start of the prologue.
2373         FuncInfo->addMustSaveCR(Reg);
2374       } else {
2375         CRSpilled = true;
2376         FuncInfo->setSpillsCR();
2377 
2378         // 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
2379         // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
2380         CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
2381                   .addReg(Reg, RegState::ImplicitKill);
2382 
2383         MBB.insert(MI, CRMIB);
2384         MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
2385                                          .addReg(PPC::R12,
2386                                                  getKillRegState(true)),
2387                                          CSI[i].getFrameIdx()));
2388       }
2389     } else {
2390       if (CSI[i].isSpilledToReg()) {
2391         unsigned Dst = CSI[i].getDstReg();
2392 
2393         if (Spilled[Dst])
2394           continue;
2395 
2396         if (VSRContainingGPRs[Dst].second != 0) {
2397           assert(Subtarget.hasP9Vector() &&
2398                  "mtvsrdd is unavailable on pre-P9 targets.");
2399 
2400           NumPESpillVSR += 2;
2401           BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRDD), Dst)
2402               .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true))
2403               .addReg(VSRContainingGPRs[Dst].second, getKillRegState(true));
2404         } else if (VSRContainingGPRs[Dst].second == 0) {
2405           assert(Subtarget.hasP8Vector() &&
2406                  "Can't move GPR to VSR on pre-P8 targets.");
2407 
2408           ++NumPESpillVSR;
2409           BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD),
2410                   TRI->getSubReg(Dst, PPC::sub_64))
2411               .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true));
2412         } else {
2413           llvm_unreachable("More than two GPRs spilled to a VSR!");
2414         }
2415         Spilled.set(Dst);
2416       } else {
2417         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2418         // Use !IsLiveIn for the kill flag.
2419         // We do not want to kill registers that are live in this function
2420         // before their use because they will become undefined registers.
2421         // Functions without NoUnwind need to preserve the order of elements in
2422         // saved vector registers.
2423         if (Subtarget.needsSwapsForVSXMemOps() &&
2424             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2425           TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
2426                                        CSI[i].getFrameIdx(), RC, TRI);
2427         else
2428           TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(),
2429                                   RC, TRI);
2430       }
2431     }
2432   }
2433   return true;
2434 }
2435 
2436 static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled,
2437                        bool CR4Spilled, MachineBasicBlock &MBB,
2438                        MachineBasicBlock::iterator MI,
2439                        ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) {
2440 
2441   MachineFunction *MF = MBB.getParent();
2442   const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
2443   DebugLoc DL;
2444   unsigned MoveReg = PPC::R12;
2445 
2446   // 32-bit:  FP-relative
2447   MBB.insert(MI,
2448              addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg),
2449                                CSI[CSIIndex].getFrameIdx()));
2450 
2451   unsigned RestoreOp = PPC::MTOCRF;
2452   if (CR2Spilled)
2453     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
2454                .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
2455 
2456   if (CR3Spilled)
2457     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
2458                .addReg(MoveReg, getKillRegState(!CR4Spilled)));
2459 
2460   if (CR4Spilled)
2461     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
2462                .addReg(MoveReg, getKillRegState(true)));
2463 }
2464 
2465 MachineBasicBlock::iterator PPCFrameLowering::
2466 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
2467                               MachineBasicBlock::iterator I) const {
2468   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2469   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2470       I->getOpcode() == PPC::ADJCALLSTACKUP) {
2471     // Add (actually subtract) back the amount the callee popped on return.
2472     if (int CalleeAmt =  I->getOperand(1).getImm()) {
2473       bool is64Bit = Subtarget.isPPC64();
2474       CalleeAmt *= -1;
2475       unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
2476       unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
2477       unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
2478       unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
2479       unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
2480       unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
2481       const DebugLoc &dl = I->getDebugLoc();
2482 
2483       if (isInt<16>(CalleeAmt)) {
2484         BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
2485           .addReg(StackReg, RegState::Kill)
2486           .addImm(CalleeAmt);
2487       } else {
2488         MachineBasicBlock::iterator MBBI = I;
2489         BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
2490           .addImm(CalleeAmt >> 16);
2491         BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
2492           .addReg(TmpReg, RegState::Kill)
2493           .addImm(CalleeAmt & 0xFFFF);
2494         BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
2495           .addReg(StackReg, RegState::Kill)
2496           .addReg(TmpReg);
2497       }
2498     }
2499   }
2500   // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
2501   return MBB.erase(I);
2502 }
2503 
2504 static bool isCalleeSavedCR(unsigned Reg) {
2505   return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4;
2506 }
2507 
2508 bool PPCFrameLowering::restoreCalleeSavedRegisters(
2509     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2510     MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2511   MachineFunction *MF = MBB.getParent();
2512   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2513   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2514   bool MustSaveTOC = FI->mustSaveTOC();
2515   bool CR2Spilled = false;
2516   bool CR3Spilled = false;
2517   bool CR4Spilled = false;
2518   unsigned CSIIndex = 0;
2519   BitVector Restored(TRI->getNumRegs());
2520 
2521   // Initialize insertion-point logic; we will be restoring in reverse
2522   // order of spill.
2523   MachineBasicBlock::iterator I = MI, BeforeI = I;
2524   bool AtStart = I == MBB.begin();
2525 
2526   if (!AtStart)
2527     --BeforeI;
2528 
2529   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2530     unsigned Reg = CSI[i].getReg();
2531 
2532     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2533       continue;
2534 
2535     // Restore of callee saved condition register field is handled during
2536     // epilogue insertion.
2537     if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
2538       continue;
2539 
2540     if (Reg == PPC::CR2) {
2541       CR2Spilled = true;
2542       // The spill slot is associated only with CR2, which is the
2543       // first nonvolatile spilled.  Save it here.
2544       CSIIndex = i;
2545       continue;
2546     } else if (Reg == PPC::CR3) {
2547       CR3Spilled = true;
2548       continue;
2549     } else if (Reg == PPC::CR4) {
2550       CR4Spilled = true;
2551       continue;
2552     } else {
2553       // On 32-bit ELF when we first encounter a non-CR register after seeing at
2554       // least one CR register, restore all spilled CRs together.
2555       if (CR2Spilled || CR3Spilled || CR4Spilled) {
2556         bool is31 = needsFP(*MF);
2557         restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI,
2558                    CSIIndex);
2559         CR2Spilled = CR3Spilled = CR4Spilled = false;
2560       }
2561 
2562       if (CSI[i].isSpilledToReg()) {
2563         DebugLoc DL;
2564         unsigned Dst = CSI[i].getDstReg();
2565 
2566         if (Restored[Dst])
2567           continue;
2568 
2569         if (VSRContainingGPRs[Dst].second != 0) {
2570           assert(Subtarget.hasP9Vector());
2571           NumPEReloadVSR += 2;
2572           BuildMI(MBB, I, DL, TII.get(PPC::MFVSRLD),
2573                   VSRContainingGPRs[Dst].second)
2574               .addReg(Dst);
2575           BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
2576                   VSRContainingGPRs[Dst].first)
2577               .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
2578         } else if (VSRContainingGPRs[Dst].second == 0) {
2579           assert(Subtarget.hasP8Vector());
2580           ++NumPEReloadVSR;
2581           BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
2582                   VSRContainingGPRs[Dst].first)
2583               .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
2584         } else {
2585           llvm_unreachable("More than two GPRs spilled to a VSR!");
2586         }
2587 
2588         Restored.set(Dst);
2589 
2590       } else {
2591        // Default behavior for non-CR saves.
2592         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2593 
2594         // Functions without NoUnwind need to preserve the order of elements in
2595         // saved vector registers.
2596         if (Subtarget.needsSwapsForVSXMemOps() &&
2597             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2598           TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
2599                                         TRI);
2600         else
2601           TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
2602 
2603         assert(I != MBB.begin() &&
2604                "loadRegFromStackSlot didn't insert any code!");
2605       }
2606     }
2607 
2608     // Insert in reverse order.
2609     if (AtStart)
2610       I = MBB.begin();
2611     else {
2612       I = BeforeI;
2613       ++I;
2614     }
2615   }
2616 
2617   // If we haven't yet spilled the CRs, do so now.
2618   if (CR2Spilled || CR3Spilled || CR4Spilled) {
2619     assert(Subtarget.is32BitELFABI() &&
2620            "Only set CR[2|3|4]Spilled on 32-bit SVR4.");
2621     bool is31 = needsFP(*MF);
2622     restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex);
2623   }
2624 
2625   return true;
2626 }
2627 
2628 unsigned PPCFrameLowering::getTOCSaveOffset() const {
2629   return TOCSaveOffset;
2630 }
2631 
2632 unsigned PPCFrameLowering::getFramePointerSaveOffset() const {
2633   return FramePointerSaveOffset;
2634 }
2635 
2636 unsigned PPCFrameLowering::getBasePointerSaveOffset() const {
2637   return BasePointerSaveOffset;
2638 }
2639 
2640 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2641   if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
2642     return false;
2643   return !MF.getSubtarget<PPCSubtarget>().is32BitELFABI();
2644 }
2645