1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the PPC implementation of TargetFrameLowering class.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "PPCFrameLowering.h"
15 #include "PPCInstrBuilder.h"
16 #include "PPCInstrInfo.h"
17 #include "PPCMachineFunctionInfo.h"
18 #include "PPCSubtarget.h"
19 #include "PPCTargetMachine.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/MachineModuleInfo.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/RegisterScavenging.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/Target/TargetOptions.h"
29
30 using namespace llvm;
31
32 #define DEBUG_TYPE "framelowering"
33 STATISTIC(NumNoNeedForFrame, "Number of functions without frames");
34 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
35 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
36
37 static cl::opt<bool>
38 EnablePEVectorSpills("ppc-enable-pe-vector-spills",
39 cl::desc("Enable spills in prologue to vector registers."),
40 cl::init(false), cl::Hidden);
41
42 /// VRRegNo - Map from a numbered VR register to its enum value.
43 ///
44 static const MCPhysReg VRRegNo[] = {
45 PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
46 PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
47 PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
48 PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
49 };
50
computeReturnSaveOffset(const PPCSubtarget & STI)51 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
52 if (STI.isDarwinABI())
53 return STI.isPPC64() ? 16 : 8;
54 // SVR4 ABI:
55 return STI.isPPC64() ? 16 : 4;
56 }
57
computeTOCSaveOffset(const PPCSubtarget & STI)58 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
59 return STI.isELFv2ABI() ? 24 : 40;
60 }
61
computeFramePointerSaveOffset(const PPCSubtarget & STI)62 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
63 // For the Darwin ABI:
64 // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area
65 // for saving the frame pointer (if needed.) While the published ABI has
66 // not used this slot since at least MacOSX 10.2, there is older code
67 // around that does use it, and that needs to continue to work.
68 if (STI.isDarwinABI())
69 return STI.isPPC64() ? -8U : -4U;
70
71 // SVR4 ABI: First slot in the general register save area.
72 return STI.isPPC64() ? -8U : -4U;
73 }
74
computeLinkageSize(const PPCSubtarget & STI)75 static unsigned computeLinkageSize(const PPCSubtarget &STI) {
76 if (STI.isDarwinABI() || STI.isPPC64())
77 return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
78
79 // SVR4 ABI:
80 return 8;
81 }
82
computeBasePointerSaveOffset(const PPCSubtarget & STI)83 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
84 if (STI.isDarwinABI())
85 return STI.isPPC64() ? -16U : -8U;
86
87 // SVR4 ABI: First slot in the general register save area.
88 return STI.isPPC64()
89 ? -16U
90 : STI.getTargetMachine().isPositionIndependent() ? -12U : -8U;
91 }
92
PPCFrameLowering(const PPCSubtarget & STI)93 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
94 : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
95 STI.getPlatformStackAlignment(), 0),
96 Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
97 TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
98 FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
99 LinkageSize(computeLinkageSize(Subtarget)),
100 BasePointerSaveOffset(computeBasePointerSaveOffset(STI)) {}
101
102 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
getCalleeSavedSpillSlots(unsigned & NumEntries) const103 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
104 unsigned &NumEntries) const {
105 if (Subtarget.isDarwinABI()) {
106 NumEntries = 1;
107 if (Subtarget.isPPC64()) {
108 static const SpillSlot darwin64Offsets = {PPC::X31, -8};
109 return &darwin64Offsets;
110 } else {
111 static const SpillSlot darwinOffsets = {PPC::R31, -4};
112 return &darwinOffsets;
113 }
114 }
115
116 // Early exit if not using the SVR4 ABI.
117 if (!Subtarget.isSVR4ABI()) {
118 NumEntries = 0;
119 return nullptr;
120 }
121
122 // Note that the offsets here overlap, but this is fixed up in
123 // processFunctionBeforeFrameFinalized.
124
125 static const SpillSlot Offsets[] = {
126 // Floating-point register save area offsets.
127 {PPC::F31, -8},
128 {PPC::F30, -16},
129 {PPC::F29, -24},
130 {PPC::F28, -32},
131 {PPC::F27, -40},
132 {PPC::F26, -48},
133 {PPC::F25, -56},
134 {PPC::F24, -64},
135 {PPC::F23, -72},
136 {PPC::F22, -80},
137 {PPC::F21, -88},
138 {PPC::F20, -96},
139 {PPC::F19, -104},
140 {PPC::F18, -112},
141 {PPC::F17, -120},
142 {PPC::F16, -128},
143 {PPC::F15, -136},
144 {PPC::F14, -144},
145
146 // General register save area offsets.
147 {PPC::R31, -4},
148 {PPC::R30, -8},
149 {PPC::R29, -12},
150 {PPC::R28, -16},
151 {PPC::R27, -20},
152 {PPC::R26, -24},
153 {PPC::R25, -28},
154 {PPC::R24, -32},
155 {PPC::R23, -36},
156 {PPC::R22, -40},
157 {PPC::R21, -44},
158 {PPC::R20, -48},
159 {PPC::R19, -52},
160 {PPC::R18, -56},
161 {PPC::R17, -60},
162 {PPC::R16, -64},
163 {PPC::R15, -68},
164 {PPC::R14, -72},
165
166 // CR save area offset. We map each of the nonvolatile CR fields
167 // to the slot for CR2, which is the first of the nonvolatile CR
168 // fields to be assigned, so that we only allocate one save slot.
169 // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
170 {PPC::CR2, -4},
171
172 // VRSAVE save area offset.
173 {PPC::VRSAVE, -4},
174
175 // Vector register save area
176 {PPC::V31, -16},
177 {PPC::V30, -32},
178 {PPC::V29, -48},
179 {PPC::V28, -64},
180 {PPC::V27, -80},
181 {PPC::V26, -96},
182 {PPC::V25, -112},
183 {PPC::V24, -128},
184 {PPC::V23, -144},
185 {PPC::V22, -160},
186 {PPC::V21, -176},
187 {PPC::V20, -192},
188
189 // SPE register save area (overlaps Vector save area).
190 {PPC::S31, -8},
191 {PPC::S30, -16},
192 {PPC::S29, -24},
193 {PPC::S28, -32},
194 {PPC::S27, -40},
195 {PPC::S26, -48},
196 {PPC::S25, -56},
197 {PPC::S24, -64},
198 {PPC::S23, -72},
199 {PPC::S22, -80},
200 {PPC::S21, -88},
201 {PPC::S20, -96},
202 {PPC::S19, -104},
203 {PPC::S18, -112},
204 {PPC::S17, -120},
205 {PPC::S16, -128},
206 {PPC::S15, -136},
207 {PPC::S14, -144}};
208
209 static const SpillSlot Offsets64[] = {
210 // Floating-point register save area offsets.
211 {PPC::F31, -8},
212 {PPC::F30, -16},
213 {PPC::F29, -24},
214 {PPC::F28, -32},
215 {PPC::F27, -40},
216 {PPC::F26, -48},
217 {PPC::F25, -56},
218 {PPC::F24, -64},
219 {PPC::F23, -72},
220 {PPC::F22, -80},
221 {PPC::F21, -88},
222 {PPC::F20, -96},
223 {PPC::F19, -104},
224 {PPC::F18, -112},
225 {PPC::F17, -120},
226 {PPC::F16, -128},
227 {PPC::F15, -136},
228 {PPC::F14, -144},
229
230 // General register save area offsets.
231 {PPC::X31, -8},
232 {PPC::X30, -16},
233 {PPC::X29, -24},
234 {PPC::X28, -32},
235 {PPC::X27, -40},
236 {PPC::X26, -48},
237 {PPC::X25, -56},
238 {PPC::X24, -64},
239 {PPC::X23, -72},
240 {PPC::X22, -80},
241 {PPC::X21, -88},
242 {PPC::X20, -96},
243 {PPC::X19, -104},
244 {PPC::X18, -112},
245 {PPC::X17, -120},
246 {PPC::X16, -128},
247 {PPC::X15, -136},
248 {PPC::X14, -144},
249
250 // VRSAVE save area offset.
251 {PPC::VRSAVE, -4},
252
253 // Vector register save area
254 {PPC::V31, -16},
255 {PPC::V30, -32},
256 {PPC::V29, -48},
257 {PPC::V28, -64},
258 {PPC::V27, -80},
259 {PPC::V26, -96},
260 {PPC::V25, -112},
261 {PPC::V24, -128},
262 {PPC::V23, -144},
263 {PPC::V22, -160},
264 {PPC::V21, -176},
265 {PPC::V20, -192}};
266
267 if (Subtarget.isPPC64()) {
268 NumEntries = array_lengthof(Offsets64);
269
270 return Offsets64;
271 } else {
272 NumEntries = array_lengthof(Offsets);
273
274 return Offsets;
275 }
276 }
277
278 /// RemoveVRSaveCode - We have found that this function does not need any code
279 /// to manipulate the VRSAVE register, even though it uses vector registers.
280 /// This can happen when the only registers used are known to be live in or out
281 /// of the function. Remove all of the VRSAVE related code from the function.
282 /// FIXME: The removal of the code results in a compile failure at -O0 when the
283 /// function contains a function call, as the GPR containing original VRSAVE
284 /// contents is spilled and reloaded around the call. Without the prolog code,
285 /// the spill instruction refers to an undefined register. This code needs
286 /// to account for all uses of that GPR.
RemoveVRSaveCode(MachineInstr & MI)287 static void RemoveVRSaveCode(MachineInstr &MI) {
288 MachineBasicBlock *Entry = MI.getParent();
289 MachineFunction *MF = Entry->getParent();
290
291 // We know that the MTVRSAVE instruction immediately follows MI. Remove it.
292 MachineBasicBlock::iterator MBBI = MI;
293 ++MBBI;
294 assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE);
295 MBBI->eraseFromParent();
296
297 bool RemovedAllMTVRSAVEs = true;
298 // See if we can find and remove the MTVRSAVE instruction from all of the
299 // epilog blocks.
300 for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
301 // If last instruction is a return instruction, add an epilogue
302 if (I->isReturnBlock()) {
303 bool FoundIt = false;
304 for (MBBI = I->end(); MBBI != I->begin(); ) {
305 --MBBI;
306 if (MBBI->getOpcode() == PPC::MTVRSAVE) {
307 MBBI->eraseFromParent(); // remove it.
308 FoundIt = true;
309 break;
310 }
311 }
312 RemovedAllMTVRSAVEs &= FoundIt;
313 }
314 }
315
316 // If we found and removed all MTVRSAVE instructions, remove the read of
317 // VRSAVE as well.
318 if (RemovedAllMTVRSAVEs) {
319 MBBI = MI;
320 assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?");
321 --MBBI;
322 assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?");
323 MBBI->eraseFromParent();
324 }
325
326 // Finally, nuke the UPDATE_VRSAVE.
327 MI.eraseFromParent();
328 }
329
330 // HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
331 // instruction selector. Based on the vector registers that have been used,
332 // transform this into the appropriate ORI instruction.
HandleVRSaveUpdate(MachineInstr & MI,const TargetInstrInfo & TII)333 static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) {
334 MachineFunction *MF = MI.getParent()->getParent();
335 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
336 DebugLoc dl = MI.getDebugLoc();
337
338 const MachineRegisterInfo &MRI = MF->getRegInfo();
339 unsigned UsedRegMask = 0;
340 for (unsigned i = 0; i != 32; ++i)
341 if (MRI.isPhysRegModified(VRRegNo[i]))
342 UsedRegMask |= 1 << (31-i);
343
344 // Live in and live out values already must be in the mask, so don't bother
345 // marking them.
346 for (std::pair<unsigned, unsigned> LI : MF->getRegInfo().liveins()) {
347 unsigned RegNo = TRI->getEncodingValue(LI.first);
348 if (VRRegNo[RegNo] == LI.first) // If this really is a vector reg.
349 UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
350 }
351
352 // Live out registers appear as use operands on return instructions.
353 for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end();
354 UsedRegMask != 0 && BI != BE; ++BI) {
355 const MachineBasicBlock &MBB = *BI;
356 if (!MBB.isReturnBlock())
357 continue;
358 const MachineInstr &Ret = MBB.back();
359 for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) {
360 const MachineOperand &MO = Ret.getOperand(I);
361 if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg()))
362 continue;
363 unsigned RegNo = TRI->getEncodingValue(MO.getReg());
364 UsedRegMask &= ~(1 << (31-RegNo));
365 }
366 }
367
368 // If no registers are used, turn this into a copy.
369 if (UsedRegMask == 0) {
370 // Remove all VRSAVE code.
371 RemoveVRSaveCode(MI);
372 return;
373 }
374
375 unsigned SrcReg = MI.getOperand(1).getReg();
376 unsigned DstReg = MI.getOperand(0).getReg();
377
378 if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
379 if (DstReg != SrcReg)
380 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
381 .addReg(SrcReg)
382 .addImm(UsedRegMask);
383 else
384 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
385 .addReg(SrcReg, RegState::Kill)
386 .addImm(UsedRegMask);
387 } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
388 if (DstReg != SrcReg)
389 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
390 .addReg(SrcReg)
391 .addImm(UsedRegMask >> 16);
392 else
393 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
394 .addReg(SrcReg, RegState::Kill)
395 .addImm(UsedRegMask >> 16);
396 } else {
397 if (DstReg != SrcReg)
398 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
399 .addReg(SrcReg)
400 .addImm(UsedRegMask >> 16);
401 else
402 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
403 .addReg(SrcReg, RegState::Kill)
404 .addImm(UsedRegMask >> 16);
405
406 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
407 .addReg(DstReg, RegState::Kill)
408 .addImm(UsedRegMask & 0xFFFF);
409 }
410
411 // Remove the old UPDATE_VRSAVE instruction.
412 MI.eraseFromParent();
413 }
414
spillsCR(const MachineFunction & MF)415 static bool spillsCR(const MachineFunction &MF) {
416 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
417 return FuncInfo->isCRSpilled();
418 }
419
spillsVRSAVE(const MachineFunction & MF)420 static bool spillsVRSAVE(const MachineFunction &MF) {
421 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
422 return FuncInfo->isVRSAVESpilled();
423 }
424
hasSpills(const MachineFunction & MF)425 static bool hasSpills(const MachineFunction &MF) {
426 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
427 return FuncInfo->hasSpills();
428 }
429
hasNonRISpills(const MachineFunction & MF)430 static bool hasNonRISpills(const MachineFunction &MF) {
431 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
432 return FuncInfo->hasNonRISpills();
433 }
434
435 /// MustSaveLR - Return true if this function requires that we save the LR
436 /// register onto the stack in the prolog and restore it in the epilog of the
437 /// function.
MustSaveLR(const MachineFunction & MF,unsigned LR)438 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
439 const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
440
441 // We need a save/restore of LR if there is any def of LR (which is
442 // defined by calls, including the PIC setup sequence), or if there is
443 // some use of the LR stack slot (e.g. for builtin_return_address).
444 // (LR comes in 32 and 64 bit versions.)
445 MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
446 return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
447 }
448
449 /// determineFrameLayout - Determine the size of the frame and maximum call
450 /// frame size.
determineFrameLayout(MachineFunction & MF,bool UpdateMF,bool UseEstimate) const451 unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
452 bool UpdateMF,
453 bool UseEstimate) const {
454 MachineFrameInfo &MFI = MF.getFrameInfo();
455
456 // Get the number of bytes to allocate from the FrameInfo
457 unsigned FrameSize =
458 UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
459
460 // Get stack alignments. The frame must be aligned to the greatest of these:
461 unsigned TargetAlign = getStackAlignment(); // alignment required per the ABI
462 unsigned MaxAlign = MFI.getMaxAlignment(); // algmt required by data in frame
463 unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1;
464
465 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
466
467 unsigned LR = RegInfo->getRARegister();
468 bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
469 bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
470 !MFI.adjustsStack() && // No calls.
471 !MustSaveLR(MF, LR) && // No need to save LR.
472 !RegInfo->hasBasePointer(MF); // No special alignment.
473
474 // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless
475 // code if all local vars are reg-allocated.
476 bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
477
478 // Check whether we can skip adjusting the stack pointer (by using red zone)
479 if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
480 NumNoNeedForFrame++;
481 // No need for frame
482 if (UpdateMF)
483 MFI.setStackSize(0);
484 return 0;
485 }
486
487 // Get the maximum call frame size of all the calls.
488 unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
489
490 // Maximum call frame needs to be at least big enough for linkage area.
491 unsigned minCallFrameSize = getLinkageSize();
492 maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
493
494 // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
495 // that allocations will be aligned.
496 if (MFI.hasVarSizedObjects())
497 maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
498
499 // Update maximum call frame size.
500 if (UpdateMF)
501 MFI.setMaxCallFrameSize(maxCallFrameSize);
502
503 // Include call frame size in total.
504 FrameSize += maxCallFrameSize;
505
506 // Make sure the frame is aligned.
507 FrameSize = (FrameSize + AlignMask) & ~AlignMask;
508
509 // Update frame info.
510 if (UpdateMF)
511 MFI.setStackSize(FrameSize);
512
513 return FrameSize;
514 }
515
516 // hasFP - Return true if the specified function actually has a dedicated frame
517 // pointer register.
hasFP(const MachineFunction & MF) const518 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
519 const MachineFrameInfo &MFI = MF.getFrameInfo();
520 // FIXME: This is pretty much broken by design: hasFP() might be called really
521 // early, before the stack layout was calculated and thus hasFP() might return
522 // true or false here depending on the time of call.
523 return (MFI.getStackSize()) && needsFP(MF);
524 }
525
526 // needsFP - Return true if the specified function should have a dedicated frame
527 // pointer register. This is true if the function has variable sized allocas or
528 // if frame pointer elimination is disabled.
needsFP(const MachineFunction & MF) const529 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
530 const MachineFrameInfo &MFI = MF.getFrameInfo();
531
532 // Naked functions have no stack frame pushed, so we don't have a frame
533 // pointer.
534 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
535 return false;
536
537 return MF.getTarget().Options.DisableFramePointerElim(MF) ||
538 MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
539 (MF.getTarget().Options.GuaranteedTailCallOpt &&
540 MF.getInfo<PPCFunctionInfo>()->hasFastCall());
541 }
542
replaceFPWithRealFP(MachineFunction & MF) const543 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
544 bool is31 = needsFP(MF);
545 unsigned FPReg = is31 ? PPC::R31 : PPC::R1;
546 unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
547
548 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
549 bool HasBP = RegInfo->hasBasePointer(MF);
550 unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
551 unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
552
553 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
554 BI != BE; ++BI)
555 for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
556 --MBBI;
557 for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
558 MachineOperand &MO = MBBI->getOperand(I);
559 if (!MO.isReg())
560 continue;
561
562 switch (MO.getReg()) {
563 case PPC::FP:
564 MO.setReg(FPReg);
565 break;
566 case PPC::FP8:
567 MO.setReg(FP8Reg);
568 break;
569 case PPC::BP:
570 MO.setReg(BPReg);
571 break;
572 case PPC::BP8:
573 MO.setReg(BP8Reg);
574 break;
575
576 }
577 }
578 }
579 }
580
581 /* This function will do the following:
582 - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
583 respectively (defaults recommended by the ABI) and return true
584 - If MBB is not an entry block, initialize the register scavenger and look
585 for available registers.
586 - If the defaults (R0/R12) are available, return true
587 - If TwoUniqueRegsRequired is set to true, it looks for two unique
588 registers. Otherwise, look for a single available register.
589 - If the required registers are found, set SR1 and SR2 and return true.
590 - If the required registers are not found, set SR2 or both SR1 and SR2 to
591 PPC::NoRegister and return false.
592
593 Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
594 is not set, this function will attempt to find two different registers, but
595 still return true if only one register is available (and set SR1 == SR2).
596 */
597 bool
findScratchRegister(MachineBasicBlock * MBB,bool UseAtEnd,bool TwoUniqueRegsRequired,unsigned * SR1,unsigned * SR2) const598 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
599 bool UseAtEnd,
600 bool TwoUniqueRegsRequired,
601 unsigned *SR1,
602 unsigned *SR2) const {
603 RegScavenger RS;
604 unsigned R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
605 unsigned R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
606
607 // Set the defaults for the two scratch registers.
608 if (SR1)
609 *SR1 = R0;
610
611 if (SR2) {
612 assert (SR1 && "Asking for the second scratch register but not the first?");
613 *SR2 = R12;
614 }
615
616 // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
617 if ((UseAtEnd && MBB->isReturnBlock()) ||
618 (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
619 return true;
620
621 RS.enterBasicBlock(*MBB);
622
623 if (UseAtEnd && !MBB->empty()) {
624 // The scratch register will be used at the end of the block, so must
625 // consider all registers used within the block
626
627 MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
628 // If no terminator, back iterator up to previous instruction.
629 if (MBBI == MBB->end())
630 MBBI = std::prev(MBBI);
631
632 if (MBBI != MBB->begin())
633 RS.forward(MBBI);
634 }
635
636 // If the two registers are available, we're all good.
637 // Note that we only return here if both R0 and R12 are available because
638 // although the function may not require two unique registers, it may benefit
639 // from having two so we should try to provide them.
640 if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
641 return true;
642
643 // Get the list of callee-saved registers for the target.
644 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
645 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
646
647 // Get all the available registers in the block.
648 BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
649 &PPC::GPRCRegClass);
650
651 // We shouldn't use callee-saved registers as scratch registers as they may be
652 // available when looking for a candidate block for shrink wrapping but not
653 // available when the actual prologue/epilogue is being emitted because they
654 // were added as live-in to the prologue block by PrologueEpilogueInserter.
655 for (int i = 0; CSRegs[i]; ++i)
656 BV.reset(CSRegs[i]);
657
658 // Set the first scratch register to the first available one.
659 if (SR1) {
660 int FirstScratchReg = BV.find_first();
661 *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
662 }
663
664 // If there is another one available, set the second scratch register to that.
665 // Otherwise, set it to either PPC::NoRegister if this function requires two
666 // or to whatever SR1 is set to if this function doesn't require two.
667 if (SR2) {
668 int SecondScratchReg = BV.find_next(*SR1);
669 if (SecondScratchReg != -1)
670 *SR2 = SecondScratchReg;
671 else
672 *SR2 = TwoUniqueRegsRequired ? (unsigned)PPC::NoRegister : *SR1;
673 }
674
675 // Now that we've done our best to provide both registers, double check
676 // whether we were unable to provide enough.
677 if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
678 return false;
679
680 return true;
681 }
682
683 // We need a scratch register for spilling LR and for spilling CR. By default,
684 // we use two scratch registers to hide latency. However, if only one scratch
685 // register is available, we can adjust for that by not overlapping the spill
686 // code. However, if we need to realign the stack (i.e. have a base pointer)
687 // and the stack frame is large, we need two scratch registers.
688 bool
twoUniqueScratchRegsRequired(MachineBasicBlock * MBB) const689 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
690 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
691 MachineFunction &MF = *(MBB->getParent());
692 bool HasBP = RegInfo->hasBasePointer(MF);
693 unsigned FrameSize = determineFrameLayout(MF, false);
694 int NegFrameSize = -FrameSize;
695 bool IsLargeFrame = !isInt<16>(NegFrameSize);
696 MachineFrameInfo &MFI = MF.getFrameInfo();
697 unsigned MaxAlign = MFI.getMaxAlignment();
698 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
699
700 return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1;
701 }
702
canUseAsPrologue(const MachineBasicBlock & MBB) const703 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
704 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
705
706 return findScratchRegister(TmpMBB, false,
707 twoUniqueScratchRegsRequired(TmpMBB));
708 }
709
canUseAsEpilogue(const MachineBasicBlock & MBB) const710 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
711 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
712
713 return findScratchRegister(TmpMBB, true);
714 }
715
emitPrologue(MachineFunction & MF,MachineBasicBlock & MBB) const716 void PPCFrameLowering::emitPrologue(MachineFunction &MF,
717 MachineBasicBlock &MBB) const {
718 MachineBasicBlock::iterator MBBI = MBB.begin();
719 MachineFrameInfo &MFI = MF.getFrameInfo();
720 const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
721 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
722
723 MachineModuleInfo &MMI = MF.getMMI();
724 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
725 DebugLoc dl;
726 bool needsCFI = MMI.hasDebugInfo() ||
727 MF.getFunction().needsUnwindTableEntry();
728
729 // Get processor type.
730 bool isPPC64 = Subtarget.isPPC64();
731 // Get the ABI.
732 bool isSVR4ABI = Subtarget.isSVR4ABI();
733 bool isELFv2ABI = Subtarget.isELFv2ABI();
734 assert((Subtarget.isDarwinABI() || isSVR4ABI) &&
735 "Currently only Darwin and SVR4 ABIs are supported for PowerPC.");
736
737 // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it,
738 // process it.
739 if (!isSVR4ABI)
740 for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
741 if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
742 HandleVRSaveUpdate(*MBBI, TII);
743 break;
744 }
745 }
746
747 // Move MBBI back to the beginning of the prologue block.
748 MBBI = MBB.begin();
749
750 // Work out frame sizes.
751 unsigned FrameSize = determineFrameLayout(MF);
752 int NegFrameSize = -FrameSize;
753 if (!isInt<32>(NegFrameSize))
754 llvm_unreachable("Unhandled stack size!");
755
756 if (MFI.isFrameAddressTaken())
757 replaceFPWithRealFP(MF);
758
759 // Check if the link register (LR) must be saved.
760 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
761 bool MustSaveLR = FI->mustSaveLR();
762 const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
763 bool MustSaveCR = !MustSaveCRs.empty();
764 // Do we have a frame pointer and/or base pointer for this function?
765 bool HasFP = hasFP(MF);
766 bool HasBP = RegInfo->hasBasePointer(MF);
767 bool HasRedZone = isPPC64 || !isSVR4ABI;
768
769 unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1;
770 unsigned BPReg = RegInfo->getBaseRegister(MF);
771 unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
772 unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR;
773 unsigned ScratchReg = 0;
774 unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
775 // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
776 const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
777 : PPC::MFLR );
778 const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
779 : PPC::STW );
780 const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
781 : PPC::STWU );
782 const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
783 : PPC::STWUX);
784 const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8
785 : PPC::LIS );
786 const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8
787 : PPC::ORI );
788 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
789 : PPC::OR );
790 const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
791 : PPC::SUBFC);
792 const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
793 : PPC::SUBFIC);
794
795 // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
796 // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
797 // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
798 // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
799 assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
800 "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
801
802 // Using the same bool variable as below to suppress compiler warnings.
803 bool SingleScratchReg =
804 findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB),
805 &ScratchReg, &TempReg);
806 assert(SingleScratchReg &&
807 "Required number of registers not available in this block");
808
809 SingleScratchReg = ScratchReg == TempReg;
810
811 int LROffset = getReturnSaveOffset();
812
813 int FPOffset = 0;
814 if (HasFP) {
815 if (isSVR4ABI) {
816 MachineFrameInfo &MFI = MF.getFrameInfo();
817 int FPIndex = FI->getFramePointerSaveIndex();
818 assert(FPIndex && "No Frame Pointer Save Slot!");
819 FPOffset = MFI.getObjectOffset(FPIndex);
820 } else {
821 FPOffset = getFramePointerSaveOffset();
822 }
823 }
824
825 int BPOffset = 0;
826 if (HasBP) {
827 if (isSVR4ABI) {
828 MachineFrameInfo &MFI = MF.getFrameInfo();
829 int BPIndex = FI->getBasePointerSaveIndex();
830 assert(BPIndex && "No Base Pointer Save Slot!");
831 BPOffset = MFI.getObjectOffset(BPIndex);
832 } else {
833 BPOffset = getBasePointerSaveOffset();
834 }
835 }
836
837 int PBPOffset = 0;
838 if (FI->usesPICBase()) {
839 MachineFrameInfo &MFI = MF.getFrameInfo();
840 int PBPIndex = FI->getPICBasePointerSaveIndex();
841 assert(PBPIndex && "No PIC Base Pointer Save Slot!");
842 PBPOffset = MFI.getObjectOffset(PBPIndex);
843 }
844
845 // Get stack alignments.
846 unsigned MaxAlign = MFI.getMaxAlignment();
847 if (HasBP && MaxAlign > 1)
848 assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
849 "Invalid alignment!");
850
851 // Frames of 32KB & larger require special handling because they cannot be
852 // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
853 bool isLargeFrame = !isInt<16>(NegFrameSize);
854
855 assert((isPPC64 || !MustSaveCR) &&
856 "Prologue CR saving supported only in 64-bit mode");
857
858 // If we need to spill the CR and the LR but we don't have two separate
859 // registers available, we must spill them one at a time
860 if (MustSaveCR && SingleScratchReg && MustSaveLR) {
861 // In the ELFv2 ABI, we are not required to save all CR fields.
862 // If only one or two CR fields are clobbered, it is more efficient to use
863 // mfocrf to selectively save just those fields, because mfocrf has short
864 // latency compares to mfcr.
865 unsigned MfcrOpcode = PPC::MFCR8;
866 unsigned CrState = RegState::ImplicitKill;
867 if (isELFv2ABI && MustSaveCRs.size() == 1) {
868 MfcrOpcode = PPC::MFOCRF8;
869 CrState = RegState::Kill;
870 }
871 MachineInstrBuilder MIB =
872 BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg);
873 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
874 MIB.addReg(MustSaveCRs[i], CrState);
875 BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
876 .addReg(TempReg, getKillRegState(true))
877 .addImm(8)
878 .addReg(SPReg);
879 }
880
881 if (MustSaveLR)
882 BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
883
884 if (MustSaveCR &&
885 !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64
886 // In the ELFv2 ABI, we are not required to save all CR fields.
887 // If only one or two CR fields are clobbered, it is more efficient to use
888 // mfocrf to selectively save just those fields, because mfocrf has short
889 // latency compares to mfcr.
890 unsigned MfcrOpcode = PPC::MFCR8;
891 unsigned CrState = RegState::ImplicitKill;
892 if (isELFv2ABI && MustSaveCRs.size() == 1) {
893 MfcrOpcode = PPC::MFOCRF8;
894 CrState = RegState::Kill;
895 }
896 MachineInstrBuilder MIB =
897 BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg);
898 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
899 MIB.addReg(MustSaveCRs[i], CrState);
900 }
901
902 if (HasRedZone) {
903 if (HasFP)
904 BuildMI(MBB, MBBI, dl, StoreInst)
905 .addReg(FPReg)
906 .addImm(FPOffset)
907 .addReg(SPReg);
908 if (FI->usesPICBase())
909 BuildMI(MBB, MBBI, dl, StoreInst)
910 .addReg(PPC::R30)
911 .addImm(PBPOffset)
912 .addReg(SPReg);
913 if (HasBP)
914 BuildMI(MBB, MBBI, dl, StoreInst)
915 .addReg(BPReg)
916 .addImm(BPOffset)
917 .addReg(SPReg);
918 }
919
920 if (MustSaveLR)
921 BuildMI(MBB, MBBI, dl, StoreInst)
922 .addReg(ScratchReg, getKillRegState(true))
923 .addImm(LROffset)
924 .addReg(SPReg);
925
926 if (MustSaveCR &&
927 !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64
928 assert(HasRedZone && "A red zone is always available on PPC64");
929 BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
930 .addReg(TempReg, getKillRegState(true))
931 .addImm(8)
932 .addReg(SPReg);
933 }
934
935 // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
936 if (!FrameSize)
937 return;
938
939 // Adjust stack pointer: r1 += NegFrameSize.
940 // If there is a preferred stack alignment, align R1 now
941
942 if (HasBP && HasRedZone) {
943 // Save a copy of r1 as the base pointer.
944 BuildMI(MBB, MBBI, dl, OrInst, BPReg)
945 .addReg(SPReg)
946 .addReg(SPReg);
947 }
948
949 // Have we generated a STUX instruction to claim stack frame? If so,
950 // the negated frame size will be placed in ScratchReg.
951 bool HasSTUX = false;
952
953 // This condition must be kept in sync with canUseAsPrologue.
954 if (HasBP && MaxAlign > 1) {
955 if (isPPC64)
956 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
957 .addReg(SPReg)
958 .addImm(0)
959 .addImm(64 - Log2_32(MaxAlign));
960 else // PPC32...
961 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
962 .addReg(SPReg)
963 .addImm(0)
964 .addImm(32 - Log2_32(MaxAlign))
965 .addImm(31);
966 if (!isLargeFrame) {
967 BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
968 .addReg(ScratchReg, RegState::Kill)
969 .addImm(NegFrameSize);
970 } else {
971 assert(!SingleScratchReg && "Only a single scratch reg available");
972 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
973 .addImm(NegFrameSize >> 16);
974 BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
975 .addReg(TempReg, RegState::Kill)
976 .addImm(NegFrameSize & 0xFFFF);
977 BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
978 .addReg(ScratchReg, RegState::Kill)
979 .addReg(TempReg, RegState::Kill);
980 }
981
982 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
983 .addReg(SPReg, RegState::Kill)
984 .addReg(SPReg)
985 .addReg(ScratchReg);
986 HasSTUX = true;
987
988 } else if (!isLargeFrame) {
989 BuildMI(MBB, MBBI, dl, StoreUpdtInst, SPReg)
990 .addReg(SPReg)
991 .addImm(NegFrameSize)
992 .addReg(SPReg);
993
994 } else {
995 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
996 .addImm(NegFrameSize >> 16);
997 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
998 .addReg(ScratchReg, RegState::Kill)
999 .addImm(NegFrameSize & 0xFFFF);
1000 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
1001 .addReg(SPReg, RegState::Kill)
1002 .addReg(SPReg)
1003 .addReg(ScratchReg);
1004 HasSTUX = true;
1005 }
1006
1007 if (!HasRedZone) {
1008 assert(!isPPC64 && "A red zone is always available on PPC64");
1009 if (HasSTUX) {
1010 // The negated frame size is in ScratchReg, and the SPReg has been
1011 // decremented by the frame size: SPReg = old SPReg + ScratchReg.
1012 // Since FPOffset, PBPOffset, etc. are relative to the beginning of
1013 // the stack frame (i.e. the old SP), ideally, we would put the old
1014 // SP into a register and use it as the base for the stores. The
1015 // problem is that the only available register may be ScratchReg,
1016 // which could be R0, and R0 cannot be used as a base address.
1017
1018 // First, set ScratchReg to the old SP. This may need to be modified
1019 // later.
1020 BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
1021 .addReg(ScratchReg, RegState::Kill)
1022 .addReg(SPReg);
1023
1024 if (ScratchReg == PPC::R0) {
1025 // R0 cannot be used as a base register, but it can be used as an
1026 // index in a store-indexed.
1027 int LastOffset = 0;
1028 if (HasFP) {
1029 // R0 += (FPOffset-LastOffset).
1030 // Need addic, since addi treats R0 as 0.
1031 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1032 .addReg(ScratchReg)
1033 .addImm(FPOffset-LastOffset);
1034 LastOffset = FPOffset;
1035 // Store FP into *R0.
1036 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1037 .addReg(FPReg, RegState::Kill) // Save FP.
1038 .addReg(PPC::ZERO)
1039 .addReg(ScratchReg); // This will be the index (R0 is ok here).
1040 }
1041 if (FI->usesPICBase()) {
1042 // R0 += (PBPOffset-LastOffset).
1043 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1044 .addReg(ScratchReg)
1045 .addImm(PBPOffset-LastOffset);
1046 LastOffset = PBPOffset;
1047 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1048 .addReg(PPC::R30, RegState::Kill) // Save PIC base pointer.
1049 .addReg(PPC::ZERO)
1050 .addReg(ScratchReg); // This will be the index (R0 is ok here).
1051 }
1052 if (HasBP) {
1053 // R0 += (BPOffset-LastOffset).
1054 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1055 .addReg(ScratchReg)
1056 .addImm(BPOffset-LastOffset);
1057 LastOffset = BPOffset;
1058 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1059 .addReg(BPReg, RegState::Kill) // Save BP.
1060 .addReg(PPC::ZERO)
1061 .addReg(ScratchReg); // This will be the index (R0 is ok here).
1062 // BP = R0-LastOffset
1063 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
1064 .addReg(ScratchReg, RegState::Kill)
1065 .addImm(-LastOffset);
1066 }
1067 } else {
1068 // ScratchReg is not R0, so use it as the base register. It is
1069 // already set to the old SP, so we can use the offsets directly.
1070
1071 // Now that the stack frame has been allocated, save all the necessary
1072 // registers using ScratchReg as the base address.
1073 if (HasFP)
1074 BuildMI(MBB, MBBI, dl, StoreInst)
1075 .addReg(FPReg)
1076 .addImm(FPOffset)
1077 .addReg(ScratchReg);
1078 if (FI->usesPICBase())
1079 BuildMI(MBB, MBBI, dl, StoreInst)
1080 .addReg(PPC::R30)
1081 .addImm(PBPOffset)
1082 .addReg(ScratchReg);
1083 if (HasBP) {
1084 BuildMI(MBB, MBBI, dl, StoreInst)
1085 .addReg(BPReg)
1086 .addImm(BPOffset)
1087 .addReg(ScratchReg);
1088 BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1089 .addReg(ScratchReg, RegState::Kill)
1090 .addReg(ScratchReg);
1091 }
1092 }
1093 } else {
1094 // The frame size is a known 16-bit constant (fitting in the immediate
1095 // field of STWU). To be here we have to be compiling for PPC32.
1096 // Since the SPReg has been decreased by FrameSize, add it back to each
1097 // offset.
1098 if (HasFP)
1099 BuildMI(MBB, MBBI, dl, StoreInst)
1100 .addReg(FPReg)
1101 .addImm(FrameSize + FPOffset)
1102 .addReg(SPReg);
1103 if (FI->usesPICBase())
1104 BuildMI(MBB, MBBI, dl, StoreInst)
1105 .addReg(PPC::R30)
1106 .addImm(FrameSize + PBPOffset)
1107 .addReg(SPReg);
1108 if (HasBP) {
1109 BuildMI(MBB, MBBI, dl, StoreInst)
1110 .addReg(BPReg)
1111 .addImm(FrameSize + BPOffset)
1112 .addReg(SPReg);
1113 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1114 .addReg(SPReg)
1115 .addImm(FrameSize);
1116 }
1117 }
1118 }
1119
1120 // Add Call Frame Information for the instructions we generated above.
1121 if (needsCFI) {
1122 unsigned CFIIndex;
1123
1124 if (HasBP) {
1125 // Define CFA in terms of BP. Do this in preference to using FP/SP,
1126 // because if the stack needed aligning then CFA won't be at a fixed
1127 // offset from FP/SP.
1128 unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1129 CFIIndex = MF.addFrameInst(
1130 MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1131 } else {
1132 // Adjust the definition of CFA to account for the change in SP.
1133 assert(NegFrameSize);
1134 CFIIndex = MF.addFrameInst(
1135 MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize));
1136 }
1137 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1138 .addCFIIndex(CFIIndex);
1139
1140 if (HasFP) {
1141 // Describe where FP was saved, at a fixed offset from CFA.
1142 unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1143 CFIIndex = MF.addFrameInst(
1144 MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1145 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1146 .addCFIIndex(CFIIndex);
1147 }
1148
1149 if (FI->usesPICBase()) {
1150 // Describe where FP was saved, at a fixed offset from CFA.
1151 unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1152 CFIIndex = MF.addFrameInst(
1153 MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1154 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1155 .addCFIIndex(CFIIndex);
1156 }
1157
1158 if (HasBP) {
1159 // Describe where BP was saved, at a fixed offset from CFA.
1160 unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1161 CFIIndex = MF.addFrameInst(
1162 MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1163 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1164 .addCFIIndex(CFIIndex);
1165 }
1166
1167 if (MustSaveLR) {
1168 // Describe where LR was saved, at a fixed offset from CFA.
1169 unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1170 CFIIndex = MF.addFrameInst(
1171 MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1172 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1173 .addCFIIndex(CFIIndex);
1174 }
1175 }
1176
1177 // If there is a frame pointer, copy R1 into R31
1178 if (HasFP) {
1179 BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1180 .addReg(SPReg)
1181 .addReg(SPReg);
1182
1183 if (!HasBP && needsCFI) {
1184 // Change the definition of CFA from SP+offset to FP+offset, because SP
1185 // will change at every alloca.
1186 unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1187 unsigned CFIIndex = MF.addFrameInst(
1188 MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1189
1190 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1191 .addCFIIndex(CFIIndex);
1192 }
1193 }
1194
1195 if (needsCFI) {
1196 // Describe where callee saved registers were saved, at fixed offsets from
1197 // CFA.
1198 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1199 for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
1200 unsigned Reg = CSI[I].getReg();
1201 if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1202
1203 // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1204 // subregisters of CR2. We just need to emit a move of CR2.
1205 if (PPC::CRBITRCRegClass.contains(Reg))
1206 continue;
1207
1208 // For SVR4, don't emit a move for the CR spill slot if we haven't
1209 // spilled CRs.
1210 if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
1211 && !MustSaveCR)
1212 continue;
1213
1214 // For 64-bit SVR4 when we have spilled CRs, the spill location
1215 // is SP+8, not a frame-relative slot.
1216 if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1217 // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1218 // the whole CR word. In the ELFv2 ABI, every CR that was
1219 // actually saved gets its own CFI record.
1220 unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
1221 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1222 nullptr, MRI->getDwarfRegNum(CRReg, true), 8));
1223 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1224 .addCFIIndex(CFIIndex);
1225 continue;
1226 }
1227
1228 if (CSI[I].isSpilledToReg()) {
1229 unsigned SpilledReg = CSI[I].getDstReg();
1230 unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
1231 nullptr, MRI->getDwarfRegNum(Reg, true),
1232 MRI->getDwarfRegNum(SpilledReg, true)));
1233 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1234 .addCFIIndex(CFIRegister);
1235 } else {
1236 int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
1237 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1238 nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1239 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1240 .addCFIIndex(CFIIndex);
1241 }
1242 }
1243 }
1244 }
1245
emitEpilogue(MachineFunction & MF,MachineBasicBlock & MBB) const1246 void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1247 MachineBasicBlock &MBB) const {
1248 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1249 DebugLoc dl;
1250
1251 if (MBBI != MBB.end())
1252 dl = MBBI->getDebugLoc();
1253
1254 const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1255 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1256
1257 // Get alignment info so we know how to restore the SP.
1258 const MachineFrameInfo &MFI = MF.getFrameInfo();
1259
1260 // Get the number of bytes allocated from the FrameInfo.
1261 int FrameSize = MFI.getStackSize();
1262
1263 // Get processor type.
1264 bool isPPC64 = Subtarget.isPPC64();
1265 // Get the ABI.
1266 bool isSVR4ABI = Subtarget.isSVR4ABI();
1267
1268 // Check if the link register (LR) has been saved.
1269 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1270 bool MustSaveLR = FI->mustSaveLR();
1271 const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
1272 bool MustSaveCR = !MustSaveCRs.empty();
1273 // Do we have a frame pointer and/or base pointer for this function?
1274 bool HasFP = hasFP(MF);
1275 bool HasBP = RegInfo->hasBasePointer(MF);
1276 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1277
1278 unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1;
1279 unsigned BPReg = RegInfo->getBaseRegister(MF);
1280 unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
1281 unsigned ScratchReg = 0;
1282 unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
1283 const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1284 : PPC::MTLR );
1285 const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1286 : PPC::LWZ );
1287 const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1288 : PPC::LIS );
1289 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
1290 : PPC::OR );
1291 const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1292 : PPC::ORI );
1293 const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1294 : PPC::ADDI );
1295 const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1296 : PPC::ADD4 );
1297
1298 int LROffset = getReturnSaveOffset();
1299
1300 int FPOffset = 0;
1301
1302 // Using the same bool variable as below to suppress compiler warnings.
1303 bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1304 &TempReg);
1305 assert(SingleScratchReg &&
1306 "Could not find an available scratch register");
1307
1308 SingleScratchReg = ScratchReg == TempReg;
1309
1310 if (HasFP) {
1311 if (isSVR4ABI) {
1312 int FPIndex = FI->getFramePointerSaveIndex();
1313 assert(FPIndex && "No Frame Pointer Save Slot!");
1314 FPOffset = MFI.getObjectOffset(FPIndex);
1315 } else {
1316 FPOffset = getFramePointerSaveOffset();
1317 }
1318 }
1319
1320 int BPOffset = 0;
1321 if (HasBP) {
1322 if (isSVR4ABI) {
1323 int BPIndex = FI->getBasePointerSaveIndex();
1324 assert(BPIndex && "No Base Pointer Save Slot!");
1325 BPOffset = MFI.getObjectOffset(BPIndex);
1326 } else {
1327 BPOffset = getBasePointerSaveOffset();
1328 }
1329 }
1330
1331 int PBPOffset = 0;
1332 if (FI->usesPICBase()) {
1333 int PBPIndex = FI->getPICBasePointerSaveIndex();
1334 assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1335 PBPOffset = MFI.getObjectOffset(PBPIndex);
1336 }
1337
1338 bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1339
1340 if (IsReturnBlock) {
1341 unsigned RetOpcode = MBBI->getOpcode();
1342 bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
1343 RetOpcode == PPC::TCRETURNdi ||
1344 RetOpcode == PPC::TCRETURNai ||
1345 RetOpcode == PPC::TCRETURNri8 ||
1346 RetOpcode == PPC::TCRETURNdi8 ||
1347 RetOpcode == PPC::TCRETURNai8;
1348
1349 if (UsesTCRet) {
1350 int MaxTCRetDelta = FI->getTailCallSPDelta();
1351 MachineOperand &StackAdjust = MBBI->getOperand(1);
1352 assert(StackAdjust.isImm() && "Expecting immediate value.");
1353 // Adjust stack pointer.
1354 int StackAdj = StackAdjust.getImm();
1355 int Delta = StackAdj - MaxTCRetDelta;
1356 assert((Delta >= 0) && "Delta must be positive");
1357 if (MaxTCRetDelta>0)
1358 FrameSize += (StackAdj +Delta);
1359 else
1360 FrameSize += StackAdj;
1361 }
1362 }
1363
1364 // Frames of 32KB & larger require special handling because they cannot be
1365 // indexed into with a simple LD/LWZ immediate offset operand.
1366 bool isLargeFrame = !isInt<16>(FrameSize);
1367
1368 // On targets without red zone, the SP needs to be restored last, so that
1369 // all live contents of the stack frame are upwards of the SP. This means
1370 // that we cannot restore SP just now, since there may be more registers
1371 // to restore from the stack frame (e.g. R31). If the frame size is not
1372 // a simple immediate value, we will need a spare register to hold the
1373 // restored SP. If the frame size is known and small, we can simply adjust
1374 // the offsets of the registers to be restored, and still use SP to restore
1375 // them. In such case, the final update of SP will be to add the frame
1376 // size to it.
1377 // To simplify the code, set RBReg to the base register used to restore
1378 // values from the stack, and set SPAdd to the value that needs to be added
1379 // to the SP at the end. The default values are as if red zone was present.
1380 unsigned RBReg = SPReg;
1381 unsigned SPAdd = 0;
1382
1383 if (FrameSize) {
1384 // In the prologue, the loaded (or persistent) stack pointer value is
1385 // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
1386 // zone add this offset back now.
1387
1388 // If this function contained a fastcc call and GuaranteedTailCallOpt is
1389 // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
1390 // call which invalidates the stack pointer value in SP(0). So we use the
1391 // value of R31 in this case.
1392 if (FI->hasFastCall()) {
1393 assert(HasFP && "Expecting a valid frame pointer.");
1394 if (!HasRedZone)
1395 RBReg = FPReg;
1396 if (!isLargeFrame) {
1397 BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
1398 .addReg(FPReg).addImm(FrameSize);
1399 } else {
1400 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1401 .addImm(FrameSize >> 16);
1402 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1403 .addReg(ScratchReg, RegState::Kill)
1404 .addImm(FrameSize & 0xFFFF);
1405 BuildMI(MBB, MBBI, dl, AddInst)
1406 .addReg(RBReg)
1407 .addReg(FPReg)
1408 .addReg(ScratchReg);
1409 }
1410 } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
1411 if (HasRedZone) {
1412 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1413 .addReg(SPReg)
1414 .addImm(FrameSize);
1415 } else {
1416 // Make sure that adding FrameSize will not overflow the max offset
1417 // size.
1418 assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
1419 "Local offsets should be negative");
1420 SPAdd = FrameSize;
1421 FPOffset += FrameSize;
1422 BPOffset += FrameSize;
1423 PBPOffset += FrameSize;
1424 }
1425 } else {
1426 // We don't want to use ScratchReg as a base register, because it
1427 // could happen to be R0. Use FP instead, but make sure to preserve it.
1428 if (!HasRedZone) {
1429 // If FP is not saved, copy it to ScratchReg.
1430 if (!HasFP)
1431 BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
1432 .addReg(FPReg)
1433 .addReg(FPReg);
1434 RBReg = FPReg;
1435 }
1436 BuildMI(MBB, MBBI, dl, LoadInst, RBReg)
1437 .addImm(0)
1438 .addReg(SPReg);
1439 }
1440 }
1441 assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
1442 // If there is no red zone, ScratchReg may be needed for holding a useful
1443 // value (although not the base register). Make sure it is not overwritten
1444 // too early.
1445
1446 assert((isPPC64 || !MustSaveCR) &&
1447 "Epilogue CR restoring supported only in 64-bit mode");
1448
1449 // If we need to restore both the LR and the CR and we only have one
1450 // available scratch register, we must do them one at a time.
1451 if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1452 // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
1453 // is live here.
1454 assert(HasRedZone && "Expecting red zone");
1455 BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
1456 .addImm(8)
1457 .addReg(SPReg);
1458 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1459 BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
1460 .addReg(TempReg, getKillRegState(i == e-1));
1461 }
1462
1463 // Delay restoring of the LR if ScratchReg is needed. This is ok, since
1464 // LR is stored in the caller's stack frame. ScratchReg will be needed
1465 // if RBReg is anything other than SP. We shouldn't use ScratchReg as
1466 // a base register anyway, because it may happen to be R0.
1467 bool LoadedLR = false;
1468 if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
1469 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1470 .addImm(LROffset+SPAdd)
1471 .addReg(RBReg);
1472 LoadedLR = true;
1473 }
1474
1475 if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
1476 // This will only occur for PPC64.
1477 assert(isPPC64 && "Expecting 64-bit mode");
1478 assert(RBReg == SPReg && "Should be using SP as a base register");
1479 BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
1480 .addImm(8)
1481 .addReg(RBReg);
1482 }
1483
1484 if (HasFP) {
1485 // If there is red zone, restore FP directly, since SP has already been
1486 // restored. Otherwise, restore the value of FP into ScratchReg.
1487 if (HasRedZone || RBReg == SPReg)
1488 BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1489 .addImm(FPOffset)
1490 .addReg(SPReg);
1491 else
1492 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1493 .addImm(FPOffset)
1494 .addReg(RBReg);
1495 }
1496
1497 if (FI->usesPICBase())
1498 BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
1499 .addImm(PBPOffset)
1500 .addReg(RBReg);
1501
1502 if (HasBP)
1503 BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1504 .addImm(BPOffset)
1505 .addReg(RBReg);
1506
1507 // There is nothing more to be loaded from the stack, so now we can
1508 // restore SP: SP = RBReg + SPAdd.
1509 if (RBReg != SPReg || SPAdd != 0) {
1510 assert(!HasRedZone && "This should not happen with red zone");
1511 // If SPAdd is 0, generate a copy.
1512 if (SPAdd == 0)
1513 BuildMI(MBB, MBBI, dl, OrInst, SPReg)
1514 .addReg(RBReg)
1515 .addReg(RBReg);
1516 else
1517 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1518 .addReg(RBReg)
1519 .addImm(SPAdd);
1520
1521 assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
1522 if (RBReg == FPReg)
1523 BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1524 .addReg(ScratchReg)
1525 .addReg(ScratchReg);
1526
1527 // Now load the LR from the caller's stack frame.
1528 if (MustSaveLR && !LoadedLR)
1529 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1530 .addImm(LROffset)
1531 .addReg(SPReg);
1532 }
1533
1534 if (MustSaveCR &&
1535 !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64
1536 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1537 BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
1538 .addReg(TempReg, getKillRegState(i == e-1));
1539
1540 if (MustSaveLR)
1541 BuildMI(MBB, MBBI, dl, MTLRInst).addReg(ScratchReg);
1542
1543 // Callee pop calling convention. Pop parameter/linkage area. Used for tail
1544 // call optimization
1545 if (IsReturnBlock) {
1546 unsigned RetOpcode = MBBI->getOpcode();
1547 if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1548 (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1549 MF.getFunction().getCallingConv() == CallingConv::Fast) {
1550 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1551 unsigned CallerAllocatedAmt = FI->getMinReservedArea();
1552
1553 if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1554 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1555 .addReg(SPReg).addImm(CallerAllocatedAmt);
1556 } else {
1557 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1558 .addImm(CallerAllocatedAmt >> 16);
1559 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1560 .addReg(ScratchReg, RegState::Kill)
1561 .addImm(CallerAllocatedAmt & 0xFFFF);
1562 BuildMI(MBB, MBBI, dl, AddInst)
1563 .addReg(SPReg)
1564 .addReg(FPReg)
1565 .addReg(ScratchReg);
1566 }
1567 } else {
1568 createTailCallBranchInstr(MBB);
1569 }
1570 }
1571 }
1572
createTailCallBranchInstr(MachineBasicBlock & MBB) const1573 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
1574 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1575
1576 // If we got this far a first terminator should exist.
1577 assert(MBBI != MBB.end() && "Failed to find the first terminator.");
1578
1579 DebugLoc dl = MBBI->getDebugLoc();
1580 const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1581
1582 // Create branch instruction for pseudo tail call return instruction
1583 unsigned RetOpcode = MBBI->getOpcode();
1584 if (RetOpcode == PPC::TCRETURNdi) {
1585 MBBI = MBB.getLastNonDebugInstr();
1586 MachineOperand &JumpTarget = MBBI->getOperand(0);
1587 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1588 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1589 } else if (RetOpcode == PPC::TCRETURNri) {
1590 MBBI = MBB.getLastNonDebugInstr();
1591 assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1592 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
1593 } else if (RetOpcode == PPC::TCRETURNai) {
1594 MBBI = MBB.getLastNonDebugInstr();
1595 MachineOperand &JumpTarget = MBBI->getOperand(0);
1596 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
1597 } else if (RetOpcode == PPC::TCRETURNdi8) {
1598 MBBI = MBB.getLastNonDebugInstr();
1599 MachineOperand &JumpTarget = MBBI->getOperand(0);
1600 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1601 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1602 } else if (RetOpcode == PPC::TCRETURNri8) {
1603 MBBI = MBB.getLastNonDebugInstr();
1604 assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1605 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
1606 } else if (RetOpcode == PPC::TCRETURNai8) {
1607 MBBI = MBB.getLastNonDebugInstr();
1608 MachineOperand &JumpTarget = MBBI->getOperand(0);
1609 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
1610 }
1611 }
1612
determineCalleeSaves(MachineFunction & MF,BitVector & SavedRegs,RegScavenger * RS) const1613 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
1614 BitVector &SavedRegs,
1615 RegScavenger *RS) const {
1616 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1617
1618 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1619
1620 // Save and clear the LR state.
1621 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1622 unsigned LR = RegInfo->getRARegister();
1623 FI->setMustSaveLR(MustSaveLR(MF, LR));
1624 SavedRegs.reset(LR);
1625
1626 // Save R31 if necessary
1627 int FPSI = FI->getFramePointerSaveIndex();
1628 bool isPPC64 = Subtarget.isPPC64();
1629 bool isDarwinABI = Subtarget.isDarwinABI();
1630 MachineFrameInfo &MFI = MF.getFrameInfo();
1631
1632 // If the frame pointer save index hasn't been defined yet.
1633 if (!FPSI && needsFP(MF)) {
1634 // Find out what the fix offset of the frame pointer save area.
1635 int FPOffset = getFramePointerSaveOffset();
1636 // Allocate the frame index for frame pointer save area.
1637 FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
1638 // Save the result.
1639 FI->setFramePointerSaveIndex(FPSI);
1640 }
1641
1642 int BPSI = FI->getBasePointerSaveIndex();
1643 if (!BPSI && RegInfo->hasBasePointer(MF)) {
1644 int BPOffset = getBasePointerSaveOffset();
1645 // Allocate the frame index for the base pointer save area.
1646 BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
1647 // Save the result.
1648 FI->setBasePointerSaveIndex(BPSI);
1649 }
1650
1651 // Reserve stack space for the PIC Base register (R30).
1652 // Only used in SVR4 32-bit.
1653 if (FI->usesPICBase()) {
1654 int PBPSI = MFI.CreateFixedObject(4, -8, true);
1655 FI->setPICBasePointerSaveIndex(PBPSI);
1656 }
1657
1658 // Make sure we don't explicitly spill r31, because, for example, we have
1659 // some inline asm which explicitly clobbers it, when we otherwise have a
1660 // frame pointer and are using r31's spill slot for the prologue/epilogue
1661 // code. Same goes for the base pointer and the PIC base register.
1662 if (needsFP(MF))
1663 SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
1664 if (RegInfo->hasBasePointer(MF))
1665 SavedRegs.reset(RegInfo->getBaseRegister(MF));
1666 if (FI->usesPICBase())
1667 SavedRegs.reset(PPC::R30);
1668
1669 // Reserve stack space to move the linkage area to in case of a tail call.
1670 int TCSPDelta = 0;
1671 if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1672 (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
1673 MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
1674 }
1675
1676 // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the
1677 // function uses CR 2, 3, or 4.
1678 if (!isPPC64 && !isDarwinABI &&
1679 (SavedRegs.test(PPC::CR2) ||
1680 SavedRegs.test(PPC::CR3) ||
1681 SavedRegs.test(PPC::CR4))) {
1682 int FrameIdx = MFI.CreateFixedObject((uint64_t)4, (int64_t)-4, true);
1683 FI->setCRSpillFrameIndex(FrameIdx);
1684 }
1685 }
1686
processFunctionBeforeFrameFinalized(MachineFunction & MF,RegScavenger * RS) const1687 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
1688 RegScavenger *RS) const {
1689 // Early exit if not using the SVR4 ABI.
1690 if (!Subtarget.isSVR4ABI()) {
1691 addScavengingSpillSlot(MF, RS);
1692 return;
1693 }
1694
1695 // Get callee saved register information.
1696 MachineFrameInfo &MFI = MF.getFrameInfo();
1697 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1698
1699 // If the function is shrink-wrapped, and if the function has a tail call, the
1700 // tail call might not be in the new RestoreBlock, so real branch instruction
1701 // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
1702 // RestoreBlock. So we handle this case here.
1703 if (MFI.getSavePoint() && MFI.hasTailCall()) {
1704 MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
1705 for (MachineBasicBlock &MBB : MF) {
1706 if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
1707 createTailCallBranchInstr(MBB);
1708 }
1709 }
1710
1711 // Early exit if no callee saved registers are modified!
1712 if (CSI.empty() && !needsFP(MF)) {
1713 addScavengingSpillSlot(MF, RS);
1714 return;
1715 }
1716
1717 unsigned MinGPR = PPC::R31;
1718 unsigned MinG8R = PPC::X31;
1719 unsigned MinFPR = PPC::F31;
1720 unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
1721
1722 bool HasGPSaveArea = false;
1723 bool HasG8SaveArea = false;
1724 bool HasFPSaveArea = false;
1725 bool HasVRSAVESaveArea = false;
1726 bool HasVRSaveArea = false;
1727
1728 SmallVector<CalleeSavedInfo, 18> GPRegs;
1729 SmallVector<CalleeSavedInfo, 18> G8Regs;
1730 SmallVector<CalleeSavedInfo, 18> FPRegs;
1731 SmallVector<CalleeSavedInfo, 18> VRegs;
1732
1733 for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1734 unsigned Reg = CSI[i].getReg();
1735 if (PPC::GPRCRegClass.contains(Reg) ||
1736 PPC::SPE4RCRegClass.contains(Reg)) {
1737 HasGPSaveArea = true;
1738
1739 GPRegs.push_back(CSI[i]);
1740
1741 if (Reg < MinGPR) {
1742 MinGPR = Reg;
1743 }
1744 } else if (PPC::G8RCRegClass.contains(Reg)) {
1745 HasG8SaveArea = true;
1746
1747 G8Regs.push_back(CSI[i]);
1748
1749 if (Reg < MinG8R) {
1750 MinG8R = Reg;
1751 }
1752 } else if (PPC::F8RCRegClass.contains(Reg)) {
1753 HasFPSaveArea = true;
1754
1755 FPRegs.push_back(CSI[i]);
1756
1757 if (Reg < MinFPR) {
1758 MinFPR = Reg;
1759 }
1760 } else if (PPC::CRBITRCRegClass.contains(Reg) ||
1761 PPC::CRRCRegClass.contains(Reg)) {
1762 ; // do nothing, as we already know whether CRs are spilled
1763 } else if (PPC::VRSAVERCRegClass.contains(Reg)) {
1764 HasVRSAVESaveArea = true;
1765 } else if (PPC::VRRCRegClass.contains(Reg) ||
1766 PPC::SPERCRegClass.contains(Reg)) {
1767 // Altivec and SPE are mutually exclusive, but have the same stack
1768 // alignment requirements, so overload the save area for both cases.
1769 HasVRSaveArea = true;
1770
1771 VRegs.push_back(CSI[i]);
1772
1773 if (Reg < MinVR) {
1774 MinVR = Reg;
1775 }
1776 } else {
1777 llvm_unreachable("Unknown RegisterClass!");
1778 }
1779 }
1780
1781 PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
1782 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1783
1784 int64_t LowerBound = 0;
1785
1786 // Take into account stack space reserved for tail calls.
1787 int TCSPDelta = 0;
1788 if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1789 (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
1790 LowerBound = TCSPDelta;
1791 }
1792
1793 // The Floating-point register save area is right below the back chain word
1794 // of the previous stack frame.
1795 if (HasFPSaveArea) {
1796 for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
1797 int FI = FPRegs[i].getFrameIdx();
1798
1799 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1800 }
1801
1802 LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
1803 }
1804
1805 // Check whether the frame pointer register is allocated. If so, make sure it
1806 // is spilled to the correct offset.
1807 if (needsFP(MF)) {
1808 int FI = PFI->getFramePointerSaveIndex();
1809 assert(FI && "No Frame Pointer Save Slot!");
1810 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1811 // FP is R31/X31, so no need to update MinGPR/MinG8R.
1812 HasGPSaveArea = true;
1813 }
1814
1815 if (PFI->usesPICBase()) {
1816 int FI = PFI->getPICBasePointerSaveIndex();
1817 assert(FI && "No PIC Base Pointer Save Slot!");
1818 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1819
1820 MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
1821 HasGPSaveArea = true;
1822 }
1823
1824 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1825 if (RegInfo->hasBasePointer(MF)) {
1826 int FI = PFI->getBasePointerSaveIndex();
1827 assert(FI && "No Base Pointer Save Slot!");
1828 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1829
1830 unsigned BP = RegInfo->getBaseRegister(MF);
1831 if (PPC::G8RCRegClass.contains(BP)) {
1832 MinG8R = std::min<unsigned>(MinG8R, BP);
1833 HasG8SaveArea = true;
1834 } else if (PPC::GPRCRegClass.contains(BP)) {
1835 MinGPR = std::min<unsigned>(MinGPR, BP);
1836 HasGPSaveArea = true;
1837 }
1838 }
1839
1840 // General register save area starts right below the Floating-point
1841 // register save area.
1842 if (HasGPSaveArea || HasG8SaveArea) {
1843 // Move general register save area spill slots down, taking into account
1844 // the size of the Floating-point register save area.
1845 for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
1846 if (!GPRegs[i].isSpilledToReg()) {
1847 int FI = GPRegs[i].getFrameIdx();
1848 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1849 }
1850 }
1851
1852 // Move general register save area spill slots down, taking into account
1853 // the size of the Floating-point register save area.
1854 for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
1855 if (!G8Regs[i].isSpilledToReg()) {
1856 int FI = G8Regs[i].getFrameIdx();
1857 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1858 }
1859 }
1860
1861 unsigned MinReg =
1862 std::min<unsigned>(TRI->getEncodingValue(MinGPR),
1863 TRI->getEncodingValue(MinG8R));
1864
1865 if (Subtarget.isPPC64()) {
1866 LowerBound -= (31 - MinReg + 1) * 8;
1867 } else {
1868 LowerBound -= (31 - MinReg + 1) * 4;
1869 }
1870 }
1871
1872 // For 32-bit only, the CR save area is below the general register
1873 // save area. For 64-bit SVR4, the CR save area is addressed relative
1874 // to the stack pointer and hence does not need an adjustment here.
1875 // Only CR2 (the first nonvolatile spilled) has an associated frame
1876 // index so that we have a single uniform save area.
1877 if (spillsCR(MF) && !(Subtarget.isPPC64() && Subtarget.isSVR4ABI())) {
1878 // Adjust the frame index of the CR spill slot.
1879 for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1880 unsigned Reg = CSI[i].getReg();
1881
1882 if ((Subtarget.isSVR4ABI() && Reg == PPC::CR2)
1883 // Leave Darwin logic as-is.
1884 || (!Subtarget.isSVR4ABI() &&
1885 (PPC::CRBITRCRegClass.contains(Reg) ||
1886 PPC::CRRCRegClass.contains(Reg)))) {
1887 int FI = CSI[i].getFrameIdx();
1888
1889 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1890 }
1891 }
1892
1893 LowerBound -= 4; // The CR save area is always 4 bytes long.
1894 }
1895
1896 if (HasVRSAVESaveArea) {
1897 // FIXME SVR4: Is it actually possible to have multiple elements in CSI
1898 // which have the VRSAVE register class?
1899 // Adjust the frame index of the VRSAVE spill slot.
1900 for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1901 unsigned Reg = CSI[i].getReg();
1902
1903 if (PPC::VRSAVERCRegClass.contains(Reg)) {
1904 int FI = CSI[i].getFrameIdx();
1905
1906 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1907 }
1908 }
1909
1910 LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
1911 }
1912
1913 // Both Altivec and SPE have the same alignment and padding requirements
1914 // within the stack frame.
1915 if (HasVRSaveArea) {
1916 // Insert alignment padding, we need 16-byte alignment. Note: for positive
1917 // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since
1918 // we are using negative number here (the stack grows downward). We should
1919 // use formula : y = x & (~(n-1)). Where x is the size before aligning, n
1920 // is the alignment size ( n = 16 here) and y is the size after aligning.
1921 assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
1922 LowerBound &= ~(15);
1923
1924 for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
1925 int FI = VRegs[i].getFrameIdx();
1926
1927 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1928 }
1929 }
1930
1931 addScavengingSpillSlot(MF, RS);
1932 }
1933
1934 void
addScavengingSpillSlot(MachineFunction & MF,RegScavenger * RS) const1935 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
1936 RegScavenger *RS) const {
1937 // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
1938 // a large stack, which will require scavenging a register to materialize a
1939 // large offset.
1940
1941 // We need to have a scavenger spill slot for spills if the frame size is
1942 // large. In case there is no free register for large-offset addressing,
1943 // this slot is used for the necessary emergency spill. Also, we need the
1944 // slot for dynamic stack allocations.
1945
1946 // The scavenger might be invoked if the frame offset does not fit into
1947 // the 16-bit immediate. We don't know the complete frame size here
1948 // because we've not yet computed callee-saved register spills or the
1949 // needed alignment padding.
1950 unsigned StackSize = determineFrameLayout(MF, false, true);
1951 MachineFrameInfo &MFI = MF.getFrameInfo();
1952 if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
1953 hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
1954 const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
1955 const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
1956 const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
1957 const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
1958 unsigned Size = TRI.getSpillSize(RC);
1959 unsigned Align = TRI.getSpillAlignment(RC);
1960 RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
1961
1962 // Might we have over-aligned allocas?
1963 bool HasAlVars = MFI.hasVarSizedObjects() &&
1964 MFI.getMaxAlignment() > getStackAlignment();
1965
1966 // These kinds of spills might need two registers.
1967 if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars)
1968 RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
1969
1970 }
1971 }
1972
1973 // This function checks if a callee saved gpr can be spilled to a volatile
1974 // vector register. This occurs for leaf functions when the option
1975 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
1976 // which were not spilled to vectors, return false so the target independent
1977 // code can handle them by assigning a FrameIdx to a stack slot.
assignCalleeSavedSpillSlots(MachineFunction & MF,const TargetRegisterInfo * TRI,std::vector<CalleeSavedInfo> & CSI) const1978 bool PPCFrameLowering::assignCalleeSavedSpillSlots(
1979 MachineFunction &MF, const TargetRegisterInfo *TRI,
1980 std::vector<CalleeSavedInfo> &CSI) const {
1981
1982 if (CSI.empty())
1983 return true; // Early exit if no callee saved registers are modified!
1984
1985 // Early exit if cannot spill gprs to volatile vector registers.
1986 MachineFrameInfo &MFI = MF.getFrameInfo();
1987 if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
1988 return false;
1989
1990 // Build a BitVector of VSRs that can be used for spilling GPRs.
1991 BitVector BVAllocatable = TRI->getAllocatableSet(MF);
1992 BitVector BVCalleeSaved(TRI->getNumRegs());
1993 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1994 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
1995 for (unsigned i = 0; CSRegs[i]; ++i)
1996 BVCalleeSaved.set(CSRegs[i]);
1997
1998 for (unsigned Reg : BVAllocatable.set_bits()) {
1999 // Set to 0 if the register is not a volatile VF/F8 register, or if it is
2000 // used in the function.
2001 if (BVCalleeSaved[Reg] ||
2002 (!PPC::F8RCRegClass.contains(Reg) &&
2003 !PPC::VFRCRegClass.contains(Reg)) ||
2004 (MF.getRegInfo().isPhysRegUsed(Reg)))
2005 BVAllocatable.reset(Reg);
2006 }
2007
2008 bool AllSpilledToReg = true;
2009 for (auto &CS : CSI) {
2010 if (BVAllocatable.none())
2011 return false;
2012
2013 unsigned Reg = CS.getReg();
2014 if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
2015 AllSpilledToReg = false;
2016 continue;
2017 }
2018
2019 unsigned VolatileVFReg = BVAllocatable.find_first();
2020 if (VolatileVFReg < BVAllocatable.size()) {
2021 CS.setDstReg(VolatileVFReg);
2022 BVAllocatable.reset(VolatileVFReg);
2023 } else {
2024 AllSpilledToReg = false;
2025 }
2026 }
2027 return AllSpilledToReg;
2028 }
2029
2030
2031 bool
spillCalleeSavedRegisters(MachineBasicBlock & MBB,MachineBasicBlock::iterator MI,const std::vector<CalleeSavedInfo> & CSI,const TargetRegisterInfo * TRI) const2032 PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
2033 MachineBasicBlock::iterator MI,
2034 const std::vector<CalleeSavedInfo> &CSI,
2035 const TargetRegisterInfo *TRI) const {
2036
2037 // Currently, this function only handles SVR4 32- and 64-bit ABIs.
2038 // Return false otherwise to maintain pre-existing behavior.
2039 if (!Subtarget.isSVR4ABI())
2040 return false;
2041
2042 MachineFunction *MF = MBB.getParent();
2043 const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2044 DebugLoc DL;
2045 bool CRSpilled = false;
2046 MachineInstrBuilder CRMIB;
2047
2048 for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2049 unsigned Reg = CSI[i].getReg();
2050 // Only Darwin actually uses the VRSAVE register, but it can still appear
2051 // here if, for example, @llvm.eh.unwind.init() is used. If we're not on
2052 // Darwin, ignore it.
2053 if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
2054 continue;
2055
2056 // CR2 through CR4 are the nonvolatile CR fields.
2057 bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
2058
2059 // Add the callee-saved register as live-in; it's killed at the spill.
2060 // Do not do this for callee-saved registers that are live-in to the
2061 // function because they will already be marked live-in and this will be
2062 // adding it for a second time. It is an error to add the same register
2063 // to the set more than once.
2064 const MachineRegisterInfo &MRI = MF->getRegInfo();
2065 bool IsLiveIn = MRI.isLiveIn(Reg);
2066 if (!IsLiveIn)
2067 MBB.addLiveIn(Reg);
2068
2069 if (CRSpilled && IsCRField) {
2070 CRMIB.addReg(Reg, RegState::ImplicitKill);
2071 continue;
2072 }
2073
2074 // Insert the spill to the stack frame.
2075 if (IsCRField) {
2076 PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
2077 if (Subtarget.isPPC64()) {
2078 // The actual spill will happen at the start of the prologue.
2079 FuncInfo->addMustSaveCR(Reg);
2080 } else {
2081 CRSpilled = true;
2082 FuncInfo->setSpillsCR();
2083
2084 // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have
2085 // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
2086 CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
2087 .addReg(Reg, RegState::ImplicitKill);
2088
2089 MBB.insert(MI, CRMIB);
2090 MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
2091 .addReg(PPC::R12,
2092 getKillRegState(true)),
2093 CSI[i].getFrameIdx()));
2094 }
2095 } else {
2096 if (CSI[i].isSpilledToReg()) {
2097 NumPESpillVSR++;
2098 BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
2099 .addReg(Reg, getKillRegState(true));
2100 } else {
2101 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2102 // Use !IsLiveIn for the kill flag.
2103 // We do not want to kill registers that are live in this function
2104 // before their use because they will become undefined registers.
2105 TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn,
2106 CSI[i].getFrameIdx(), RC, TRI);
2107 }
2108 }
2109 }
2110 return true;
2111 }
2112
2113 static void
restoreCRs(bool isPPC64,bool is31,bool CR2Spilled,bool CR3Spilled,bool CR4Spilled,MachineBasicBlock & MBB,MachineBasicBlock::iterator MI,const std::vector<CalleeSavedInfo> & CSI,unsigned CSIIndex)2114 restoreCRs(bool isPPC64, bool is31,
2115 bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
2116 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2117 const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) {
2118
2119 MachineFunction *MF = MBB.getParent();
2120 const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
2121 DebugLoc DL;
2122 unsigned RestoreOp, MoveReg;
2123
2124 if (isPPC64)
2125 // This is handled during epilogue generation.
2126 return;
2127 else {
2128 // 32-bit: FP-relative
2129 MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ),
2130 PPC::R12),
2131 CSI[CSIIndex].getFrameIdx()));
2132 RestoreOp = PPC::MTOCRF;
2133 MoveReg = PPC::R12;
2134 }
2135
2136 if (CR2Spilled)
2137 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
2138 .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
2139
2140 if (CR3Spilled)
2141 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
2142 .addReg(MoveReg, getKillRegState(!CR4Spilled)));
2143
2144 if (CR4Spilled)
2145 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
2146 .addReg(MoveReg, getKillRegState(true)));
2147 }
2148
2149 MachineBasicBlock::iterator PPCFrameLowering::
eliminateCallFramePseudoInstr(MachineFunction & MF,MachineBasicBlock & MBB,MachineBasicBlock::iterator I) const2150 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
2151 MachineBasicBlock::iterator I) const {
2152 const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2153 if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2154 I->getOpcode() == PPC::ADJCALLSTACKUP) {
2155 // Add (actually subtract) back the amount the callee popped on return.
2156 if (int CalleeAmt = I->getOperand(1).getImm()) {
2157 bool is64Bit = Subtarget.isPPC64();
2158 CalleeAmt *= -1;
2159 unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
2160 unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
2161 unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
2162 unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
2163 unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
2164 unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
2165 const DebugLoc &dl = I->getDebugLoc();
2166
2167 if (isInt<16>(CalleeAmt)) {
2168 BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
2169 .addReg(StackReg, RegState::Kill)
2170 .addImm(CalleeAmt);
2171 } else {
2172 MachineBasicBlock::iterator MBBI = I;
2173 BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
2174 .addImm(CalleeAmt >> 16);
2175 BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
2176 .addReg(TmpReg, RegState::Kill)
2177 .addImm(CalleeAmt & 0xFFFF);
2178 BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
2179 .addReg(StackReg, RegState::Kill)
2180 .addReg(TmpReg);
2181 }
2182 }
2183 }
2184 // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
2185 return MBB.erase(I);
2186 }
2187
2188 bool
restoreCalleeSavedRegisters(MachineBasicBlock & MBB,MachineBasicBlock::iterator MI,std::vector<CalleeSavedInfo> & CSI,const TargetRegisterInfo * TRI) const2189 PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
2190 MachineBasicBlock::iterator MI,
2191 std::vector<CalleeSavedInfo> &CSI,
2192 const TargetRegisterInfo *TRI) const {
2193
2194 // Currently, this function only handles SVR4 32- and 64-bit ABIs.
2195 // Return false otherwise to maintain pre-existing behavior.
2196 if (!Subtarget.isSVR4ABI())
2197 return false;
2198
2199 MachineFunction *MF = MBB.getParent();
2200 const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2201 bool CR2Spilled = false;
2202 bool CR3Spilled = false;
2203 bool CR4Spilled = false;
2204 unsigned CSIIndex = 0;
2205
2206 // Initialize insertion-point logic; we will be restoring in reverse
2207 // order of spill.
2208 MachineBasicBlock::iterator I = MI, BeforeI = I;
2209 bool AtStart = I == MBB.begin();
2210
2211 if (!AtStart)
2212 --BeforeI;
2213
2214 for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2215 unsigned Reg = CSI[i].getReg();
2216
2217 // Only Darwin actually uses the VRSAVE register, but it can still appear
2218 // here if, for example, @llvm.eh.unwind.init() is used. If we're not on
2219 // Darwin, ignore it.
2220 if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
2221 continue;
2222
2223 if (Reg == PPC::CR2) {
2224 CR2Spilled = true;
2225 // The spill slot is associated only with CR2, which is the
2226 // first nonvolatile spilled. Save it here.
2227 CSIIndex = i;
2228 continue;
2229 } else if (Reg == PPC::CR3) {
2230 CR3Spilled = true;
2231 continue;
2232 } else if (Reg == PPC::CR4) {
2233 CR4Spilled = true;
2234 continue;
2235 } else {
2236 // When we first encounter a non-CR register after seeing at
2237 // least one CR register, restore all spilled CRs together.
2238 if ((CR2Spilled || CR3Spilled || CR4Spilled)
2239 && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
2240 bool is31 = needsFP(*MF);
2241 restoreCRs(Subtarget.isPPC64(), is31,
2242 CR2Spilled, CR3Spilled, CR4Spilled,
2243 MBB, I, CSI, CSIIndex);
2244 CR2Spilled = CR3Spilled = CR4Spilled = false;
2245 }
2246
2247 if (CSI[i].isSpilledToReg()) {
2248 DebugLoc DL;
2249 NumPEReloadVSR++;
2250 BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
2251 .addReg(CSI[i].getDstReg(), getKillRegState(true));
2252 } else {
2253 // Default behavior for non-CR saves.
2254 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2255 TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
2256 assert(I != MBB.begin() &&
2257 "loadRegFromStackSlot didn't insert any code!");
2258 }
2259 }
2260
2261 // Insert in reverse order.
2262 if (AtStart)
2263 I = MBB.begin();
2264 else {
2265 I = BeforeI;
2266 ++I;
2267 }
2268 }
2269
2270 // If we haven't yet spilled the CRs, do so now.
2271 if (CR2Spilled || CR3Spilled || CR4Spilled) {
2272 bool is31 = needsFP(*MF);
2273 restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled,
2274 MBB, I, CSI, CSIIndex);
2275 }
2276
2277 return true;
2278 }
2279
enableShrinkWrapping(const MachineFunction & MF) const2280 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2281 if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
2282 return false;
2283 return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() &&
2284 MF.getSubtarget<PPCSubtarget>().isPPC64());
2285 }
2286