1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the PPC implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "PPCFrameLowering.h"
14 #include "MCTargetDesc/PPCPredicates.h"
15 #include "PPCInstrBuilder.h"
16 #include "PPCInstrInfo.h"
17 #include "PPCMachineFunctionInfo.h"
18 #include "PPCSubtarget.h"
19 #include "PPCTargetMachine.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/CodeGen/LivePhysRegs.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/MachineModuleInfo.h"
26 #include "llvm/CodeGen/MachineRegisterInfo.h"
27 #include "llvm/CodeGen/RegisterScavenging.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/Target/TargetOptions.h"
30
31 using namespace llvm;
32
#define DEBUG_TYPE "framelowering"
// Counters reported via -stats for prologue/epilogue lowering: spills of CSRs
// to vector registers, their reloads, and emitted stack-probe prologues.
STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
STATISTIC(NumPrologProbed, "Number of prologues probed");

// Off-by-default, hidden knob allowing prologue spills to go to vector
// registers instead of stack slots.
static cl::opt<bool>
EnablePEVectorSpills("ppc-enable-pe-vector-spills",
                     cl::desc("Enable spills in prologue to vector registers."),
                     cl::init(false), cl::Hidden);
42
computeReturnSaveOffset(const PPCSubtarget & STI)43 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
44 if (STI.isAIXABI())
45 return STI.isPPC64() ? 16 : 8;
46 // SVR4 ABI:
47 return STI.isPPC64() ? 16 : 4;
48 }
49
computeTOCSaveOffset(const PPCSubtarget & STI)50 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
51 if (STI.isAIXABI())
52 return STI.isPPC64() ? 40 : 20;
53 return STI.isELFv2ABI() ? 24 : 40;
54 }
55
computeFramePointerSaveOffset(const PPCSubtarget & STI)56 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
57 // First slot in the general register save area.
58 return STI.isPPC64() ? -8U : -4U;
59 }
60
computeLinkageSize(const PPCSubtarget & STI)61 static unsigned computeLinkageSize(const PPCSubtarget &STI) {
62 if (STI.isAIXABI() || STI.isPPC64())
63 return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
64
65 // 32-bit SVR4 ABI:
66 return 8;
67 }
68
computeBasePointerSaveOffset(const PPCSubtarget & STI)69 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
70 // Third slot in the general purpose register save area.
71 if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent())
72 return -12U;
73
74 // Second slot in the general purpose register save area.
75 return STI.isPPC64() ? -16U : -8U;
76 }
77
computeCRSaveOffset(const PPCSubtarget & STI)78 static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
79 return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
80 }
81
// Constructor: cache all ABI-dependent frame-layout offsets up front. They
// depend only on the subtarget, and are queried repeatedly during frame
// lowering, so they are computed once here by the compute* helpers above.
PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
                          STI.getPlatformStackAlignment(), 0),
      Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
      TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
      FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
      LinkageSize(computeLinkageSize(Subtarget)),
      BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
      CRSaveOffset(computeCRSaveOffset(Subtarget)) {}
91
// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
// Returns the ABI-specific table of (register, offset) spill slots and sets
// NumEntries to its length.
const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
    unsigned &NumEntries) const {

// Floating-point register save area offsets.
#define CALLEE_SAVED_FPRS \
      {PPC::F31, -8},     \
      {PPC::F30, -16},    \
      {PPC::F29, -24},    \
      {PPC::F28, -32},    \
      {PPC::F27, -40},    \
      {PPC::F26, -48},    \
      {PPC::F25, -56},    \
      {PPC::F24, -64},    \
      {PPC::F23, -72},    \
      {PPC::F22, -80},    \
      {PPC::F21, -88},    \
      {PPC::F20, -96},    \
      {PPC::F19, -104},   \
      {PPC::F18, -112},   \
      {PPC::F17, -120},   \
      {PPC::F16, -128},   \
      {PPC::F15, -136},   \
      {PPC::F14, -144}

// 32-bit general purpose register save area offsets shared by ELF and
// AIX. AIX has an extra CSR with r13.
#define CALLEE_SAVED_GPRS32 \
      {PPC::R31, -4},       \
      {PPC::R30, -8},       \
      {PPC::R29, -12},      \
      {PPC::R28, -16},      \
      {PPC::R27, -20},      \
      {PPC::R26, -24},      \
      {PPC::R25, -28},      \
      {PPC::R24, -32},      \
      {PPC::R23, -36},      \
      {PPC::R22, -40},      \
      {PPC::R21, -44},      \
      {PPC::R20, -48},      \
      {PPC::R19, -52},      \
      {PPC::R18, -56},      \
      {PPC::R17, -60},      \
      {PPC::R16, -64},      \
      {PPC::R15, -68},      \
      {PPC::R14, -72}

// 64-bit general purpose register save area offsets.
#define CALLEE_SAVED_GPRS64 \
      {PPC::X31, -8},       \
      {PPC::X30, -16},      \
      {PPC::X29, -24},      \
      {PPC::X28, -32},      \
      {PPC::X27, -40},      \
      {PPC::X26, -48},      \
      {PPC::X25, -56},      \
      {PPC::X24, -64},      \
      {PPC::X23, -72},      \
      {PPC::X22, -80},      \
      {PPC::X21, -88},      \
      {PPC::X20, -96},      \
      {PPC::X19, -104},     \
      {PPC::X18, -112},     \
      {PPC::X17, -120},     \
      {PPC::X16, -128},     \
      {PPC::X15, -136},     \
      {PPC::X14, -144}

// Vector register save area offsets (16-byte slots).
#define CALLEE_SAVED_VRS \
      {PPC::V31, -16},   \
      {PPC::V30, -32},   \
      {PPC::V29, -48},   \
      {PPC::V28, -64},   \
      {PPC::V27, -80},   \
      {PPC::V26, -96},   \
      {PPC::V25, -112},  \
      {PPC::V24, -128},  \
      {PPC::V23, -144},  \
      {PPC::V22, -160},  \
      {PPC::V21, -176},  \
      {PPC::V20, -192}

  // Note that the offsets here overlap, but this is fixed up in
  // processFunctionBeforeFrameFinalized.

  // The tables below are function-local statics, so they are materialized
  // only once regardless of how often this accessor is called.
  static const SpillSlot ELFOffsets32[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS32,

      // CR save area offset. We map each of the nonvolatile CR fields
      // to the slot for CR2, which is the first of the nonvolatile CR
      // fields to be assigned, so that we only allocate one save slot.
      // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
      {PPC::CR2, -4},

      // VRSAVE save area offset.
      {PPC::VRSAVE, -4},

      CALLEE_SAVED_VRS,

      // SPE register save area (overlaps Vector save area).
      {PPC::S31, -8},
      {PPC::S30, -16},
      {PPC::S29, -24},
      {PPC::S28, -32},
      {PPC::S27, -40},
      {PPC::S26, -48},
      {PPC::S25, -56},
      {PPC::S24, -64},
      {PPC::S23, -72},
      {PPC::S22, -80},
      {PPC::S21, -88},
      {PPC::S20, -96},
      {PPC::S19, -104},
      {PPC::S18, -112},
      {PPC::S17, -120},
      {PPC::S16, -128},
      {PPC::S15, -136},
      {PPC::S14, -144}};

  static const SpillSlot ELFOffsets64[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS64,

      // VRSAVE save area offset.
      {PPC::VRSAVE, -4},
      CALLEE_SAVED_VRS
  };

  static const SpillSlot AIXOffsets32[] = {CALLEE_SAVED_FPRS,
                                           CALLEE_SAVED_GPRS32,
                                           // Add AIX's extra CSR.
                                           {PPC::R13, -76},
                                           CALLEE_SAVED_VRS};

  static const SpillSlot AIXOffsets64[] = {
      CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS64, CALLEE_SAVED_VRS};

  // Pick the table matching the current ABI.
  if (Subtarget.is64BitELFABI()) {
    NumEntries = array_lengthof(ELFOffsets64);
    return ELFOffsets64;
  }

  if (Subtarget.is32BitELFABI()) {
    NumEntries = array_lengthof(ELFOffsets32);
    return ELFOffsets32;
  }

  assert(Subtarget.isAIXABI() && "Unexpected ABI.");

  if (Subtarget.isPPC64()) {
    NumEntries = array_lengthof(AIXOffsets64);
    return AIXOffsets64;
  }

  NumEntries = array_lengthof(AIXOffsets32);
  return AIXOffsets32;
}
251
spillsCR(const MachineFunction & MF)252 static bool spillsCR(const MachineFunction &MF) {
253 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
254 return FuncInfo->isCRSpilled();
255 }
256
hasSpills(const MachineFunction & MF)257 static bool hasSpills(const MachineFunction &MF) {
258 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
259 return FuncInfo->hasSpills();
260 }
261
hasNonRISpills(const MachineFunction & MF)262 static bool hasNonRISpills(const MachineFunction &MF) {
263 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
264 return FuncInfo->hasNonRISpills();
265 }
266
267 /// MustSaveLR - Return true if this function requires that we save the LR
268 /// register onto the stack in the prolog and restore it in the epilog of the
269 /// function.
MustSaveLR(const MachineFunction & MF,unsigned LR)270 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
271 const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
272
273 // We need a save/restore of LR if there is any def of LR (which is
274 // defined by calls, including the PIC setup sequence), or if there is
275 // some use of the LR stack slot (e.g. for builtin_return_address).
276 // (LR comes in 32 and 64 bit versions.)
277 MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
278 return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
279 }
280
281 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
282 /// call frame size. Update the MachineFunction object with the stack size.
283 uint64_t
determineFrameLayoutAndUpdate(MachineFunction & MF,bool UseEstimate) const284 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
285 bool UseEstimate) const {
286 unsigned NewMaxCallFrameSize = 0;
287 uint64_t FrameSize = determineFrameLayout(MF, UseEstimate,
288 &NewMaxCallFrameSize);
289 MF.getFrameInfo().setStackSize(FrameSize);
290 MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
291 return FrameSize;
292 }
293
/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
/// \param MF the function whose frame layout is being computed.
/// \param UseEstimate when true, use MachineFrameInfo's stack-size estimate
///        instead of the finalized size.
/// \param NewMaxCallFrameSize if non-null, receives the computed (and, with
///        dynamic allocas, aligned) maximum call frame size.
/// \returns the total aligned frame size, or 0 when the whole frame fits in
///          the red zone and no stack adjustment is required.
uint64_t
PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
                                       bool UseEstimate,
                                       unsigned *NewMaxCallFrameSize) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();

  // Get the number of bytes to allocate from the FrameInfo
  uint64_t FrameSize =
      UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();

  // Get stack alignments. The frame must be aligned to the greatest of these:
  Align TargetAlign = getStackAlign(); // alignment required per the ABI
  Align MaxAlign = MFI.getMaxAlign();  // algmt required by data in frame
  Align Alignment = std::max(TargetAlign, MaxAlign);

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  unsigned LR = RegInfo->getRARegister();
  bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
  // A red-zone-only frame is possible only when nothing needs the stack
  // pointer adjusted: no dynamic allocas, no calls, no LR/TOC saves, and no
  // base pointer for over-aligned objects.
  bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
                       !MFI.adjustsStack() &&       // No calls.
                       !MustSaveLR(MF, LR) &&       // No need to save LR.
                       !FI->mustSaveTOC() &&        // No need to save TOC.
                       !RegInfo->hasBasePointer(MF); // No special alignment.

  // Note: for PPC32 SVR4ABI, we can still generate stackless
  // code if all local vars are reg-allocated.
  bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();

  // Check whether we can skip adjusting the stack pointer (by using red zone)
  if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
    // No need for frame
    return 0;
  }

  // Get the maximum call frame size of all the calls.
  unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();

  // Maximum call frame needs to be at least big enough for linkage area.
  unsigned minCallFrameSize = getLinkageSize();
  maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);

  // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
  // that allocations will be aligned.
  if (MFI.hasVarSizedObjects())
    maxCallFrameSize = alignTo(maxCallFrameSize, Alignment);

  // Update the new max call frame size if the caller passes in a valid pointer.
  if (NewMaxCallFrameSize)
    *NewMaxCallFrameSize = maxCallFrameSize;

  // Include call frame size in total.
  FrameSize += maxCallFrameSize;

  // Make sure the frame is aligned.
  FrameSize = alignTo(FrameSize, Alignment);

  return FrameSize;
}
356
357 // hasFP - Return true if the specified function actually has a dedicated frame
358 // pointer register.
hasFP(const MachineFunction & MF) const359 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
360 const MachineFrameInfo &MFI = MF.getFrameInfo();
361 // FIXME: This is pretty much broken by design: hasFP() might be called really
362 // early, before the stack layout was calculated and thus hasFP() might return
363 // true or false here depending on the time of call.
364 return (MFI.getStackSize()) && needsFP(MF);
365 }
366
367 // needsFP - Return true if the specified function should have a dedicated frame
368 // pointer register. This is true if the function has variable sized allocas or
369 // if frame pointer elimination is disabled.
needsFP(const MachineFunction & MF) const370 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
371 const MachineFrameInfo &MFI = MF.getFrameInfo();
372
373 // Naked functions have no stack frame pushed, so we don't have a frame
374 // pointer.
375 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
376 return false;
377
378 return MF.getTarget().Options.DisableFramePointerElim(MF) ||
379 MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
380 MF.exposesReturnsTwice() ||
381 (MF.getTarget().Options.GuaranteedTailCallOpt &&
382 MF.getInfo<PPCFunctionInfo>()->hasFastCall());
383 }
384
replaceFPWithRealFP(MachineFunction & MF) const385 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
386 bool is31 = needsFP(MF);
387 unsigned FPReg = is31 ? PPC::R31 : PPC::R1;
388 unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
389
390 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
391 bool HasBP = RegInfo->hasBasePointer(MF);
392 unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
393 unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
394
395 for (MachineBasicBlock &MBB : MF)
396 for (MachineBasicBlock::iterator MBBI = MBB.end(); MBBI != MBB.begin();) {
397 --MBBI;
398 for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
399 MachineOperand &MO = MBBI->getOperand(I);
400 if (!MO.isReg())
401 continue;
402
403 switch (MO.getReg()) {
404 case PPC::FP:
405 MO.setReg(FPReg);
406 break;
407 case PPC::FP8:
408 MO.setReg(FP8Reg);
409 break;
410 case PPC::BP:
411 MO.setReg(BPReg);
412 break;
413 case PPC::BP8:
414 MO.setReg(BP8Reg);
415 break;
416
417 }
418 }
419 }
420 }
421
/* This function will do the following:
   - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
     respectively (defaults recommended by the ABI) and return true
   - If MBB is not an entry block, initialize the register scavenger and look
     for available registers.
   - If the defaults (R0/R12) are available, return true
   - If TwoUniqueRegsRequired is set to true, it looks for two unique
     registers. Otherwise, look for a single available register.
   - If the required registers are found, set SR1 and SR2 and return true.
   - If the required registers are not found, set SR2 or both SR1 and SR2 to
     PPC::NoRegister and return false.

   Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
   is not set, this function will attempt to find two different registers, but
   still return true if only one register is available (and set SR1 == SR2).
*/
bool
PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
                                      bool UseAtEnd,
                                      bool TwoUniqueRegsRequired,
                                      Register *SR1,
                                      Register *SR2) const {
  RegScavenger RS;
  // R0/X0 and R12/X12 are the ABI-recommended scratch registers.
  Register R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
  Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;

  // Set the defaults for the two scratch registers.
  if (SR1)
    *SR1 = R0;

  if (SR2) {
    assert (SR1 && "Asking for the second scratch register but not the first?");
    *SR2 = R12;
  }

  // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
  if ((UseAtEnd && MBB->isReturnBlock()) ||
      (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
    return true;

  // Scavenge from the start of the block.
  RS.enterBasicBlock(*MBB);

  if (UseAtEnd && !MBB->empty()) {
    // The scratch register will be used at the end of the block, so must
    // consider all registers used within the block

    MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
    // If no terminator, back iterator up to previous instruction.
    if (MBBI == MBB->end())
      MBBI = std::prev(MBBI);

    // Advance the scavenger's liveness state to just before MBBI.
    if (MBBI != MBB->begin())
      RS.forward(MBBI);
  }

  // If the two registers are available, we're all good.
  // Note that we only return here if both R0 and R12 are available because
  // although the function may not require two unique registers, it may benefit
  // from having two so we should try to provide them.
  if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
    return true;

  // Get the list of callee-saved registers for the target.
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());

  // Get all the available registers in the block.
  BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
                                     &PPC::GPRCRegClass);

  // We shouldn't use callee-saved registers as scratch registers as they may be
  // available when looking for a candidate block for shrink wrapping but not
  // available when the actual prologue/epilogue is being emitted because they
  // were added as live-in to the prologue block by PrologueEpilogueInserter.
  if (CSRegs)
    for (int i = 0; CSRegs[i]; ++i)
      BV.reset(CSRegs[i]);

  // Set the first scratch register to the first available one.
  if (SR1) {
    int FirstScratchReg = BV.find_first();
    *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
  }

  // If there is another one available, set the second scratch register to that.
  // Otherwise, set it to either PPC::NoRegister if this function requires two
  // or to whatever SR1 is set to if this function doesn't require two.
  if (SR2) {
    int SecondScratchReg = BV.find_next(*SR1);
    if (SecondScratchReg != -1)
      *SR2 = SecondScratchReg;
    else
      // Register() is PPC::NoRegister.
      *SR2 = TwoUniqueRegsRequired ? Register() : *SR1;
  }

  // Now that we've done our best to provide both registers, double check
  // whether we were unable to provide enough.
  if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
    return false;

  return true;
}
523
524 // We need a scratch register for spilling LR and for spilling CR. By default,
525 // we use two scratch registers to hide latency. However, if only one scratch
526 // register is available, we can adjust for that by not overlapping the spill
527 // code. However, if we need to realign the stack (i.e. have a base pointer)
528 // and the stack frame is large, we need two scratch registers.
529 // Also, stack probe requires two scratch registers, one for old sp, one for
530 // large frame and large probe size.
531 bool
twoUniqueScratchRegsRequired(MachineBasicBlock * MBB) const532 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
533 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
534 MachineFunction &MF = *(MBB->getParent());
535 bool HasBP = RegInfo->hasBasePointer(MF);
536 unsigned FrameSize = determineFrameLayout(MF);
537 int NegFrameSize = -FrameSize;
538 bool IsLargeFrame = !isInt<16>(NegFrameSize);
539 MachineFrameInfo &MFI = MF.getFrameInfo();
540 Align MaxAlign = MFI.getMaxAlign();
541 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
542 const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
543
544 return ((IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1) ||
545 TLI.hasInlineStackProbe(MF);
546 }
547
canUseAsPrologue(const MachineBasicBlock & MBB) const548 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
549 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
550
551 return findScratchRegister(TmpMBB, false,
552 twoUniqueScratchRegsRequired(TmpMBB));
553 }
554
canUseAsEpilogue(const MachineBasicBlock & MBB) const555 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
556 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
557
558 return findScratchRegister(TmpMBB, true);
559 }
560
stackUpdateCanBeMoved(MachineFunction & MF) const561 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
562 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
563 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
564
565 // Abort if there is no register info or function info.
566 if (!RegInfo || !FI)
567 return false;
568
569 // Only move the stack update on ELFv2 ABI and PPC64.
570 if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
571 return false;
572
573 // Check the frame size first and return false if it does not fit the
574 // requirements.
575 // We need a non-zero frame size as well as a frame that will fit in the red
576 // zone. This is because by moving the stack pointer update we are now storing
577 // to the red zone until the stack pointer is updated. If we get an interrupt
578 // inside the prologue but before the stack update we now have a number of
579 // stores to the red zone and those stores must all fit.
580 MachineFrameInfo &MFI = MF.getFrameInfo();
581 unsigned FrameSize = MFI.getStackSize();
582 if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
583 return false;
584
585 // Frame pointers and base pointers complicate matters so don't do anything
586 // if we have them. For example having a frame pointer will sometimes require
587 // a copy of r1 into r31 and that makes keeping track of updates to r1 more
588 // difficult. Similar situation exists with setjmp.
589 if (hasFP(MF) || RegInfo->hasBasePointer(MF) || MF.exposesReturnsTwice())
590 return false;
591
592 // Calls to fast_cc functions use different rules for passing parameters on
593 // the stack from the ABI and using PIC base in the function imposes
594 // similar restrictions to using the base pointer. It is not generally safe
595 // to move the stack pointer update in these situations.
596 if (FI->hasFastCall() || FI->usesPICBase())
597 return false;
598
599 // Finally we can move the stack update if we do not require register
600 // scavenging. Register scavenging can introduce more spills and so
601 // may make the frame size larger than we have computed.
602 return !RegInfo->requiresFrameIndexScavenging(MF);
603 }
604
emitPrologue(MachineFunction & MF,MachineBasicBlock & MBB) const605 void PPCFrameLowering::emitPrologue(MachineFunction &MF,
606 MachineBasicBlock &MBB) const {
607 MachineBasicBlock::iterator MBBI = MBB.begin();
608 MachineFrameInfo &MFI = MF.getFrameInfo();
609 const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
610 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
611 const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
612
613 MachineModuleInfo &MMI = MF.getMMI();
614 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
615 DebugLoc dl;
616 // AIX assembler does not support cfi directives.
617 const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
618
619 // Get processor type.
620 bool isPPC64 = Subtarget.isPPC64();
621 // Get the ABI.
622 bool isSVR4ABI = Subtarget.isSVR4ABI();
623 bool isELFv2ABI = Subtarget.isELFv2ABI();
624 assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI.");
625
626 // Work out frame sizes.
627 uint64_t FrameSize = determineFrameLayoutAndUpdate(MF);
628 int64_t NegFrameSize = -FrameSize;
629 if (!isPPC64 && (!isInt<32>(FrameSize) || !isInt<32>(NegFrameSize)))
630 llvm_unreachable("Unhandled stack size!");
631
632 if (MFI.isFrameAddressTaken())
633 replaceFPWithRealFP(MF);
634
635 // Check if the link register (LR) must be saved.
636 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
637 bool MustSaveLR = FI->mustSaveLR();
638 bool MustSaveTOC = FI->mustSaveTOC();
639 const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
640 bool MustSaveCR = !MustSaveCRs.empty();
641 // Do we have a frame pointer and/or base pointer for this function?
642 bool HasFP = hasFP(MF);
643 bool HasBP = RegInfo->hasBasePointer(MF);
644 bool HasRedZone = isPPC64 || !isSVR4ABI;
645 bool HasROPProtect = Subtarget.hasROPProtect();
646 bool HasPrivileged = Subtarget.hasPrivileged();
647
648 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
649 Register BPReg = RegInfo->getBaseRegister(MF);
650 Register FPReg = isPPC64 ? PPC::X31 : PPC::R31;
651 Register LRReg = isPPC64 ? PPC::LR8 : PPC::LR;
652 Register TOCReg = isPPC64 ? PPC::X2 : PPC::R2;
653 Register ScratchReg;
654 Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
655 // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
656 const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
657 : PPC::MFLR );
658 const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
659 : PPC::STW );
660 const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
661 : PPC::STWU );
662 const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
663 : PPC::STWUX);
664 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
665 : PPC::OR );
666 const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
667 : PPC::SUBFC);
668 const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
669 : PPC::SUBFIC);
670 const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8
671 : PPC::MFCR);
672 const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW);
673 const MCInstrDesc &HashST =
674 TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHSTP8 : PPC::HASHST8)
675 : (HasPrivileged ? PPC::HASHSTP : PPC::HASHST));
676
677 // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
678 // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
679 // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
680 // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
681 assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
682 "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
683
684 // Using the same bool variable as below to suppress compiler warnings.
685 bool SingleScratchReg = findScratchRegister(
686 &MBB, false, twoUniqueScratchRegsRequired(&MBB), &ScratchReg, &TempReg);
687 assert(SingleScratchReg &&
688 "Required number of registers not available in this block");
689
690 SingleScratchReg = ScratchReg == TempReg;
691
692 int64_t LROffset = getReturnSaveOffset();
693
694 int64_t FPOffset = 0;
695 if (HasFP) {
696 MachineFrameInfo &MFI = MF.getFrameInfo();
697 int FPIndex = FI->getFramePointerSaveIndex();
698 assert(FPIndex && "No Frame Pointer Save Slot!");
699 FPOffset = MFI.getObjectOffset(FPIndex);
700 }
701
702 int64_t BPOffset = 0;
703 if (HasBP) {
704 MachineFrameInfo &MFI = MF.getFrameInfo();
705 int BPIndex = FI->getBasePointerSaveIndex();
706 assert(BPIndex && "No Base Pointer Save Slot!");
707 BPOffset = MFI.getObjectOffset(BPIndex);
708 }
709
710 int64_t PBPOffset = 0;
711 if (FI->usesPICBase()) {
712 MachineFrameInfo &MFI = MF.getFrameInfo();
713 int PBPIndex = FI->getPICBasePointerSaveIndex();
714 assert(PBPIndex && "No PIC Base Pointer Save Slot!");
715 PBPOffset = MFI.getObjectOffset(PBPIndex);
716 }
717
718 // Get stack alignments.
719 Align MaxAlign = MFI.getMaxAlign();
720 if (HasBP && MaxAlign > 1)
721 assert(Log2(MaxAlign) < 16 && "Invalid alignment!");
722
723 // Frames of 32KB & larger require special handling because they cannot be
724 // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
725 bool isLargeFrame = !isInt<16>(NegFrameSize);
726
727 // Check if we can move the stack update instruction (stdu) down the prologue
728 // past the callee saves. Hopefully this will avoid the situation where the
729 // saves are waiting for the update on the store with update to complete.
730 MachineBasicBlock::iterator StackUpdateLoc = MBBI;
731 bool MovingStackUpdateDown = false;
732
733 // Check if we can move the stack update.
734 if (stackUpdateCanBeMoved(MF)) {
735 const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
736 for (CalleeSavedInfo CSI : Info) {
737 // If the callee saved register is spilled to a register instead of the
738 // stack then the spill no longer uses the stack pointer.
739 // This can lead to two consequences:
740 // 1) We no longer need to update the stack because the function does not
741 // spill any callee saved registers to stack.
742 // 2) We have a situation where we still have to update the stack pointer
743 // even though some registers are spilled to other registers. In
744 // this case the current code moves the stack update to an incorrect
745 // position.
746 // In either case we should abort moving the stack update operation.
747 if (CSI.isSpilledToReg()) {
748 StackUpdateLoc = MBBI;
749 MovingStackUpdateDown = false;
750 break;
751 }
752
753 int FrIdx = CSI.getFrameIdx();
754 // If the frame index is not negative the callee saved info belongs to a
755 // stack object that is not a fixed stack object. We ignore non-fixed
756 // stack objects because we won't move the stack update pointer past them.
757 if (FrIdx >= 0)
758 continue;
759
760 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
761 StackUpdateLoc++;
762 MovingStackUpdateDown = true;
763 } else {
764 // We need all of the Frame Indices to meet these conditions.
765 // If they do not, abort the whole operation.
766 StackUpdateLoc = MBBI;
767 MovingStackUpdateDown = false;
768 break;
769 }
770 }
771
772 // If the operation was not aborted then update the object offset.
773 if (MovingStackUpdateDown) {
774 for (CalleeSavedInfo CSI : Info) {
775 int FrIdx = CSI.getFrameIdx();
776 if (FrIdx < 0)
777 MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
778 }
779 }
780 }
781
782 // Where in the prologue we move the CR fields depends on how many scratch
783 // registers we have, and if we need to save the link register or not. This
784 // lambda is to avoid duplicating the logic in 2 places.
785 auto BuildMoveFromCR = [&]() {
786 if (isELFv2ABI && MustSaveCRs.size() == 1) {
787 // In the ELFv2 ABI, we are not required to save all CR fields.
788 // If only one CR field is clobbered, it is more efficient to use
789 // mfocrf to selectively save just that field, because mfocrf has short
790 // latency compares to mfcr.
791 assert(isPPC64 && "V2 ABI is 64-bit only.");
792 MachineInstrBuilder MIB =
793 BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg);
794 MIB.addReg(MustSaveCRs[0], RegState::Kill);
795 } else {
796 MachineInstrBuilder MIB =
797 BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg);
798 for (unsigned CRfield : MustSaveCRs)
799 MIB.addReg(CRfield, RegState::ImplicitKill);
800 }
801 };
802
803 // If we need to spill the CR and the LR but we don't have two separate
804 // registers available, we must spill them one at a time
805 if (MustSaveCR && SingleScratchReg && MustSaveLR) {
806 BuildMoveFromCR();
807 BuildMI(MBB, MBBI, dl, StoreWordInst)
808 .addReg(TempReg, getKillRegState(true))
809 .addImm(CRSaveOffset)
810 .addReg(SPReg);
811 }
812
813 if (MustSaveLR)
814 BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
815
816 if (MustSaveCR && !(SingleScratchReg && MustSaveLR))
817 BuildMoveFromCR();
818
819 if (HasRedZone) {
820 if (HasFP)
821 BuildMI(MBB, MBBI, dl, StoreInst)
822 .addReg(FPReg)
823 .addImm(FPOffset)
824 .addReg(SPReg);
825 if (FI->usesPICBase())
826 BuildMI(MBB, MBBI, dl, StoreInst)
827 .addReg(PPC::R30)
828 .addImm(PBPOffset)
829 .addReg(SPReg);
830 if (HasBP)
831 BuildMI(MBB, MBBI, dl, StoreInst)
832 .addReg(BPReg)
833 .addImm(BPOffset)
834 .addReg(SPReg);
835 }
836
837 // Generate the instruction to store the LR. In the case where ROP protection
838 // is required the register holding the LR should not be killed as it will be
839 // used by the hash store instruction.
840 if (MustSaveLR) {
841 BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
842 .addReg(ScratchReg, getKillRegState(!HasROPProtect))
843 .addImm(LROffset)
844 .addReg(SPReg);
845
846 // Add the ROP protection Hash Store instruction.
847 // NOTE: This is technically a violation of the ABI. The hash can be saved
848 // up to 512 bytes into the Protected Zone. This can be outside of the
849 // initial 288 byte volatile program storage region in the Protected Zone.
850 // However, this restriction will be removed in an upcoming revision of the
851 // ABI.
852 if (HasROPProtect) {
853 const int SaveIndex = FI->getROPProtectionHashSaveIndex();
854 const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex);
855 assert((ImmOffset <= -8 && ImmOffset >= -512) &&
856 "ROP hash save offset out of range.");
857 assert(((ImmOffset & 0x7) == 0) &&
858 "ROP hash save offset must be 8 byte aligned.");
859 BuildMI(MBB, StackUpdateLoc, dl, HashST)
860 .addReg(ScratchReg, getKillRegState(true))
861 .addImm(ImmOffset)
862 .addReg(SPReg);
863 }
864 }
865
866 if (MustSaveCR &&
867 !(SingleScratchReg && MustSaveLR)) {
868 assert(HasRedZone && "A red zone is always available on PPC64");
869 BuildMI(MBB, MBBI, dl, StoreWordInst)
870 .addReg(TempReg, getKillRegState(true))
871 .addImm(CRSaveOffset)
872 .addReg(SPReg);
873 }
874
875 // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
876 if (!FrameSize)
877 return;
878
879 // Adjust stack pointer: r1 += NegFrameSize.
880 // If there is a preferred stack alignment, align R1 now
881
882 if (HasBP && HasRedZone) {
883 // Save a copy of r1 as the base pointer.
884 BuildMI(MBB, MBBI, dl, OrInst, BPReg)
885 .addReg(SPReg)
886 .addReg(SPReg);
887 }
888
889 // Have we generated a STUX instruction to claim stack frame? If so,
890 // the negated frame size will be placed in ScratchReg.
891 bool HasSTUX = false;
892
893 // If FrameSize <= TLI.getStackProbeSize(MF), as POWER ABI requires backchain
894 // pointer is always stored at SP, we will get a free probe due to an essential
895 // STU(X) instruction.
896 if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) {
897 // To be consistent with other targets, a pseudo instruction is emitted and
898 // will be later expanded in `inlineStackProbe`.
899 BuildMI(MBB, MBBI, dl,
900 TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
901 : PPC::PROBED_STACKALLOC_32))
902 .addDef(TempReg)
903 .addDef(ScratchReg) // ScratchReg stores the old sp.
904 .addImm(NegFrameSize);
905 // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we
906 // update the ScratchReg to meet the assumption that ScratchReg contains
907 // the NegFrameSize. This solution is rather tricky.
908 if (!HasRedZone) {
909 BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
910 .addReg(ScratchReg)
911 .addReg(SPReg);
912 HasSTUX = true;
913 }
914 } else {
915 // This condition must be kept in sync with canUseAsPrologue.
916 if (HasBP && MaxAlign > 1) {
917 if (isPPC64)
918 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
919 .addReg(SPReg)
920 .addImm(0)
921 .addImm(64 - Log2(MaxAlign));
922 else // PPC32...
923 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
924 .addReg(SPReg)
925 .addImm(0)
926 .addImm(32 - Log2(MaxAlign))
927 .addImm(31);
928 if (!isLargeFrame) {
929 BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
930 .addReg(ScratchReg, RegState::Kill)
931 .addImm(NegFrameSize);
932 } else {
933 assert(!SingleScratchReg && "Only a single scratch reg available");
934 TII.materializeImmPostRA(MBB, MBBI, dl, TempReg, NegFrameSize);
935 BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
936 .addReg(ScratchReg, RegState::Kill)
937 .addReg(TempReg, RegState::Kill);
938 }
939
940 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
941 .addReg(SPReg, RegState::Kill)
942 .addReg(SPReg)
943 .addReg(ScratchReg);
944 HasSTUX = true;
945
946 } else if (!isLargeFrame) {
947 BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
948 .addReg(SPReg)
949 .addImm(NegFrameSize)
950 .addReg(SPReg);
951
952 } else {
953 TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, NegFrameSize);
954 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
955 .addReg(SPReg, RegState::Kill)
956 .addReg(SPReg)
957 .addReg(ScratchReg);
958 HasSTUX = true;
959 }
960 }
961
962 // Save the TOC register after the stack pointer update if a prologue TOC
963 // save is required for the function.
964 if (MustSaveTOC) {
965 assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
966 BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
967 .addReg(TOCReg, getKillRegState(true))
968 .addImm(TOCSaveOffset)
969 .addReg(SPReg);
970 }
971
972 if (!HasRedZone) {
973 assert(!isPPC64 && "A red zone is always available on PPC64");
974 if (HasSTUX) {
975 // The negated frame size is in ScratchReg, and the SPReg has been
976 // decremented by the frame size: SPReg = old SPReg + ScratchReg.
977 // Since FPOffset, PBPOffset, etc. are relative to the beginning of
978 // the stack frame (i.e. the old SP), ideally, we would put the old
979 // SP into a register and use it as the base for the stores. The
980 // problem is that the only available register may be ScratchReg,
981 // which could be R0, and R0 cannot be used as a base address.
982
983 // First, set ScratchReg to the old SP. This may need to be modified
984 // later.
985 BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
986 .addReg(ScratchReg, RegState::Kill)
987 .addReg(SPReg);
988
989 if (ScratchReg == PPC::R0) {
990 // R0 cannot be used as a base register, but it can be used as an
991 // index in a store-indexed.
992 int LastOffset = 0;
993 if (HasFP) {
994 // R0 += (FPOffset-LastOffset).
995 // Need addic, since addi treats R0 as 0.
996 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
997 .addReg(ScratchReg)
998 .addImm(FPOffset-LastOffset);
999 LastOffset = FPOffset;
1000 // Store FP into *R0.
1001 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1002 .addReg(FPReg, RegState::Kill) // Save FP.
1003 .addReg(PPC::ZERO)
1004 .addReg(ScratchReg); // This will be the index (R0 is ok here).
1005 }
1006 if (FI->usesPICBase()) {
1007 // R0 += (PBPOffset-LastOffset).
1008 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1009 .addReg(ScratchReg)
1010 .addImm(PBPOffset-LastOffset);
1011 LastOffset = PBPOffset;
1012 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1013 .addReg(PPC::R30, RegState::Kill) // Save PIC base pointer.
1014 .addReg(PPC::ZERO)
1015 .addReg(ScratchReg); // This will be the index (R0 is ok here).
1016 }
1017 if (HasBP) {
1018 // R0 += (BPOffset-LastOffset).
1019 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1020 .addReg(ScratchReg)
1021 .addImm(BPOffset-LastOffset);
1022 LastOffset = BPOffset;
1023 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1024 .addReg(BPReg, RegState::Kill) // Save BP.
1025 .addReg(PPC::ZERO)
1026 .addReg(ScratchReg); // This will be the index (R0 is ok here).
1027 // BP = R0-LastOffset
1028 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
1029 .addReg(ScratchReg, RegState::Kill)
1030 .addImm(-LastOffset);
1031 }
1032 } else {
1033 // ScratchReg is not R0, so use it as the base register. It is
1034 // already set to the old SP, so we can use the offsets directly.
1035
1036 // Now that the stack frame has been allocated, save all the necessary
1037 // registers using ScratchReg as the base address.
1038 if (HasFP)
1039 BuildMI(MBB, MBBI, dl, StoreInst)
1040 .addReg(FPReg)
1041 .addImm(FPOffset)
1042 .addReg(ScratchReg);
1043 if (FI->usesPICBase())
1044 BuildMI(MBB, MBBI, dl, StoreInst)
1045 .addReg(PPC::R30)
1046 .addImm(PBPOffset)
1047 .addReg(ScratchReg);
1048 if (HasBP) {
1049 BuildMI(MBB, MBBI, dl, StoreInst)
1050 .addReg(BPReg)
1051 .addImm(BPOffset)
1052 .addReg(ScratchReg);
1053 BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1054 .addReg(ScratchReg, RegState::Kill)
1055 .addReg(ScratchReg);
1056 }
1057 }
1058 } else {
1059 // The frame size is a known 16-bit constant (fitting in the immediate
1060 // field of STWU). To be here we have to be compiling for PPC32.
1061 // Since the SPReg has been decreased by FrameSize, add it back to each
1062 // offset.
1063 if (HasFP)
1064 BuildMI(MBB, MBBI, dl, StoreInst)
1065 .addReg(FPReg)
1066 .addImm(FrameSize + FPOffset)
1067 .addReg(SPReg);
1068 if (FI->usesPICBase())
1069 BuildMI(MBB, MBBI, dl, StoreInst)
1070 .addReg(PPC::R30)
1071 .addImm(FrameSize + PBPOffset)
1072 .addReg(SPReg);
1073 if (HasBP) {
1074 BuildMI(MBB, MBBI, dl, StoreInst)
1075 .addReg(BPReg)
1076 .addImm(FrameSize + BPOffset)
1077 .addReg(SPReg);
1078 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1079 .addReg(SPReg)
1080 .addImm(FrameSize);
1081 }
1082 }
1083 }
1084
1085 // Add Call Frame Information for the instructions we generated above.
1086 if (needsCFI) {
1087 unsigned CFIIndex;
1088
1089 if (HasBP) {
1090 // Define CFA in terms of BP. Do this in preference to using FP/SP,
1091 // because if the stack needed aligning then CFA won't be at a fixed
1092 // offset from FP/SP.
1093 unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1094 CFIIndex = MF.addFrameInst(
1095 MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1096 } else {
1097 // Adjust the definition of CFA to account for the change in SP.
1098 assert(NegFrameSize);
1099 CFIIndex = MF.addFrameInst(
1100 MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize));
1101 }
1102 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1103 .addCFIIndex(CFIIndex);
1104
1105 if (HasFP) {
1106 // Describe where FP was saved, at a fixed offset from CFA.
1107 unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1108 CFIIndex = MF.addFrameInst(
1109 MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1110 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1111 .addCFIIndex(CFIIndex);
1112 }
1113
1114 if (FI->usesPICBase()) {
1115 // Describe where FP was saved, at a fixed offset from CFA.
1116 unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1117 CFIIndex = MF.addFrameInst(
1118 MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1119 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1120 .addCFIIndex(CFIIndex);
1121 }
1122
1123 if (HasBP) {
1124 // Describe where BP was saved, at a fixed offset from CFA.
1125 unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1126 CFIIndex = MF.addFrameInst(
1127 MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1128 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1129 .addCFIIndex(CFIIndex);
1130 }
1131
1132 if (MustSaveLR) {
1133 // Describe where LR was saved, at a fixed offset from CFA.
1134 unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1135 CFIIndex = MF.addFrameInst(
1136 MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1137 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1138 .addCFIIndex(CFIIndex);
1139 }
1140 }
1141
1142 // If there is a frame pointer, copy R1 into R31
1143 if (HasFP) {
1144 BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1145 .addReg(SPReg)
1146 .addReg(SPReg);
1147
1148 if (!HasBP && needsCFI) {
1149 // Change the definition of CFA from SP+offset to FP+offset, because SP
1150 // will change at every alloca.
1151 unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1152 unsigned CFIIndex = MF.addFrameInst(
1153 MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1154
1155 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1156 .addCFIIndex(CFIIndex);
1157 }
1158 }
1159
1160 if (needsCFI) {
1161 // Describe where callee saved registers were saved, at fixed offsets from
1162 // CFA.
1163 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1164 for (const CalleeSavedInfo &I : CSI) {
1165 Register Reg = I.getReg();
1166 if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1167
1168 // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1169 // subregisters of CR2. We just need to emit a move of CR2.
1170 if (PPC::CRBITRCRegClass.contains(Reg))
1171 continue;
1172
1173 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
1174 continue;
1175
1176 // For SVR4, don't emit a move for the CR spill slot if we haven't
1177 // spilled CRs.
1178 if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
1179 && !MustSaveCR)
1180 continue;
1181
1182 // For 64-bit SVR4 when we have spilled CRs, the spill location
1183 // is SP+8, not a frame-relative slot.
1184 if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1185 // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1186 // the whole CR word. In the ELFv2 ABI, every CR that was
1187 // actually saved gets its own CFI record.
1188 Register CRReg = isELFv2ABI? Reg : PPC::CR2;
1189 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1190 nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset));
1191 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1192 .addCFIIndex(CFIIndex);
1193 continue;
1194 }
1195
1196 if (I.isSpilledToReg()) {
1197 unsigned SpilledReg = I.getDstReg();
1198 unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
1199 nullptr, MRI->getDwarfRegNum(Reg, true),
1200 MRI->getDwarfRegNum(SpilledReg, true)));
1201 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1202 .addCFIIndex(CFIRegister);
1203 } else {
1204 int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
1205 // We have changed the object offset above but we do not want to change
1206 // the actual offsets in the CFI instruction so we have to undo the
1207 // offset change here.
1208 if (MovingStackUpdateDown)
1209 Offset -= NegFrameSize;
1210
1211 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1212 nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1213 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1214 .addCFIIndex(CFIIndex);
1215 }
1216 }
1217 }
1218 }
1219
// Expand the PROBED_STACKALLOC_32/64 pseudo (emitted by emitPrologue when
// inline stack probing is requested and the frame exceeds the probe size)
// into a sequence that allocates the frame in probe-size chunks, storing the
// back-chain pointer with st(w|d)u(x) at each step so every chunk is probed.
// The pseudo's operands are: (0) def TempReg, (1) def ScratchReg (holds the
// old SP on exit), (2) imm NegFrameSize. The pseudo is erased at the end.
void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
                                        MachineBasicBlock &PrologMBB) const {
  bool isPPC64 = Subtarget.isPPC64();
  const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  // AIX assembler does not support cfi directives.
  const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
  // Locate the PROBED_STACKALLOC pseudo; if emitPrologue did not emit one,
  // there is nothing to expand.
  auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
    int Opc = MI.getOpcode();
    return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32;
  });
  if (StackAllocMIPos == PrologMBB.end())
    return;
  const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
  MachineBasicBlock *CurrentMBB = &PrologMBB;
  DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
  MachineInstr &MI = *StackAllocMIPos;
  int64_t NegFrameSize = MI.getOperand(2).getImm();
  unsigned ProbeSize = TLI.getStackProbeSize(MF);
  int64_t NegProbeSize = -(int64_t)ProbeSize;
  assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
  // Split the allocation into NumBlocks full probe-size chunks plus a
  // residual chunk strictly smaller than the probe size.
  int64_t NumBlocks = NegFrameSize / NegProbeSize;
  int64_t NegResidualSize = NegFrameSize % NegProbeSize;
  Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
  Register ScratchReg = MI.getOperand(0).getReg();
  Register FPReg = MI.getOperand(1).getReg();
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  bool HasBP = RegInfo->hasBasePointer(MF);
  Register BPReg = RegInfo->getBaseRegister(MF);
  Align MaxAlign = MFI.getMaxAlign();
  bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
  // Register-to-register copy: or rD, rS, rS.
  const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
  // Subroutines to generate .cfi_* directives.
  auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI, Register Reg) {
    unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
    unsigned CFIIndex = MF.addFrameInst(
        MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  };
  auto buildDefCFA = [&](MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, Register Reg,
                         int Offset) {
    unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
    unsigned CFIIndex = MBB.getParent()->addFrameInst(
        MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset));
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  };
  // Subroutine to determine if we can use the Imm as part of d-form.
  // D-form displacement must be a 16-bit signed multiple of 4 (DS-form).
  auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; };
  // Subroutine to materialize the Imm into TempReg.
  auto MaterializeImm = [&](MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI, int64_t Imm,
                            Register &TempReg) {
    assert(isInt<32>(Imm) && "Unhandled imm");
    if (isInt<16>(Imm))
      // Fits in one li.
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg)
          .addImm(Imm);
    else {
      // Needs a lis/ori pair for the full 32-bit immediate.
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
          .addImm(Imm >> 16);
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg)
          .addReg(TempReg)
          .addImm(Imm & 0xFFFF);
    }
  };
  // Subroutine to store frame pointer and decrease stack pointer by probe
  // size. The st(w|d)u(x) both updates SP and writes the back chain, so the
  // store itself is the probe.
  auto allocateAndProbe = [&](MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MBBI, int64_t NegSize,
                              Register NegSizeReg, bool UseDForm,
                              Register StoreReg) {
    if (UseDForm)
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
          .addReg(StoreReg)
          .addImm(NegSize)
          .addReg(SPReg);
    else
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
          .addReg(StoreReg)
          .addReg(SPReg)
          .addReg(NegSizeReg);
  };
  // Used to probe stack when realignment is required.
  // Note that, according to ABI's requirement, *sp must always equal the
  // value of the back-chain pointer, so only st(w|d)u(x) can be used to
  // update sp.
  // Following is pseudo code:
  // final_sp = (sp & align) + negframesize;
  // neg_gap = final_sp - sp;
  // while (neg_gap < negprobesize) {
  //   stdu fp, negprobesize(sp);
  //   neg_gap -= negprobesize;
  // }
  // stdux fp, sp, neg_gap
  //
  // When HasBP & HasRedzone, the back-chain pointer is already saved in BPReg
  // before the probe code, we don't need to save it, so we get one additional
  // reg that can be used to materialize the probe size if needed to use
  // x-form. Otherwise, we can NOT materialize the probe size, so we can only
  // use d-form for now.
  //
  // The allocations are:
  // if (HasBP && HasRedzone) {
  //   r0: materialize the probesize if needed so that we can use xform.
  //   r12: `neg_gap`
  // } else {
  //   r0: back-chain pointer
  //   r12: `neg_gap`.
  // }
  auto probeRealignedStack = [&](MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
                                 Register ScratchReg, Register TempReg) {
    assert(HasBP && "The function is supposed to have base pointer when its "
                    "stack is realigned.");
    assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2");

    // FIXME: We can eliminate this limitation if we get more information about
    // which part of redzone are already used. Used redzone can be treated
    // probed. But there might be `holes' in redzone probed, this could
    // complicate the implementation.
    assert(ProbeSize >= Subtarget.getRedZoneSize() &&
           "Probe size should be larger or equal to the size of red-zone so "
           "that red-zone is not clobbered by probing.");

    // TempReg (FPReg at the call site) holds the realigned final SP.
    Register &FinalStackPtr = TempReg;
    // FIXME: We only support NegProbeSize materializable by DForm currently.
    // When HasBP && HasRedzone, we can use xform if we have an additional idle
    // register.
    NegProbeSize = std::max(NegProbeSize, -((int64_t)1 << 15));
    assert(isInt<16>(NegProbeSize) &&
           "NegProbeSize should be materializable by DForm");
    Register CRReg = PPC::CR0;
    // Layout of output assembly kinda like:
    // bb.0:
    //   ...
    //   sub $scratchreg, $finalsp, r1
    //   cmpdi $scratchreg, <negprobesize>
    //   bge bb.2
    // bb.1:
    //   stdu <backchain>, <negprobesize>(r1)
    //   sub $scratchreg, $scratchreg, negprobesize
    //   cmpdi $scratchreg, <negprobesize>
    //   blt bb.1
    // bb.2:
    //   stdux <backchain>, r1, $scratchreg
    MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
    MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB);
    MF.insert(MBBInsertPoint, ProbeLoopBodyMBB);
    MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
    MF.insert(MBBInsertPoint, ProbeExitMBB);
    // bb.2
    {
      Register BackChainPointer = HasRedZone ? BPReg : TempReg;
      // Final stdux closes the remaining (sub-probe-size) gap using the
      // register-held neg_gap in ScratchReg.
      allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false,
                       BackChainPointer);
      if (HasRedZone)
        // PROBED_STACKALLOC_64 assumes Operand(1) stores the old sp, copy BPReg
        // to TempReg to satisfy it.
        BuildMI(*ProbeExitMBB, ProbeExitMBB->end(), DL, CopyInst, TempReg)
            .addReg(BPReg)
            .addReg(BPReg);
      // Move the rest of the original block (after the pseudo) into the exit
      // block and take over its successors.
      ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
      ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
    }
    // bb.0
    {
      // ScratchReg = neg_gap = final_sp - sp.
      BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), ScratchReg)
          .addReg(SPReg)
          .addReg(FinalStackPtr);
      if (!HasRedZone)
        // Without a red zone, TempReg doubles as the back-chain value (old SP).
        BuildMI(&MBB, DL, CopyInst, TempReg).addReg(SPReg).addReg(SPReg);
      // Skip the loop entirely if the gap is already within one probe.
      BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), CRReg)
          .addReg(ScratchReg)
          .addImm(NegProbeSize);
      BuildMI(&MBB, DL, TII.get(PPC::BCC))
          .addImm(PPC::PRED_GE)
          .addReg(CRReg)
          .addMBB(ProbeExitMBB);
      MBB.addSuccessor(ProbeLoopBodyMBB);
      MBB.addSuccessor(ProbeExitMBB);
    }
    // bb.1
    {
      Register BackChainPointer = HasRedZone ? BPReg : TempReg;
      // One full probe-size allocation per iteration (d-form immediate).
      allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize,
                       0, true /*UseDForm*/, BackChainPointer);
      // neg_gap -= negprobesize (adding the positive probe size).
      BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI),
              ScratchReg)
          .addReg(ScratchReg)
          .addImm(-NegProbeSize);
      BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI),
              CRReg)
          .addReg(ScratchReg)
          .addImm(NegProbeSize);
      BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
          .addImm(PPC::PRED_LT)
          .addReg(CRReg)
          .addMBB(ProbeLoopBodyMBB);
      ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
      ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
    }
    // Update liveins.
    recomputeLiveIns(*ProbeLoopBodyMBB);
    recomputeLiveIns(*ProbeExitMBB);
    return ProbeExitMBB;
  };
  // For case HasBP && MaxAlign > 1, we have to realign the SP by performing
  // SP = SP - SP % MaxAlign, thus make the probe more like dynamic probe since
  // the offset subtracted from SP is determined by SP's runtime value.
  if (HasBP && MaxAlign > 1) {
    // Calculate final stack pointer.
    if (isPPC64)
      // ScratchReg = SP % MaxAlign (clear all but the low alignment bits).
      BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
          .addReg(SPReg)
          .addImm(0)
          .addImm(64 - Log2(MaxAlign));
    else
      BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
          .addReg(SPReg)
          .addImm(0)
          .addImm(32 - Log2(MaxAlign))
          .addImm(31);
    // FPReg = SP - (SP % MaxAlign), i.e. the aligned SP.
    BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF),
            FPReg)
        .addReg(ScratchReg)
        .addReg(SPReg);
    MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg);
    // FPReg = aligned SP + NegFrameSize = final SP.
    BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
            FPReg)
        .addReg(ScratchReg)
        .addReg(FPReg);
    CurrentMBB = probeRealignedStack(*CurrentMBB, {MI}, ScratchReg, FPReg);
    if (needsCFI)
      buildDefCFAReg(*CurrentMBB, {MI}, FPReg);
  } else {
    // Initialize current frame pointer.
    BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
    // Use FPReg to calculate CFA.
    if (needsCFI)
      buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
    // Probe residual part.
    if (NegResidualSize) {
      bool ResidualUseDForm = CanUseDForm(NegResidualSize);
      if (!ResidualUseDForm)
        MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg);
      allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg,
                       ResidualUseDForm, FPReg);
    }
    bool UseDForm = CanUseDForm(NegProbeSize);
    // If number of blocks is small, just probe them directly.
    if (NumBlocks < 3) {
      if (!UseDForm)
        MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
      for (int i = 0; i < NumBlocks; ++i)
        allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm,
                         FPReg);
      if (needsCFI) {
        // Restore using SPReg to calculate CFA.
        buildDefCFAReg(*CurrentMBB, {MI}, SPReg);
      }
    } else {
      // Since CTR is a volatile register and current shrinkwrap implementation
      // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a
      // CTR loop to probe.
      // Calculate trip count and stores it in CTRReg.
      MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg);
      BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
          .addReg(ScratchReg, RegState::Kill);
      if (!UseDForm)
        MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
      // Create MBBs of the loop.
      MachineFunction::iterator MBBInsertPoint =
          std::next(CurrentMBB->getIterator());
      MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
      MF.insert(MBBInsertPoint, LoopMBB);
      MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
      MF.insert(MBBInsertPoint, ExitMBB);
      // Synthesize the loop body.
      allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
                       UseDForm, FPReg);
      // bdnz decrements CTR and branches while it is non-zero.
      BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
          .addMBB(LoopMBB);
      LoopMBB->addSuccessor(ExitMBB);
      LoopMBB->addSuccessor(LoopMBB);
      // Synthesize the exit MBB.
      ExitMBB->splice(ExitMBB->end(), CurrentMBB,
                      std::next(MachineBasicBlock::iterator(MI)),
                      CurrentMBB->end());
      ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB);
      CurrentMBB->addSuccessor(LoopMBB);
      if (needsCFI) {
        // Restore using SPReg to calculate CFA.
        buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
      }
      // Update liveins.
      recomputeLiveIns(*LoopMBB);
      recomputeLiveIns(*ExitMBB);
    }
  }
  ++NumPrologProbed;
  MI.eraseFromParent();
}
1526
emitEpilogue(MachineFunction & MF,MachineBasicBlock & MBB) const1527 void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1528 MachineBasicBlock &MBB) const {
1529 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1530 DebugLoc dl;
1531
1532 if (MBBI != MBB.end())
1533 dl = MBBI->getDebugLoc();
1534
1535 const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1536 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1537
1538 // Get alignment info so we know how to restore the SP.
1539 const MachineFrameInfo &MFI = MF.getFrameInfo();
1540
1541 // Get the number of bytes allocated from the FrameInfo.
1542 int64_t FrameSize = MFI.getStackSize();
1543
1544 // Get processor type.
1545 bool isPPC64 = Subtarget.isPPC64();
1546
1547 // Check if the link register (LR) has been saved.
1548 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1549 bool MustSaveLR = FI->mustSaveLR();
1550 const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
1551 bool MustSaveCR = !MustSaveCRs.empty();
1552 // Do we have a frame pointer and/or base pointer for this function?
1553 bool HasFP = hasFP(MF);
1554 bool HasBP = RegInfo->hasBasePointer(MF);
1555 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1556 bool HasROPProtect = Subtarget.hasROPProtect();
1557 bool HasPrivileged = Subtarget.hasPrivileged();
1558
1559 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
1560 Register BPReg = RegInfo->getBaseRegister(MF);
1561 Register FPReg = isPPC64 ? PPC::X31 : PPC::R31;
1562 Register ScratchReg;
1563 Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
1564 const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1565 : PPC::MTLR );
1566 const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1567 : PPC::LWZ );
1568 const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1569 : PPC::LIS );
1570 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
1571 : PPC::OR );
1572 const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1573 : PPC::ORI );
1574 const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1575 : PPC::ADDI );
1576 const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1577 : PPC::ADD4 );
1578 const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8
1579 : PPC::LWZ);
1580 const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8
1581 : PPC::MTOCRF);
1582 const MCInstrDesc &HashChk =
1583 TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHCHKP8 : PPC::HASHCHK8)
1584 : (HasPrivileged ? PPC::HASHCHKP : PPC::HASHCHK));
1585 int64_t LROffset = getReturnSaveOffset();
1586
1587 int64_t FPOffset = 0;
1588
1589 // Using the same bool variable as below to suppress compiler warnings.
1590 bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1591 &TempReg);
1592 assert(SingleScratchReg &&
1593 "Could not find an available scratch register");
1594
1595 SingleScratchReg = ScratchReg == TempReg;
1596
1597 if (HasFP) {
1598 int FPIndex = FI->getFramePointerSaveIndex();
1599 assert(FPIndex && "No Frame Pointer Save Slot!");
1600 FPOffset = MFI.getObjectOffset(FPIndex);
1601 }
1602
1603 int64_t BPOffset = 0;
1604 if (HasBP) {
1605 int BPIndex = FI->getBasePointerSaveIndex();
1606 assert(BPIndex && "No Base Pointer Save Slot!");
1607 BPOffset = MFI.getObjectOffset(BPIndex);
1608 }
1609
1610 int64_t PBPOffset = 0;
1611 if (FI->usesPICBase()) {
1612 int PBPIndex = FI->getPICBasePointerSaveIndex();
1613 assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1614 PBPOffset = MFI.getObjectOffset(PBPIndex);
1615 }
1616
1617 bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1618
1619 if (IsReturnBlock) {
1620 unsigned RetOpcode = MBBI->getOpcode();
1621 bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
1622 RetOpcode == PPC::TCRETURNdi ||
1623 RetOpcode == PPC::TCRETURNai ||
1624 RetOpcode == PPC::TCRETURNri8 ||
1625 RetOpcode == PPC::TCRETURNdi8 ||
1626 RetOpcode == PPC::TCRETURNai8;
1627
1628 if (UsesTCRet) {
1629 int MaxTCRetDelta = FI->getTailCallSPDelta();
1630 MachineOperand &StackAdjust = MBBI->getOperand(1);
1631 assert(StackAdjust.isImm() && "Expecting immediate value.");
1632 // Adjust stack pointer.
1633 int StackAdj = StackAdjust.getImm();
1634 int Delta = StackAdj - MaxTCRetDelta;
1635 assert((Delta >= 0) && "Delta must be positive");
1636 if (MaxTCRetDelta>0)
1637 FrameSize += (StackAdj +Delta);
1638 else
1639 FrameSize += StackAdj;
1640 }
1641 }
1642
1643 // Frames of 32KB & larger require special handling because they cannot be
1644 // indexed into with a simple LD/LWZ immediate offset operand.
1645 bool isLargeFrame = !isInt<16>(FrameSize);
1646
1647 // On targets without red zone, the SP needs to be restored last, so that
1648 // all live contents of the stack frame are upwards of the SP. This means
1649 // that we cannot restore SP just now, since there may be more registers
1650 // to restore from the stack frame (e.g. R31). If the frame size is not
1651 // a simple immediate value, we will need a spare register to hold the
1652 // restored SP. If the frame size is known and small, we can simply adjust
1653 // the offsets of the registers to be restored, and still use SP to restore
1654 // them. In such case, the final update of SP will be to add the frame
1655 // size to it.
1656 // To simplify the code, set RBReg to the base register used to restore
1657 // values from the stack, and set SPAdd to the value that needs to be added
1658 // to the SP at the end. The default values are as if red zone was present.
1659 unsigned RBReg = SPReg;
1660 uint64_t SPAdd = 0;
1661
1662 // Check if we can move the stack update instruction up the epilogue
1663 // past the callee saves. This will allow the move to LR instruction
1664 // to be executed before the restores of the callee saves which means
  // that the callee saves can hide the latency from the MTLR instruction.
1666 MachineBasicBlock::iterator StackUpdateLoc = MBBI;
1667 if (stackUpdateCanBeMoved(MF)) {
1668 const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
1669 for (CalleeSavedInfo CSI : Info) {
1670 // If the callee saved register is spilled to another register abort the
1671 // stack update movement.
1672 if (CSI.isSpilledToReg()) {
1673 StackUpdateLoc = MBBI;
1674 break;
1675 }
1676 int FrIdx = CSI.getFrameIdx();
1677 // If the frame index is not negative the callee saved info belongs to a
1678 // stack object that is not a fixed stack object. We ignore non-fixed
1679 // stack objects because we won't move the update of the stack pointer
1680 // past them.
1681 if (FrIdx >= 0)
1682 continue;
1683
1684 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
1685 StackUpdateLoc--;
1686 else {
1687 // Abort the operation as we can't update all CSR restores.
1688 StackUpdateLoc = MBBI;
1689 break;
1690 }
1691 }
1692 }
1693
1694 if (FrameSize) {
1695 // In the prologue, the loaded (or persistent) stack pointer value is
1696 // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
1697 // zone add this offset back now.
1698
1699 // If the function has a base pointer, the stack pointer has been copied
1700 // to it so we can restore it by copying in the other direction.
1701 if (HasRedZone && HasBP) {
1702 BuildMI(MBB, MBBI, dl, OrInst, RBReg).
1703 addReg(BPReg).
1704 addReg(BPReg);
1705 }
1706 // If this function contained a fastcc call and GuaranteedTailCallOpt is
1707 // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
1708 // call which invalidates the stack pointer value in SP(0). So we use the
1709 // value of R31 in this case. Similar situation exists with setjmp.
1710 else if (FI->hasFastCall() || MF.exposesReturnsTwice()) {
1711 assert(HasFP && "Expecting a valid frame pointer.");
1712 if (!HasRedZone)
1713 RBReg = FPReg;
1714 if (!isLargeFrame) {
1715 BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
1716 .addReg(FPReg).addImm(FrameSize);
1717 } else {
1718 TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, FrameSize);
1719 BuildMI(MBB, MBBI, dl, AddInst)
1720 .addReg(RBReg)
1721 .addReg(FPReg)
1722 .addReg(ScratchReg);
1723 }
1724 } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
1725 if (HasRedZone) {
1726 BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
1727 .addReg(SPReg)
1728 .addImm(FrameSize);
1729 } else {
1730 // Make sure that adding FrameSize will not overflow the max offset
1731 // size.
1732 assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
1733 "Local offsets should be negative");
1734 SPAdd = FrameSize;
1735 FPOffset += FrameSize;
1736 BPOffset += FrameSize;
1737 PBPOffset += FrameSize;
1738 }
1739 } else {
1740 // We don't want to use ScratchReg as a base register, because it
1741 // could happen to be R0. Use FP instead, but make sure to preserve it.
1742 if (!HasRedZone) {
1743 // If FP is not saved, copy it to ScratchReg.
1744 if (!HasFP)
1745 BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
1746 .addReg(FPReg)
1747 .addReg(FPReg);
1748 RBReg = FPReg;
1749 }
1750 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
1751 .addImm(0)
1752 .addReg(SPReg);
1753 }
1754 }
1755 assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
1756 // If there is no red zone, ScratchReg may be needed for holding a useful
1757 // value (although not the base register). Make sure it is not overwritten
1758 // too early.
1759
1760 // If we need to restore both the LR and the CR and we only have one
1761 // available scratch register, we must do them one at a time.
1762 if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1763 // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
1764 // is live here.
1765 assert(HasRedZone && "Expecting red zone");
1766 BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1767 .addImm(CRSaveOffset)
1768 .addReg(SPReg);
1769 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1770 BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1771 .addReg(TempReg, getKillRegState(i == e-1));
1772 }
1773
1774 // Delay restoring of the LR if ScratchReg is needed. This is ok, since
1775 // LR is stored in the caller's stack frame. ScratchReg will be needed
1776 // if RBReg is anything other than SP. We shouldn't use ScratchReg as
1777 // a base register anyway, because it may happen to be R0.
1778 bool LoadedLR = false;
1779 if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
1780 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
1781 .addImm(LROffset+SPAdd)
1782 .addReg(RBReg);
1783 LoadedLR = true;
1784 }
1785
1786 if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
1787 assert(RBReg == SPReg && "Should be using SP as a base register");
1788 BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1789 .addImm(CRSaveOffset)
1790 .addReg(RBReg);
1791 }
1792
1793 if (HasFP) {
1794 // If there is red zone, restore FP directly, since SP has already been
1795 // restored. Otherwise, restore the value of FP into ScratchReg.
1796 if (HasRedZone || RBReg == SPReg)
1797 BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1798 .addImm(FPOffset)
1799 .addReg(SPReg);
1800 else
1801 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1802 .addImm(FPOffset)
1803 .addReg(RBReg);
1804 }
1805
1806 if (FI->usesPICBase())
1807 BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
1808 .addImm(PBPOffset)
1809 .addReg(RBReg);
1810
1811 if (HasBP)
1812 BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1813 .addImm(BPOffset)
1814 .addReg(RBReg);
1815
1816 // There is nothing more to be loaded from the stack, so now we can
1817 // restore SP: SP = RBReg + SPAdd.
1818 if (RBReg != SPReg || SPAdd != 0) {
1819 assert(!HasRedZone && "This should not happen with red zone");
1820 // If SPAdd is 0, generate a copy.
1821 if (SPAdd == 0)
1822 BuildMI(MBB, MBBI, dl, OrInst, SPReg)
1823 .addReg(RBReg)
1824 .addReg(RBReg);
1825 else
1826 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1827 .addReg(RBReg)
1828 .addImm(SPAdd);
1829
1830 assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
1831 if (RBReg == FPReg)
1832 BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1833 .addReg(ScratchReg)
1834 .addReg(ScratchReg);
1835
1836 // Now load the LR from the caller's stack frame.
1837 if (MustSaveLR && !LoadedLR)
1838 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1839 .addImm(LROffset)
1840 .addReg(SPReg);
1841 }
1842
1843 if (MustSaveCR &&
1844 !(SingleScratchReg && MustSaveLR))
1845 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1846 BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1847 .addReg(TempReg, getKillRegState(i == e-1));
1848
1849 if (MustSaveLR) {
1850 // If ROP protection is required, an extra instruction is added to compute a
1851 // hash and then compare it to the hash stored in the prologue.
1852 if (HasROPProtect) {
1853 const int SaveIndex = FI->getROPProtectionHashSaveIndex();
1854 const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex);
1855 assert((ImmOffset <= -8 && ImmOffset >= -512) &&
1856 "ROP hash check location offset out of range.");
1857 assert(((ImmOffset & 0x7) == 0) &&
1858 "ROP hash check location offset must be 8 byte aligned.");
1859 BuildMI(MBB, StackUpdateLoc, dl, HashChk)
1860 .addReg(ScratchReg)
1861 .addImm(ImmOffset)
1862 .addReg(SPReg);
1863 }
1864 BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
1865 }
1866
1867 // Callee pop calling convention. Pop parameter/linkage area. Used for tail
1868 // call optimization
1869 if (IsReturnBlock) {
1870 unsigned RetOpcode = MBBI->getOpcode();
1871 if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1872 (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1873 MF.getFunction().getCallingConv() == CallingConv::Fast) {
1874 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1875 unsigned CallerAllocatedAmt = FI->getMinReservedArea();
1876
1877 if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1878 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1879 .addReg(SPReg).addImm(CallerAllocatedAmt);
1880 } else {
1881 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1882 .addImm(CallerAllocatedAmt >> 16);
1883 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1884 .addReg(ScratchReg, RegState::Kill)
1885 .addImm(CallerAllocatedAmt & 0xFFFF);
1886 BuildMI(MBB, MBBI, dl, AddInst)
1887 .addReg(SPReg)
1888 .addReg(FPReg)
1889 .addReg(ScratchReg);
1890 }
1891 } else {
1892 createTailCallBranchInstr(MBB);
1893 }
1894 }
1895 }
1896
createTailCallBranchInstr(MachineBasicBlock & MBB) const1897 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
1898 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1899
1900 // If we got this far a first terminator should exist.
1901 assert(MBBI != MBB.end() && "Failed to find the first terminator.");
1902
1903 DebugLoc dl = MBBI->getDebugLoc();
1904 const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1905
1906 // Create branch instruction for pseudo tail call return instruction.
1907 // The TCRETURNdi variants are direct calls. Valid targets for those are
1908 // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel
1909 // since we can tail call external functions with PC-Rel (i.e. we don't need
1910 // to worry about different TOC pointers). Some of the external functions will
1911 // be MO_GlobalAddress while others like memcpy for example, are going to
1912 // be MO_ExternalSymbol.
1913 unsigned RetOpcode = MBBI->getOpcode();
1914 if (RetOpcode == PPC::TCRETURNdi) {
1915 MBBI = MBB.getLastNonDebugInstr();
1916 MachineOperand &JumpTarget = MBBI->getOperand(0);
1917 if (JumpTarget.isGlobal())
1918 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1919 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1920 else if (JumpTarget.isSymbol())
1921 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1922 addExternalSymbol(JumpTarget.getSymbolName());
1923 else
1924 llvm_unreachable("Expecting Global or External Symbol");
1925 } else if (RetOpcode == PPC::TCRETURNri) {
1926 MBBI = MBB.getLastNonDebugInstr();
1927 assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1928 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
1929 } else if (RetOpcode == PPC::TCRETURNai) {
1930 MBBI = MBB.getLastNonDebugInstr();
1931 MachineOperand &JumpTarget = MBBI->getOperand(0);
1932 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
1933 } else if (RetOpcode == PPC::TCRETURNdi8) {
1934 MBBI = MBB.getLastNonDebugInstr();
1935 MachineOperand &JumpTarget = MBBI->getOperand(0);
1936 if (JumpTarget.isGlobal())
1937 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1938 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1939 else if (JumpTarget.isSymbol())
1940 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1941 addExternalSymbol(JumpTarget.getSymbolName());
1942 else
1943 llvm_unreachable("Expecting Global or External Symbol");
1944 } else if (RetOpcode == PPC::TCRETURNri8) {
1945 MBBI = MBB.getLastNonDebugInstr();
1946 assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1947 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
1948 } else if (RetOpcode == PPC::TCRETURNai8) {
1949 MBBI = MBB.getLastNonDebugInstr();
1950 MachineOperand &JumpTarget = MBBI->getOperand(0);
1951 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
1952 }
1953 }
1954
// Compute the set of callee-saved registers that must be spilled, and
// allocate the PPC-specific fixed frame objects (FP/BP/PIC-base/CR/tail-call
// save slots) that the prologue and epilogue will later use.
void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                            BitVector &SavedRegs,
                                            RegScavenger *RS) const {
  // Start from the target-independent callee-saved set, then apply the
  // PPC-specific adjustments below.
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  // Do not explicitly save the callee saved VSRp registers.
  // The individual VSR subregisters will be saved instead.
  SavedRegs.reset(PPC::VSRp26);
  SavedRegs.reset(PPC::VSRp27);
  SavedRegs.reset(PPC::VSRp28);
  SavedRegs.reset(PPC::VSRp29);
  SavedRegs.reset(PPC::VSRp30);
  SavedRegs.reset(PPC::VSRp31);

  // Save and clear the LR state. LR is removed from the generic set here
  // because its save/restore is emitted specially during prologue/epilogue
  // insertion rather than as an ordinary callee-saved spill.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  unsigned LR = RegInfo->getRARegister();
  FI->setMustSaveLR(MustSaveLR(MF, LR));
  SavedRegs.reset(LR);

  // Save R31 if necessary
  int FPSI = FI->getFramePointerSaveIndex();
  const bool isPPC64 = Subtarget.isPPC64();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // If the frame pointer save index hasn't been defined yet.
  if (!FPSI && needsFP(MF)) {
    // Find out what the fix offset of the frame pointer save area.
    int FPOffset = getFramePointerSaveOffset();
    // Allocate the frame index for frame pointer save area.
    FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
    // Save the result.
    FI->setFramePointerSaveIndex(FPSI);
  }

  // Likewise allocate a fixed save slot for the base pointer if one is needed
  // and has not been assigned yet.
  int BPSI = FI->getBasePointerSaveIndex();
  if (!BPSI && RegInfo->hasBasePointer(MF)) {
    int BPOffset = getBasePointerSaveOffset();
    // Allocate the frame index for the base pointer save area.
    BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
    // Save the result.
    FI->setBasePointerSaveIndex(BPSI);
  }

  // Reserve stack space for the PIC Base register (R30).
  // Only used in SVR4 32-bit.
  if (FI->usesPICBase()) {
    int PBPSI = MFI.CreateFixedObject(4, -8, true);
    FI->setPICBasePointerSaveIndex(PBPSI);
  }

  // Make sure we don't explicitly spill r31, because, for example, we have
  // some inline asm which explicitly clobbers it, when we otherwise have a
  // frame pointer and are using r31's spill slot for the prologue/epilogue
  // code. Same goes for the base pointer and the PIC base register.
  if (needsFP(MF))
    SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
  if (RegInfo->hasBasePointer(MF))
    SavedRegs.reset(RegInfo->getBaseRegister(MF));
  if (FI->usesPICBase())
    SavedRegs.reset(PPC::R30);

  // Reserve stack space to move the linkage area to in case of a tail call.
  int TCSPDelta = 0;
  if (MF.getTarget().Options.GuaranteedTailCallOpt &&
      (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
    MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
  }

  // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
  // For 64-bit SVR4, and all flavors of AIX we create a FixedStack
  // object at the offset of the CR-save slot in the linkage area. The actual
  // save and restore of the condition register will be created as part of the
  // prologue and epilogue insertion, but the FixedStack object is needed to
  // keep the CalleeSavedInfo valid.
  if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
       SavedRegs.test(PPC::CR4))) {
    const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
    // +8/+4: CR-save slot in the linkage area (64-bit, resp. 32-bit AIX);
    // -4: a local slot for 32-bit ELF, which has no linkage-area CR slot.
    const int64_t SpillOffset =
        Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
    int FrameIdx =
        MFI.CreateFixedObject(SpillSize, SpillOffset,
                              /* IsImmutable */ true, /* IsAliased */ false);
    FI->setCRSpillFrameIndex(FrameIdx);
  }
}
2043
// Lay out the callee-saved register save areas (FPR, GPR/G8R, CR, VR/SPE)
// below the back chain word by adjusting the frame objects created earlier,
// and reserve scavenging spill slot(s) if needed.
void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
                                                           RegScavenger *RS) const {
  // Get callee saved register information.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();

  // If the function is shrink-wrapped, and if the function has a tail call, the
  // tail call might not be in the new RestoreBlock, so real branch instruction
  // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
  // RestoreBlock. So we handle this case here.
  if (MFI.getSavePoint() && MFI.hasTailCall()) {
    MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
    for (MachineBasicBlock &MBB : MF) {
      if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
        createTailCallBranchInstr(MBB);
    }
  }

  // Early exit if no callee saved registers are modified!
  if (CSI.empty() && !needsFP(MF)) {
    addScavengingSpillSlot(MF, RS);
    return;
  }

  // Track the lowest-numbered saved register of each class; registers are
  // saved from that register up to 31, which determines each area's size.
  unsigned MinGPR = PPC::R31;
  unsigned MinG8R = PPC::X31;
  unsigned MinFPR = PPC::F31;
  unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;

  bool HasGPSaveArea = false;
  bool HasG8SaveArea = false;
  bool HasFPSaveArea = false;
  bool HasVRSaveArea = false;

  // Partition the callee-saved registers by register class.
  SmallVector<CalleeSavedInfo, 18> GPRegs;
  SmallVector<CalleeSavedInfo, 18> G8Regs;
  SmallVector<CalleeSavedInfo, 18> FPRegs;
  SmallVector<CalleeSavedInfo, 18> VRegs;

  for (const CalleeSavedInfo &I : CSI) {
    Register Reg = I.getReg();
    assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
            (Reg != PPC::X2 && Reg != PPC::R2)) &&
           "Not expecting to try to spill R2 in a function that must save TOC");
    if (PPC::GPRCRegClass.contains(Reg)) {
      HasGPSaveArea = true;

      GPRegs.push_back(I);

      if (Reg < MinGPR) {
        MinGPR = Reg;
      }
    } else if (PPC::G8RCRegClass.contains(Reg)) {
      HasG8SaveArea = true;

      G8Regs.push_back(I);

      if (Reg < MinG8R) {
        MinG8R = Reg;
      }
    } else if (PPC::F8RCRegClass.contains(Reg)) {
      HasFPSaveArea = true;

      FPRegs.push_back(I);

      if (Reg < MinFPR) {
        MinFPR = Reg;
      }
    } else if (PPC::CRBITRCRegClass.contains(Reg) ||
               PPC::CRRCRegClass.contains(Reg)) {
      ; // do nothing, as we already know whether CRs are spilled
    } else if (PPC::VRRCRegClass.contains(Reg) ||
               PPC::SPERCRegClass.contains(Reg)) {
      // Altivec and SPE are mutually exclusive, but have the same stack
      // alignment requirements, so overload the save area for both cases.
      HasVRSaveArea = true;

      VRegs.push_back(I);

      if (Reg < MinVR) {
        MinVR = Reg;
      }
    } else {
      llvm_unreachable("Unknown RegisterClass!");
    }
  }

  PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();

  // LowerBound is the (non-positive, SP-relative) offset of the bottom of
  // the areas laid out so far; each area below pushes it further down.
  int64_t LowerBound = 0;

  // Take into account stack space reserved for tail calls.
  int TCSPDelta = 0;
  if (MF.getTarget().Options.GuaranteedTailCallOpt &&
      (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
    LowerBound = TCSPDelta;
  }

  // The Floating-point register save area is right below the back chain word
  // of the previous stack frame.
  if (HasFPSaveArea) {
    for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
      int FI = FPRegs[i].getFrameIdx();

      MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
    }

    // Each saved FPR in MinFPR..F31 occupies 8 bytes.
    LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
  }

  // Check whether the frame pointer register is allocated. If so, make sure it
  // is spilled to the correct offset.
  if (needsFP(MF)) {
    int FI = PFI->getFramePointerSaveIndex();
    assert(FI && "No Frame Pointer Save Slot!");
    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
    // FP is R31/X31, so no need to update MinGPR/MinG8R.
    HasGPSaveArea = true;
  }

  // Same for the PIC base pointer slot (R30, 32-bit SVR4 only).
  if (PFI->usesPICBase()) {
    int FI = PFI->getPICBasePointerSaveIndex();
    assert(FI && "No PIC Base Pointer Save Slot!");
    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));

    MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
    HasGPSaveArea = true;
  }

  // And for the base pointer slot, whose register class depends on the mode.
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  if (RegInfo->hasBasePointer(MF)) {
    int FI = PFI->getBasePointerSaveIndex();
    assert(FI && "No Base Pointer Save Slot!");
    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));

    Register BP = RegInfo->getBaseRegister(MF);
    if (PPC::G8RCRegClass.contains(BP)) {
      MinG8R = std::min<unsigned>(MinG8R, BP);
      HasG8SaveArea = true;
    } else if (PPC::GPRCRegClass.contains(BP)) {
      MinGPR = std::min<unsigned>(MinGPR, BP);
      HasGPSaveArea = true;
    }
  }

  // General register save area starts right below the Floating-point
  // register save area.
  if (HasGPSaveArea || HasG8SaveArea) {
    // Move general register save area spill slots down, taking into account
    // the size of the Floating-point register save area. Registers spilled
    // to other registers (vector spills) have no stack slot to adjust.
    for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
      if (!GPRegs[i].isSpilledToReg()) {
        int FI = GPRegs[i].getFrameIdx();
        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
      }
    }

    // Move general register save area spill slots down, taking into account
    // the size of the Floating-point register save area.
    for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
      if (!G8Regs[i].isSpilledToReg()) {
        int FI = G8Regs[i].getFrameIdx();
        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
      }
    }

    // GPR and G8R share one save area; size it from the lowest encoding of
    // either class up to register 31.
    unsigned MinReg =
      std::min<unsigned>(TRI->getEncodingValue(MinGPR),
                         TRI->getEncodingValue(MinG8R));

    const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4;
    LowerBound -= (31 - MinReg + 1) * GPRegSize;
  }

  // For 32-bit only, the CR save area is below the general register
  // save area. For 64-bit SVR4, the CR save area is addressed relative
  // to the stack pointer and hence does not need an adjustment here.
  // Only CR2 (the first nonvolatile spilled) has an associated frame
  // index so that we have a single uniform save area.
  if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
    // Adjust the frame index of the CR spill slot.
    for (const auto &CSInfo : CSI) {
      if (CSInfo.getReg() == PPC::CR2) {
        int FI = CSInfo.getFrameIdx();
        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
        break;
      }
    }

    LowerBound -= 4; // The CR save area is always 4 bytes long.
  }

  // Both Altivec and SPE have the same alignment and padding requirements
  // within the stack frame.
  if (HasVRSaveArea) {
    // Insert alignment padding, we need 16-byte alignment. Note: for positive
    // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since
    // we are using negative number here (the stack grows downward). We should
    // use formula : y = x & (~(n-1)). Where x is the size before aligning, n
    // is the alignment size ( n = 16 here) and y is the size after aligning.
    assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
    LowerBound &= ~(15);

    for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
      int FI = VRegs[i].getFrameIdx();

      MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
    }
  }

  // Finally, reserve emergency spill slot(s) for the register scavenger if
  // the frame will need them.
  addScavengingSpillSlot(MF, RS);
}
2257
2258 void
addScavengingSpillSlot(MachineFunction & MF,RegScavenger * RS) const2259 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
2260 RegScavenger *RS) const {
2261 // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
2262 // a large stack, which will require scavenging a register to materialize a
2263 // large offset.
2264
2265 // We need to have a scavenger spill slot for spills if the frame size is
2266 // large. In case there is no free register for large-offset addressing,
2267 // this slot is used for the necessary emergency spill. Also, we need the
2268 // slot for dynamic stack allocations.
2269
2270 // The scavenger might be invoked if the frame offset does not fit into
2271 // the 16-bit immediate. We don't know the complete frame size here
2272 // because we've not yet computed callee-saved register spills or the
2273 // needed alignment padding.
2274 unsigned StackSize = determineFrameLayout(MF, true);
2275 MachineFrameInfo &MFI = MF.getFrameInfo();
2276 if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) ||
2277 (hasSpills(MF) && !isInt<16>(StackSize))) {
2278 const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
2279 const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
2280 const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
2281 const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
2282 unsigned Size = TRI.getSpillSize(RC);
2283 Align Alignment = TRI.getSpillAlign(RC);
2284 RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false));
2285
2286 // Might we have over-aligned allocas?
2287 bool HasAlVars =
2288 MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign();
2289
2290 // These kinds of spills might need two registers.
2291 if (spillsCR(MF) || HasAlVars)
2292 RS->addScavengingFrameIndex(
2293 MFI.CreateStackObject(Size, Alignment, false));
2294 }
2295 }
2296
2297 // This function checks if a callee saved gpr can be spilled to a volatile
2298 // vector register. This occurs for leaf functions when the option
2299 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
2300 // which were not spilled to vectors, return false so the target independent
2301 // code can handle them by assigning a FrameIdx to a stack slot.
assignCalleeSavedSpillSlots(MachineFunction & MF,const TargetRegisterInfo * TRI,std::vector<CalleeSavedInfo> & CSI) const2302 bool PPCFrameLowering::assignCalleeSavedSpillSlots(
2303 MachineFunction &MF, const TargetRegisterInfo *TRI,
2304 std::vector<CalleeSavedInfo> &CSI) const {
2305
2306 if (CSI.empty())
2307 return true; // Early exit if no callee saved registers are modified!
2308
2309 // Early exit if cannot spill gprs to volatile vector registers.
2310 MachineFrameInfo &MFI = MF.getFrameInfo();
2311 if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
2312 return false;
2313
2314 // Build a BitVector of VSRs that can be used for spilling GPRs.
2315 BitVector BVAllocatable = TRI->getAllocatableSet(MF);
2316 BitVector BVCalleeSaved(TRI->getNumRegs());
2317 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2318 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2319 for (unsigned i = 0; CSRegs[i]; ++i)
2320 BVCalleeSaved.set(CSRegs[i]);
2321
2322 for (unsigned Reg : BVAllocatable.set_bits()) {
2323 // Set to 0 if the register is not a volatile VSX register, or if it is
2324 // used in the function.
2325 if (BVCalleeSaved[Reg] || !PPC::VSRCRegClass.contains(Reg) ||
2326 MF.getRegInfo().isPhysRegUsed(Reg))
2327 BVAllocatable.reset(Reg);
2328 }
2329
2330 bool AllSpilledToReg = true;
2331 unsigned LastVSRUsedForSpill = 0;
2332 for (auto &CS : CSI) {
2333 if (BVAllocatable.none())
2334 return false;
2335
2336 Register Reg = CS.getReg();
2337
2338 if (!PPC::G8RCRegClass.contains(Reg)) {
2339 AllSpilledToReg = false;
2340 continue;
2341 }
2342
2343 // For P9, we can reuse LastVSRUsedForSpill to spill two GPRs
2344 // into one VSR using the mtvsrdd instruction.
2345 if (LastVSRUsedForSpill != 0) {
2346 CS.setDstReg(LastVSRUsedForSpill);
2347 BVAllocatable.reset(LastVSRUsedForSpill);
2348 LastVSRUsedForSpill = 0;
2349 continue;
2350 }
2351
2352 unsigned VolatileVFReg = BVAllocatable.find_first();
2353 if (VolatileVFReg < BVAllocatable.size()) {
2354 CS.setDstReg(VolatileVFReg);
2355 LastVSRUsedForSpill = VolatileVFReg;
2356 } else {
2357 AllSpilledToReg = false;
2358 }
2359 }
2360 return AllSpilledToReg;
2361 }
2362
// Emit spills for all callee-saved registers at the given insertion point.
// CR fields are funneled through a single MFCR+STW on 32-bit ELF; selected
// GPRs may be spilled into volatile VSRs (one or two per VSR); everything
// else goes to its assigned stack slot.
bool PPCFrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {

  MachineFunction *MF = MBB.getParent();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
  bool MustSaveTOC = FI->mustSaveTOC();
  DebugLoc DL;
  // CRSpilled/CRMIB track the single MFCR built for the first CR field so
  // that later CR fields can be appended to it as implicit operands.
  bool CRSpilled = false;
  MachineInstrBuilder CRMIB;
  BitVector Spilled(TRI->getNumRegs());

  VSRContainingGPRs.clear();

  // Map each VSR to the GPRs to be spilled into it. A single VSR can contain
  // one or two GPRs, so we need a table to record information for later
  // save/restore.
  for (const CalleeSavedInfo &Info : CSI) {
    if (Info.isSpilledToReg()) {
      auto &SpilledVSR =
          VSRContainingGPRs.FindAndConstruct(Info.getDstReg()).second;
      assert(SpilledVSR.second == 0 &&
             "Can't spill more than two GPRs into VSR!");
      if (SpilledVSR.first == 0)
        SpilledVSR.first = Info.getReg();
      else
        SpilledVSR.second = Info.getReg();
    }
  }

  for (const CalleeSavedInfo &I : CSI) {
    Register Reg = I.getReg();

    // CR2 through CR4 are the nonvolatile CR fields.
    bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;

    // Add the callee-saved register as live-in; it's killed at the spill.
    // Do not do this for callee-saved registers that are live-in to the
    // function because they will already be marked live-in and this will be
    // adding it for a second time. It is an error to add the same register
    // to the set more than once.
    const MachineRegisterInfo &MRI = MF->getRegInfo();
    bool IsLiveIn = MRI.isLiveIn(Reg);
    if (!IsLiveIn)
      MBB.addLiveIn(Reg);

    // A CR field after the first: just record it as an implicit kill on the
    // already-built MFCR instruction.
    if (CRSpilled && IsCRField) {
      CRMIB.addReg(Reg, RegState::ImplicitKill);
      continue;
    }

    // The actual spill will happen in the prologue.
    if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
      continue;

    // Insert the spill to the stack frame.
    if (IsCRField) {
      PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
      if (!Subtarget.is32BitELFABI()) {
        // The actual spill will happen at the start of the prologue.
        FuncInfo->addMustSaveCR(Reg);
      } else {
        CRSpilled = true;
        FuncInfo->setSpillsCR();

        // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have
        // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
        CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
                  .addReg(Reg, RegState::ImplicitKill);

        MBB.insert(MI, CRMIB);
        MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
                                         .addReg(PPC::R12,
                                                 getKillRegState(true)),
                                         I.getFrameIdx()));
      }
    } else {
      if (I.isSpilledToReg()) {
        unsigned Dst = I.getDstReg();

        // A VSR holding two GPRs is spilled once, when its first GPR is seen.
        if (Spilled[Dst])
          continue;

        if (VSRContainingGPRs[Dst].second != 0) {
          // Two GPRs share this VSR: pack both halves with mtvsrdd.
          assert(Subtarget.hasP9Vector() &&
                 "mtvsrdd is unavailable on pre-P9 targets.");

          NumPESpillVSR += 2;
          BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRDD), Dst)
              .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true))
              .addReg(VSRContainingGPRs[Dst].second, getKillRegState(true));
        } else if (VSRContainingGPRs[Dst].second == 0) {
          // Single GPR: move it into the 64-bit subregister of the VSR.
          assert(Subtarget.hasP8Vector() &&
                 "Can't move GPR to VSR on pre-P8 targets.");

          ++NumPESpillVSR;
          BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD),
                  TRI->getSubReg(Dst, PPC::sub_64))
              .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true));
        } else {
          llvm_unreachable("More than two GPRs spilled to a VSR!");
        }
        Spilled.set(Dst);
      } else {
        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
        // Use !IsLiveIn for the kill flag.
        // We do not want to kill registers that are live in this function
        // before their use because they will become undefined registers.
        // Functions without NoUnwind need to preserve the order of elements in
        // saved vector registers.
        if (Subtarget.needsSwapsForVSXMemOps() &&
            !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
          TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
                                       I.getFrameIdx(), RC, TRI);
        else
          TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, I.getFrameIdx(),
                                  RC, TRI);
      }
    }
  }
  return true;
}
2485
restoreCRs(bool is31,bool CR2Spilled,bool CR3Spilled,bool CR4Spilled,MachineBasicBlock & MBB,MachineBasicBlock::iterator MI,ArrayRef<CalleeSavedInfo> CSI,unsigned CSIIndex)2486 static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled,
2487 bool CR4Spilled, MachineBasicBlock &MBB,
2488 MachineBasicBlock::iterator MI,
2489 ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) {
2490
2491 MachineFunction *MF = MBB.getParent();
2492 const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
2493 DebugLoc DL;
2494 unsigned MoveReg = PPC::R12;
2495
2496 // 32-bit: FP-relative
2497 MBB.insert(MI,
2498 addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg),
2499 CSI[CSIIndex].getFrameIdx()));
2500
2501 unsigned RestoreOp = PPC::MTOCRF;
2502 if (CR2Spilled)
2503 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
2504 .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
2505
2506 if (CR3Spilled)
2507 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
2508 .addReg(MoveReg, getKillRegState(!CR4Spilled)));
2509
2510 if (CR4Spilled)
2511 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
2512 .addReg(MoveReg, getKillRegState(true)));
2513 }
2514
2515 MachineBasicBlock::iterator PPCFrameLowering::
eliminateCallFramePseudoInstr(MachineFunction & MF,MachineBasicBlock & MBB,MachineBasicBlock::iterator I) const2516 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
2517 MachineBasicBlock::iterator I) const {
2518 const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2519 if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2520 I->getOpcode() == PPC::ADJCALLSTACKUP) {
2521 // Add (actually subtract) back the amount the callee popped on return.
2522 if (int CalleeAmt = I->getOperand(1).getImm()) {
2523 bool is64Bit = Subtarget.isPPC64();
2524 CalleeAmt *= -1;
2525 unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
2526 unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
2527 unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
2528 unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
2529 unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
2530 unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
2531 const DebugLoc &dl = I->getDebugLoc();
2532
2533 if (isInt<16>(CalleeAmt)) {
2534 BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
2535 .addReg(StackReg, RegState::Kill)
2536 .addImm(CalleeAmt);
2537 } else {
2538 MachineBasicBlock::iterator MBBI = I;
2539 BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
2540 .addImm(CalleeAmt >> 16);
2541 BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
2542 .addReg(TmpReg, RegState::Kill)
2543 .addImm(CalleeAmt & 0xFFFF);
2544 BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
2545 .addReg(StackReg, RegState::Kill)
2546 .addReg(TmpReg);
2547 }
2548 }
2549 }
2550 // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
2551 return MBB.erase(I);
2552 }
2553
isCalleeSavedCR(unsigned Reg)2554 static bool isCalleeSavedCR(unsigned Reg) {
2555 return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4;
2556 }
2557
/// Reload the callee-saved registers described by \p CSI before \p MI.
/// Handles three classes of saves specially: the TOC register (restored
/// during epilogue insertion when it must be saved), the nonvolatile CR
/// fields CR2-CR4 on 32-bit ELF (batched into a single reload via
/// restoreCRs), and GPRs that were spilled into vector registers (moved
/// back with mfvsrd/mfvsrld instead of loaded from memory).
bool PPCFrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
  bool MustSaveTOC = FI->mustSaveTOC();
  // Deferred state for the CR2-CR4 fields: they are collected while walking
  // CSI and restored together once a non-CR entry (or the end) is reached.
  bool CR2Spilled = false;
  bool CR3Spilled = false;
  bool CR4Spilled = false;
  unsigned CSIIndex = 0;
  // Tracks VSRs already unpacked, so a VSR holding two GPRs is only
  // expanded once even though both GPRs appear in CSI.
  BitVector Restored(TRI->getNumRegs());

  // Initialize insertion-point logic; we will be restoring in reverse
  // order of spill.
  MachineBasicBlock::iterator I = MI, BeforeI = I;
  bool AtStart = I == MBB.begin();

  if (!AtStart)
    --BeforeI;

  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    Register Reg = CSI[i].getReg();

    // The TOC register is restored by the epilogue code when it must be
    // saved, so skip it here.
    if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
      continue;

    // Restore of callee saved condition register field is handled during
    // epilogue insertion.
    if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
      continue;

    if (Reg == PPC::CR2) {
      CR2Spilled = true;
      // The spill slot is associated only with CR2, which is the
      // first nonvolatile spilled. Save it here.
      CSIIndex = i;
      continue;
    } else if (Reg == PPC::CR3) {
      CR3Spilled = true;
      continue;
    } else if (Reg == PPC::CR4) {
      CR4Spilled = true;
      continue;
    } else {
      // On 32-bit ELF when we first encounter a non-CR register after seeing at
      // least one CR register, restore all spilled CRs together.
      if (CR2Spilled || CR3Spilled || CR4Spilled) {
        bool is31 = needsFP(*MF);
        restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI,
                   CSIIndex);
        CR2Spilled = CR3Spilled = CR4Spilled = false;
      }

      if (CSI[i].isSpilledToReg()) {
        // This GPR was saved into a VSR rather than to memory; move it back.
        DebugLoc DL;
        unsigned Dst = CSI[i].getDstReg();

        if (Restored[Dst])
          continue;

        // VSRContainingGPRs maps the VSR to the (first, second) GPRs it
        // holds; a nonzero second entry means two GPRs share this VSR.
        if (VSRContainingGPRs[Dst].second != 0) {
          assert(Subtarget.hasP9Vector());
          NumPEReloadVSR += 2;
          // Recover the second GPR with mfvsrld and the first with mfvsrd;
          // the VSR (via its 64-bit subreg) is killed on the second move.
          BuildMI(MBB, I, DL, TII.get(PPC::MFVSRLD),
                  VSRContainingGPRs[Dst].second)
              .addReg(Dst);
          BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
                  VSRContainingGPRs[Dst].first)
              .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
        } else if (VSRContainingGPRs[Dst].second == 0) {
          // Only one GPR lives in this VSR; a single mfvsrd restores it.
          assert(Subtarget.hasP8Vector());
          ++NumPEReloadVSR;
          BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
                  VSRContainingGPRs[Dst].first)
              .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
        } else {
          llvm_unreachable("More than two GPRs spilled to a VSR!");
        }

        Restored.set(Dst);

      } else {
        // Default behavior for non-CR saves.
        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);

        // Functions without NoUnwind need to preserve the order of elements in
        // saved vector registers.
        if (Subtarget.needsSwapsForVSXMemOps() &&
            !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
          TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
                                        TRI);
        else
          TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);

        assert(I != MBB.begin() &&
               "loadRegFromStackSlot didn't insert any code!");
      }
    }

    // Insert in reverse order.
    if (AtStart)
      I = MBB.begin();
    else {
      I = BeforeI;
      ++I;
    }
  }

  // If CR fields were the trailing CSI entries, they have been collected but
  // not yet restored; restore them now.
  if (CR2Spilled || CR3Spilled || CR4Spilled) {
    assert(Subtarget.is32BitELFABI() &&
           "Only set CR[2|3|4]Spilled on 32-bit SVR4.");
    bool is31 = needsFP(*MF);
    restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex);
  }

  return true;
}
2677
// Simple accessor for the precomputed TOC save slot offset.
uint64_t PPCFrameLowering::getTOCSaveOffset() const {
  return TOCSaveOffset;
}
2681
// Simple accessor for the precomputed frame pointer save slot offset.
uint64_t PPCFrameLowering::getFramePointerSaveOffset() const {
  return FramePointerSaveOffset;
}
2685
// Simple accessor for the precomputed base pointer save slot offset.
uint64_t PPCFrameLowering::getBasePointerSaveOffset() const {
  return BasePointerSaveOffset;
}
2689
enableShrinkWrapping(const MachineFunction & MF) const2690 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2691 if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
2692 return false;
2693 return !MF.getSubtarget<PPCSubtarget>().is32BitELFABI();
2694 }
2695