1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the PPC implementation of TargetFrameLowering class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "MCTargetDesc/PPCPredicates.h" 14 #include "PPCFrameLowering.h" 15 #include "PPCInstrBuilder.h" 16 #include "PPCInstrInfo.h" 17 #include "PPCMachineFunctionInfo.h" 18 #include "PPCSubtarget.h" 19 #include "PPCTargetMachine.h" 20 #include "llvm/ADT/Statistic.h" 21 #include "llvm/CodeGen/MachineFrameInfo.h" 22 #include "llvm/CodeGen/MachineFunction.h" 23 #include "llvm/CodeGen/MachineInstrBuilder.h" 24 #include "llvm/CodeGen/MachineModuleInfo.h" 25 #include "llvm/CodeGen/MachineRegisterInfo.h" 26 #include "llvm/CodeGen/RegisterScavenging.h" 27 #include "llvm/IR/Function.h" 28 #include "llvm/Target/TargetOptions.h" 29 30 using namespace llvm; 31 32 #define DEBUG_TYPE "framelowering" 33 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue"); 34 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue"); 35 STATISTIC(NumPrologProbed, "Number of prologues probed"); 36 37 static cl::opt<bool> 38 EnablePEVectorSpills("ppc-enable-pe-vector-spills", 39 cl::desc("Enable spills in prologue to vector registers."), 40 cl::init(false), cl::Hidden); 41 42 /// VRRegNo - Map from a numbered VR register to its enum value. 43 /// 44 static const MCPhysReg VRRegNo[] = { 45 PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 , 46 PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15, 47 PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23, 48 PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31 49 }; 50 51 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) { 52 if (STI.isAIXABI()) 53 return STI.isPPC64() ? 16 : 8; 54 // SVR4 ABI: 55 return STI.isPPC64() ? 16 : 4; 56 } 57 58 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) { 59 if (STI.isAIXABI()) 60 return STI.isPPC64() ? 40 : 20; 61 return STI.isELFv2ABI() ? 24 : 40; 62 } 63 64 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) { 65 // First slot in the general register save area. 66 return STI.isPPC64() ? -8U : -4U; 67 } 68 69 static unsigned computeLinkageSize(const PPCSubtarget &STI) { 70 if (STI.isAIXABI() || STI.isPPC64()) 71 return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4); 72 73 // 32-bit SVR4 ABI: 74 return 8; 75 } 76 77 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) { 78 // Third slot in the general purpose register save area. 79 if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent()) 80 return -12U; 81 82 // Second slot in the general purpose register save area. 83 return STI.isPPC64() ? -16U : -8U; 84 } 85 86 static unsigned computeCRSaveOffset(const PPCSubtarget &STI) { 87 return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8; 88 } 89 90 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI) 91 : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 92 STI.getPlatformStackAlignment(), 0), 93 Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)), 94 TOCSaveOffset(computeTOCSaveOffset(Subtarget)), 95 FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)), 96 LinkageSize(computeLinkageSize(Subtarget)), 97 BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)), 98 CRSaveOffset(computeCRSaveOffset(Subtarget)) {} 99 100 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. 101 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots( 102 unsigned &NumEntries) const { 103 104 // Floating-point register save area offsets. 105 #define CALLEE_SAVED_FPRS \ 106 {PPC::F31, -8}, \ 107 {PPC::F30, -16}, \ 108 {PPC::F29, -24}, \ 109 {PPC::F28, -32}, \ 110 {PPC::F27, -40}, \ 111 {PPC::F26, -48}, \ 112 {PPC::F25, -56}, \ 113 {PPC::F24, -64}, \ 114 {PPC::F23, -72}, \ 115 {PPC::F22, -80}, \ 116 {PPC::F21, -88}, \ 117 {PPC::F20, -96}, \ 118 {PPC::F19, -104}, \ 119 {PPC::F18, -112}, \ 120 {PPC::F17, -120}, \ 121 {PPC::F16, -128}, \ 122 {PPC::F15, -136}, \ 123 {PPC::F14, -144} 124 125 // 32-bit general purpose register save area offsets shared by ELF and 126 // AIX. AIX has an extra CSR with r13. 127 #define CALLEE_SAVED_GPRS32 \ 128 {PPC::R31, -4}, \ 129 {PPC::R30, -8}, \ 130 {PPC::R29, -12}, \ 131 {PPC::R28, -16}, \ 132 {PPC::R27, -20}, \ 133 {PPC::R26, -24}, \ 134 {PPC::R25, -28}, \ 135 {PPC::R24, -32}, \ 136 {PPC::R23, -36}, \ 137 {PPC::R22, -40}, \ 138 {PPC::R21, -44}, \ 139 {PPC::R20, -48}, \ 140 {PPC::R19, -52}, \ 141 {PPC::R18, -56}, \ 142 {PPC::R17, -60}, \ 143 {PPC::R16, -64}, \ 144 {PPC::R15, -68}, \ 145 {PPC::R14, -72} 146 147 // 64-bit general purpose register save area offsets. 148 #define CALLEE_SAVED_GPRS64 \ 149 {PPC::X31, -8}, \ 150 {PPC::X30, -16}, \ 151 {PPC::X29, -24}, \ 152 {PPC::X28, -32}, \ 153 {PPC::X27, -40}, \ 154 {PPC::X26, -48}, \ 155 {PPC::X25, -56}, \ 156 {PPC::X24, -64}, \ 157 {PPC::X23, -72}, \ 158 {PPC::X22, -80}, \ 159 {PPC::X21, -88}, \ 160 {PPC::X20, -96}, \ 161 {PPC::X19, -104}, \ 162 {PPC::X18, -112}, \ 163 {PPC::X17, -120}, \ 164 {PPC::X16, -128}, \ 165 {PPC::X15, -136}, \ 166 {PPC::X14, -144} 167 168 // Vector register save area offsets. 169 #define CALLEE_SAVED_VRS \ 170 {PPC::V31, -16}, \ 171 {PPC::V30, -32}, \ 172 {PPC::V29, -48}, \ 173 {PPC::V28, -64}, \ 174 {PPC::V27, -80}, \ 175 {PPC::V26, -96}, \ 176 {PPC::V25, -112}, \ 177 {PPC::V24, -128}, \ 178 {PPC::V23, -144}, \ 179 {PPC::V22, -160}, \ 180 {PPC::V21, -176}, \ 181 {PPC::V20, -192} 182 183 // Note that the offsets here overlap, but this is fixed up in 184 // processFunctionBeforeFrameFinalized. 185 186 static const SpillSlot ELFOffsets32[] = { 187 CALLEE_SAVED_FPRS, 188 CALLEE_SAVED_GPRS32, 189 190 // CR save area offset. We map each of the nonvolatile CR fields 191 // to the slot for CR2, which is the first of the nonvolatile CR 192 // fields to be assigned, so that we only allocate one save slot. 193 // See PPCRegisterInfo::hasReservedSpillSlot() for more information. 194 {PPC::CR2, -4}, 195 196 // VRSAVE save area offset. 197 {PPC::VRSAVE, -4}, 198 199 CALLEE_SAVED_VRS, 200 201 // SPE register save area (overlaps Vector save area). 202 {PPC::S31, -8}, 203 {PPC::S30, -16}, 204 {PPC::S29, -24}, 205 {PPC::S28, -32}, 206 {PPC::S27, -40}, 207 {PPC::S26, -48}, 208 {PPC::S25, -56}, 209 {PPC::S24, -64}, 210 {PPC::S23, -72}, 211 {PPC::S22, -80}, 212 {PPC::S21, -88}, 213 {PPC::S20, -96}, 214 {PPC::S19, -104}, 215 {PPC::S18, -112}, 216 {PPC::S17, -120}, 217 {PPC::S16, -128}, 218 {PPC::S15, -136}, 219 {PPC::S14, -144}}; 220 221 static const SpillSlot ELFOffsets64[] = { 222 CALLEE_SAVED_FPRS, 223 CALLEE_SAVED_GPRS64, 224 225 // VRSAVE save area offset. 226 {PPC::VRSAVE, -4}, 227 CALLEE_SAVED_VRS 228 }; 229 230 static const SpillSlot AIXOffsets32[] = { 231 CALLEE_SAVED_FPRS, 232 CALLEE_SAVED_GPRS32, 233 // Add AIX's extra CSR. 234 {PPC::R13, -76}, 235 // TODO: Update when we add vector support for AIX. 236 }; 237 238 static const SpillSlot AIXOffsets64[] = { 239 CALLEE_SAVED_FPRS, 240 CALLEE_SAVED_GPRS64, 241 // TODO: Update when we add vector support for AIX. 242 }; 243 244 if (Subtarget.is64BitELFABI()) { 245 NumEntries = array_lengthof(ELFOffsets64); 246 return ELFOffsets64; 247 } 248 249 if (Subtarget.is32BitELFABI()) { 250 NumEntries = array_lengthof(ELFOffsets32); 251 return ELFOffsets32; 252 } 253 254 assert(Subtarget.isAIXABI() && "Unexpected ABI."); 255 256 if (Subtarget.isPPC64()) { 257 NumEntries = array_lengthof(AIXOffsets64); 258 return AIXOffsets64; 259 } 260 261 NumEntries = array_lengthof(AIXOffsets32); 262 return AIXOffsets32; 263 } 264 265 /// RemoveVRSaveCode - We have found that this function does not need any code 266 /// to manipulate the VRSAVE register, even though it uses vector registers. 267 /// This can happen when the only registers used are known to be live in or out 268 /// of the function. Remove all of the VRSAVE related code from the function. 269 /// FIXME: The removal of the code results in a compile failure at -O0 when the 270 /// function contains a function call, as the GPR containing original VRSAVE 271 /// contents is spilled and reloaded around the call. Without the prolog code, 272 /// the spill instruction refers to an undefined register. This code needs 273 /// to account for all uses of that GPR. 274 static void RemoveVRSaveCode(MachineInstr &MI) { 275 MachineBasicBlock *Entry = MI.getParent(); 276 MachineFunction *MF = Entry->getParent(); 277 278 // We know that the MTVRSAVE instruction immediately follows MI. Remove it. 279 MachineBasicBlock::iterator MBBI = MI; 280 ++MBBI; 281 assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE); 282 MBBI->eraseFromParent(); 283 284 bool RemovedAllMTVRSAVEs = true; 285 // See if we can find and remove the MTVRSAVE instruction from all of the 286 // epilog blocks. 287 for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) { 288 // If last instruction is a return instruction, add an epilogue 289 if (I->isReturnBlock()) { 290 bool FoundIt = false; 291 for (MBBI = I->end(); MBBI != I->begin(); ) { 292 --MBBI; 293 if (MBBI->getOpcode() == PPC::MTVRSAVE) { 294 MBBI->eraseFromParent(); // remove it. 295 FoundIt = true; 296 break; 297 } 298 } 299 RemovedAllMTVRSAVEs &= FoundIt; 300 } 301 } 302 303 // If we found and removed all MTVRSAVE instructions, remove the read of 304 // VRSAVE as well. 305 if (RemovedAllMTVRSAVEs) { 306 MBBI = MI; 307 assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?"); 308 --MBBI; 309 assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?"); 310 MBBI->eraseFromParent(); 311 } 312 313 // Finally, nuke the UPDATE_VRSAVE. 314 MI.eraseFromParent(); 315 } 316 317 // HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the 318 // instruction selector. Based on the vector registers that have been used, 319 // transform this into the appropriate ORI instruction. 320 static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) { 321 MachineFunction *MF = MI.getParent()->getParent(); 322 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); 323 DebugLoc dl = MI.getDebugLoc(); 324 325 const MachineRegisterInfo &MRI = MF->getRegInfo(); 326 unsigned UsedRegMask = 0; 327 for (unsigned i = 0; i != 32; ++i) 328 if (MRI.isPhysRegModified(VRRegNo[i])) 329 UsedRegMask |= 1 << (31-i); 330 331 // Live in and live out values already must be in the mask, so don't bother 332 // marking them. 333 for (std::pair<unsigned, unsigned> LI : MF->getRegInfo().liveins()) { 334 unsigned RegNo = TRI->getEncodingValue(LI.first); 335 if (VRRegNo[RegNo] == LI.first) // If this really is a vector reg. 336 UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. 337 } 338 339 // Live out registers appear as use operands on return instructions. 340 for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end(); 341 UsedRegMask != 0 && BI != BE; ++BI) { 342 const MachineBasicBlock &MBB = *BI; 343 if (!MBB.isReturnBlock()) 344 continue; 345 const MachineInstr &Ret = MBB.back(); 346 for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) { 347 const MachineOperand &MO = Ret.getOperand(I); 348 if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg())) 349 continue; 350 unsigned RegNo = TRI->getEncodingValue(MO.getReg()); 351 UsedRegMask &= ~(1 << (31-RegNo)); 352 } 353 } 354 355 // If no registers are used, turn this into a copy. 356 if (UsedRegMask == 0) { 357 // Remove all VRSAVE code. 358 RemoveVRSaveCode(MI); 359 return; 360 } 361 362 Register SrcReg = MI.getOperand(1).getReg(); 363 Register DstReg = MI.getOperand(0).getReg(); 364 365 if ((UsedRegMask & 0xFFFF) == UsedRegMask) { 366 if (DstReg != SrcReg) 367 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 368 .addReg(SrcReg) 369 .addImm(UsedRegMask); 370 else 371 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 372 .addReg(SrcReg, RegState::Kill) 373 .addImm(UsedRegMask); 374 } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) { 375 if (DstReg != SrcReg) 376 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 377 .addReg(SrcReg) 378 .addImm(UsedRegMask >> 16); 379 else 380 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 381 .addReg(SrcReg, RegState::Kill) 382 .addImm(UsedRegMask >> 16); 383 } else { 384 if (DstReg != SrcReg) 385 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 386 .addReg(SrcReg) 387 .addImm(UsedRegMask >> 16); 388 else 389 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 390 .addReg(SrcReg, RegState::Kill) 391 .addImm(UsedRegMask >> 16); 392 393 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 394 .addReg(DstReg, RegState::Kill) 395 .addImm(UsedRegMask & 0xFFFF); 396 } 397 398 // Remove the old UPDATE_VRSAVE instruction. 399 MI.eraseFromParent(); 400 } 401 402 static bool spillsCR(const MachineFunction &MF) { 403 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 404 return FuncInfo->isCRSpilled(); 405 } 406 407 static bool spillsVRSAVE(const MachineFunction &MF) { 408 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 409 return FuncInfo->isVRSAVESpilled(); 410 } 411 412 static bool hasSpills(const MachineFunction &MF) { 413 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 414 return FuncInfo->hasSpills(); 415 } 416 417 static bool hasNonRISpills(const MachineFunction &MF) { 418 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 419 return FuncInfo->hasNonRISpills(); 420 } 421 422 /// MustSaveLR - Return true if this function requires that we save the LR 423 /// register onto the stack in the prolog and restore it in the epilog of the 424 /// function. 425 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { 426 const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>(); 427 428 // We need a save/restore of LR if there is any def of LR (which is 429 // defined by calls, including the PIC setup sequence), or if there is 430 // some use of the LR stack slot (e.g. for builtin_return_address). 431 // (LR comes in 32 and 64 bit versions.) 432 MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR); 433 return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired(); 434 } 435 436 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum 437 /// call frame size. Update the MachineFunction object with the stack size. 438 unsigned 439 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF, 440 bool UseEstimate) const { 441 unsigned NewMaxCallFrameSize = 0; 442 unsigned FrameSize = determineFrameLayout(MF, UseEstimate, 443 &NewMaxCallFrameSize); 444 MF.getFrameInfo().setStackSize(FrameSize); 445 MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize); 446 return FrameSize; 447 } 448 449 /// determineFrameLayout - Determine the size of the frame and maximum call 450 /// frame size. 451 unsigned 452 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF, 453 bool UseEstimate, 454 unsigned *NewMaxCallFrameSize) const { 455 const MachineFrameInfo &MFI = MF.getFrameInfo(); 456 const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 457 458 // Get the number of bytes to allocate from the FrameInfo 459 unsigned FrameSize = 460 UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize(); 461 462 // Get stack alignments. The frame must be aligned to the greatest of these: 463 Align TargetAlign = getStackAlign(); // alignment required per the ABI 464 Align MaxAlign = MFI.getMaxAlign(); // algmt required by data in frame 465 Align Alignment = std::max(TargetAlign, MaxAlign); 466 467 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 468 469 unsigned LR = RegInfo->getRARegister(); 470 bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone); 471 bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca. 472 !MFI.adjustsStack() && // No calls. 473 !MustSaveLR(MF, LR) && // No need to save LR. 474 !FI->mustSaveTOC() && // No need to save TOC. 475 !RegInfo->hasBasePointer(MF); // No special alignment. 476 477 // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless 478 // code if all local vars are reg-allocated. 479 bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize(); 480 481 // Check whether we can skip adjusting the stack pointer (by using red zone) 482 if (!DisableRedZone && CanUseRedZone && FitsInRedZone) { 483 // No need for frame 484 return 0; 485 } 486 487 // Get the maximum call frame size of all the calls. 488 unsigned maxCallFrameSize = MFI.getMaxCallFrameSize(); 489 490 // Maximum call frame needs to be at least big enough for linkage area. 491 unsigned minCallFrameSize = getLinkageSize(); 492 maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize); 493 494 // If we have dynamic alloca then maxCallFrameSize needs to be aligned so 495 // that allocations will be aligned. 496 if (MFI.hasVarSizedObjects()) 497 maxCallFrameSize = alignTo(maxCallFrameSize, Alignment); 498 499 // Update the new max call frame size if the caller passes in a valid pointer. 500 if (NewMaxCallFrameSize) 501 *NewMaxCallFrameSize = maxCallFrameSize; 502 503 // Include call frame size in total. 504 FrameSize += maxCallFrameSize; 505 506 // Make sure the frame is aligned. 507 FrameSize = alignTo(FrameSize, Alignment); 508 509 return FrameSize; 510 } 511 512 // hasFP - Return true if the specified function actually has a dedicated frame 513 // pointer register. 514 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const { 515 const MachineFrameInfo &MFI = MF.getFrameInfo(); 516 // FIXME: This is pretty much broken by design: hasFP() might be called really 517 // early, before the stack layout was calculated and thus hasFP() might return 518 // true or false here depending on the time of call. 519 return (MFI.getStackSize()) && needsFP(MF); 520 } 521 522 // needsFP - Return true if the specified function should have a dedicated frame 523 // pointer register. This is true if the function has variable sized allocas or 524 // if frame pointer elimination is disabled. 525 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { 526 const MachineFrameInfo &MFI = MF.getFrameInfo(); 527 528 // Naked functions have no stack frame pushed, so we don't have a frame 529 // pointer. 530 if (MF.getFunction().hasFnAttribute(Attribute::Naked)) 531 return false; 532 533 return MF.getTarget().Options.DisableFramePointerElim(MF) || 534 MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() || 535 (MF.getTarget().Options.GuaranteedTailCallOpt && 536 MF.getInfo<PPCFunctionInfo>()->hasFastCall()); 537 } 538 539 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { 540 bool is31 = needsFP(MF); 541 unsigned FPReg = is31 ? PPC::R31 : PPC::R1; 542 unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1; 543 544 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 545 bool HasBP = RegInfo->hasBasePointer(MF); 546 unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg; 547 unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg; 548 549 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); 550 BI != BE; ++BI) 551 for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) { 552 --MBBI; 553 for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { 554 MachineOperand &MO = MBBI->getOperand(I); 555 if (!MO.isReg()) 556 continue; 557 558 switch (MO.getReg()) { 559 case PPC::FP: 560 MO.setReg(FPReg); 561 break; 562 case PPC::FP8: 563 MO.setReg(FP8Reg); 564 break; 565 case PPC::BP: 566 MO.setReg(BPReg); 567 break; 568 case PPC::BP8: 569 MO.setReg(BP8Reg); 570 break; 571 572 } 573 } 574 } 575 } 576 577 /* This function will do the following: 578 - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12 579 respectively (defaults recommended by the ABI) and return true 580 - If MBB is not an entry block, initialize the register scavenger and look 581 for available registers. 582 - If the defaults (R0/R12) are available, return true 583 - If TwoUniqueRegsRequired is set to true, it looks for two unique 584 registers. Otherwise, look for a single available register. 585 - If the required registers are found, set SR1 and SR2 and return true. 586 - If the required registers are not found, set SR2 or both SR1 and SR2 to 587 PPC::NoRegister and return false. 588 589 Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired 590 is not set, this function will attempt to find two different registers, but 591 still return true if only one register is available (and set SR1 == SR2). 592 */ 593 bool 594 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB, 595 bool UseAtEnd, 596 bool TwoUniqueRegsRequired, 597 Register *SR1, 598 Register *SR2) const { 599 RegScavenger RS; 600 Register R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0; 601 Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12; 602 603 // Set the defaults for the two scratch registers. 604 if (SR1) 605 *SR1 = R0; 606 607 if (SR2) { 608 assert (SR1 && "Asking for the second scratch register but not the first?"); 609 *SR2 = R12; 610 } 611 612 // If MBB is an entry or exit block, use R0 and R12 as the scratch registers. 613 if ((UseAtEnd && MBB->isReturnBlock()) || 614 (!UseAtEnd && (&MBB->getParent()->front() == MBB))) 615 return true; 616 617 RS.enterBasicBlock(*MBB); 618 619 if (UseAtEnd && !MBB->empty()) { 620 // The scratch register will be used at the end of the block, so must 621 // consider all registers used within the block 622 623 MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator(); 624 // If no terminator, back iterator up to previous instruction. 625 if (MBBI == MBB->end()) 626 MBBI = std::prev(MBBI); 627 628 if (MBBI != MBB->begin()) 629 RS.forward(MBBI); 630 } 631 632 // If the two registers are available, we're all good. 633 // Note that we only return here if both R0 and R12 are available because 634 // although the function may not require two unique registers, it may benefit 635 // from having two so we should try to provide them. 636 if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12)) 637 return true; 638 639 // Get the list of callee-saved registers for the target. 640 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 641 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent()); 642 643 // Get all the available registers in the block. 644 BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass : 645 &PPC::GPRCRegClass); 646 647 // We shouldn't use callee-saved registers as scratch registers as they may be 648 // available when looking for a candidate block for shrink wrapping but not 649 // available when the actual prologue/epilogue is being emitted because they 650 // were added as live-in to the prologue block by PrologueEpilogueInserter. 651 for (int i = 0; CSRegs[i]; ++i) 652 BV.reset(CSRegs[i]); 653 654 // Set the first scratch register to the first available one. 655 if (SR1) { 656 int FirstScratchReg = BV.find_first(); 657 *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg; 658 } 659 660 // If there is another one available, set the second scratch register to that. 661 // Otherwise, set it to either PPC::NoRegister if this function requires two 662 // or to whatever SR1 is set to if this function doesn't require two. 663 if (SR2) { 664 int SecondScratchReg = BV.find_next(*SR1); 665 if (SecondScratchReg != -1) 666 *SR2 = SecondScratchReg; 667 else 668 *SR2 = TwoUniqueRegsRequired ? Register() : *SR1; 669 } 670 671 // Now that we've done our best to provide both registers, double check 672 // whether we were unable to provide enough. 673 if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U)) 674 return false; 675 676 return true; 677 } 678 679 // We need a scratch register for spilling LR and for spilling CR. By default, 680 // we use two scratch registers to hide latency. However, if only one scratch 681 // register is available, we can adjust for that by not overlapping the spill 682 // code. However, if we need to realign the stack (i.e. have a base pointer) 683 // and the stack frame is large, we need two scratch registers. 684 bool 685 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const { 686 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 687 MachineFunction &MF = *(MBB->getParent()); 688 bool HasBP = RegInfo->hasBasePointer(MF); 689 unsigned FrameSize = determineFrameLayout(MF); 690 int NegFrameSize = -FrameSize; 691 bool IsLargeFrame = !isInt<16>(NegFrameSize); 692 MachineFrameInfo &MFI = MF.getFrameInfo(); 693 Align MaxAlign = MFI.getMaxAlign(); 694 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); 695 696 return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1; 697 } 698 699 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const { 700 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); 701 702 return findScratchRegister(TmpMBB, false, 703 twoUniqueScratchRegsRequired(TmpMBB)); 704 } 705 706 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { 707 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); 708 709 return findScratchRegister(TmpMBB, true); 710 } 711 712 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const { 713 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 714 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 715 716 // Abort if there is no register info or function info. 717 if (!RegInfo || !FI) 718 return false; 719 720 // Only move the stack update on ELFv2 ABI and PPC64. 721 if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64()) 722 return false; 723 724 // Check the frame size first and return false if it does not fit the 725 // requirements. 726 // We need a non-zero frame size as well as a frame that will fit in the red 727 // zone. This is because by moving the stack pointer update we are now storing 728 // to the red zone until the stack pointer is updated. If we get an interrupt 729 // inside the prologue but before the stack update we now have a number of 730 // stores to the red zone and those stores must all fit. 731 MachineFrameInfo &MFI = MF.getFrameInfo(); 732 unsigned FrameSize = MFI.getStackSize(); 733 if (!FrameSize || FrameSize > Subtarget.getRedZoneSize()) 734 return false; 735 736 // Frame pointers and base pointers complicate matters so don't do anything 737 // if we have them. For example having a frame pointer will sometimes require 738 // a copy of r1 into r31 and that makes keeping track of updates to r1 more 739 // difficult. 740 if (hasFP(MF) || RegInfo->hasBasePointer(MF)) 741 return false; 742 743 // Calls to fast_cc functions use different rules for passing parameters on 744 // the stack from the ABI and using PIC base in the function imposes 745 // similar restrictions to using the base pointer. It is not generally safe 746 // to move the stack pointer update in these situations. 747 if (FI->hasFastCall() || FI->usesPICBase()) 748 return false; 749 750 // Finally we can move the stack update if we do not require register 751 // scavenging. Register scavenging can introduce more spills and so 752 // may make the frame size larger than we have computed. 753 return !RegInfo->requiresFrameIndexScavenging(MF); 754 } 755 756 void PPCFrameLowering::emitPrologue(MachineFunction &MF, 757 MachineBasicBlock &MBB) const { 758 MachineBasicBlock::iterator MBBI = MBB.begin(); 759 MachineFrameInfo &MFI = MF.getFrameInfo(); 760 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 761 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 762 const PPCTargetLowering &TLI = *Subtarget.getTargetLowering(); 763 764 MachineModuleInfo &MMI = MF.getMMI(); 765 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); 766 DebugLoc dl; 767 // AIX assembler does not support cfi directives. 768 const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI(); 769 770 // Get processor type. 771 bool isPPC64 = Subtarget.isPPC64(); 772 // Get the ABI. 773 bool isSVR4ABI = Subtarget.isSVR4ABI(); 774 bool isAIXABI = Subtarget.isAIXABI(); 775 bool isELFv2ABI = Subtarget.isELFv2ABI(); 776 assert((isSVR4ABI || isAIXABI) && "Unsupported PPC ABI."); 777 778 // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it, 779 // process it. 780 if (!isSVR4ABI) 781 for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) { 782 if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) { 783 if (isAIXABI) 784 report_fatal_error("UPDATE_VRSAVE is unexpected on AIX."); 785 HandleVRSaveUpdate(*MBBI, TII); 786 break; 787 } 788 } 789 790 // Move MBBI back to the beginning of the prologue block. 791 MBBI = MBB.begin(); 792 793 // Work out frame sizes. 794 unsigned FrameSize = determineFrameLayoutAndUpdate(MF); 795 int NegFrameSize = -FrameSize; 796 if (!isInt<32>(NegFrameSize)) 797 llvm_unreachable("Unhandled stack size!"); 798 799 if (MFI.isFrameAddressTaken()) 800 replaceFPWithRealFP(MF); 801 802 // Check if the link register (LR) must be saved. 803 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 804 bool MustSaveLR = FI->mustSaveLR(); 805 bool MustSaveTOC = FI->mustSaveTOC(); 806 const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs(); 807 bool MustSaveCR = !MustSaveCRs.empty(); 808 // Do we have a frame pointer and/or base pointer for this function? 809 bool HasFP = hasFP(MF); 810 bool HasBP = RegInfo->hasBasePointer(MF); 811 bool HasRedZone = isPPC64 || !isSVR4ABI; 812 813 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1; 814 Register BPReg = RegInfo->getBaseRegister(MF); 815 Register FPReg = isPPC64 ? PPC::X31 : PPC::R31; 816 Register LRReg = isPPC64 ? PPC::LR8 : PPC::LR; 817 Register TOCReg = isPPC64 ? PPC::X2 : PPC::R2; 818 Register ScratchReg; 819 Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 820 // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.) 821 const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8 822 : PPC::MFLR ); 823 const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD 824 : PPC::STW ); 825 const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU 826 : PPC::STWU ); 827 const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX 828 : PPC::STWUX); 829 const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8 830 : PPC::LIS ); 831 const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8 832 : PPC::ORI ); 833 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 834 : PPC::OR ); 835 const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8 836 : PPC::SUBFC); 837 const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8 838 : PPC::SUBFIC); 839 const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8 840 : PPC::MFCR); 841 const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW); 842 843 // Regarding this assert: Even though LR is saved in the caller's frame (i.e., 844 // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no 845 // Red Zone, an asynchronous event (a form of "callee") could claim a frame & 846 // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR. 847 assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) && 848 "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4."); 849 850 // Using the same bool variable as below to suppress compiler warnings. 851 // Stack probe requires two scratch registers, one for old sp, one for large 852 // frame and large probe size. 853 bool SingleScratchReg = findScratchRegister( 854 &MBB, false, 855 twoUniqueScratchRegsRequired(&MBB) || TLI.hasInlineStackProbe(MF), 856 &ScratchReg, &TempReg); 857 assert(SingleScratchReg && 858 "Required number of registers not available in this block"); 859 860 SingleScratchReg = ScratchReg == TempReg; 861 862 int LROffset = getReturnSaveOffset(); 863 864 int FPOffset = 0; 865 if (HasFP) { 866 MachineFrameInfo &MFI = MF.getFrameInfo(); 867 int FPIndex = FI->getFramePointerSaveIndex(); 868 assert(FPIndex && "No Frame Pointer Save Slot!"); 869 FPOffset = MFI.getObjectOffset(FPIndex); 870 } 871 872 int BPOffset = 0; 873 if (HasBP) { 874 MachineFrameInfo &MFI = MF.getFrameInfo(); 875 int BPIndex = FI->getBasePointerSaveIndex(); 876 assert(BPIndex && "No Base Pointer Save Slot!"); 877 BPOffset = MFI.getObjectOffset(BPIndex); 878 } 879 880 int PBPOffset = 0; 881 if (FI->usesPICBase()) { 882 MachineFrameInfo &MFI = MF.getFrameInfo(); 883 int PBPIndex = FI->getPICBasePointerSaveIndex(); 884 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 885 PBPOffset = MFI.getObjectOffset(PBPIndex); 886 } 887 888 // Get stack alignments. 889 Align MaxAlign = MFI.getMaxAlign(); 890 if (HasBP && MaxAlign > 1) 891 assert(Log2(MaxAlign) < 16 && "Invalid alignment!"); 892 893 // Frames of 32KB & larger require special handling because they cannot be 894 // indexed into with a simple STDU/STWU/STD/STW immediate offset operand. 895 bool isLargeFrame = !isInt<16>(NegFrameSize); 896 897 // Check if we can move the stack update instruction (stdu) down the prologue 898 // past the callee saves. Hopefully this will avoid the situation where the 899 // saves are waiting for the update on the store with update to complete. 900 MachineBasicBlock::iterator StackUpdateLoc = MBBI; 901 bool MovingStackUpdateDown = false; 902 903 // Check if we can move the stack update. 904 if (stackUpdateCanBeMoved(MF)) { 905 const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo(); 906 for (CalleeSavedInfo CSI : Info) { 907 int FrIdx = CSI.getFrameIdx(); 908 // If the frame index is not negative the callee saved info belongs to a 909 // stack object that is not a fixed stack object. We ignore non-fixed 910 // stack objects because we won't move the stack update pointer past them. 911 if (FrIdx >= 0) 912 continue; 913 914 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) { 915 StackUpdateLoc++; 916 MovingStackUpdateDown = true; 917 } else { 918 // We need all of the Frame Indices to meet these conditions. 919 // If they do not, abort the whole operation. 920 StackUpdateLoc = MBBI; 921 MovingStackUpdateDown = false; 922 break; 923 } 924 } 925 926 // If the operation was not aborted then update the object offset. 927 if (MovingStackUpdateDown) { 928 for (CalleeSavedInfo CSI : Info) { 929 int FrIdx = CSI.getFrameIdx(); 930 if (FrIdx < 0) 931 MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize); 932 } 933 } 934 } 935 936 // Where in the prologue we move the CR fields depends on how many scratch 937 // registers we have, and if we need to save the link register or not. This 938 // lambda is to avoid duplicating the logic in 2 places. 939 auto BuildMoveFromCR = [&]() { 940 if (isELFv2ABI && MustSaveCRs.size() == 1) { 941 // In the ELFv2 ABI, we are not required to save all CR fields. 942 // If only one CR field is clobbered, it is more efficient to use 943 // mfocrf to selectively save just that field, because mfocrf has short 944 // latency compares to mfcr. 945 assert(isPPC64 && "V2 ABI is 64-bit only."); 946 MachineInstrBuilder MIB = 947 BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg); 948 MIB.addReg(MustSaveCRs[0], RegState::Kill); 949 } else { 950 MachineInstrBuilder MIB = 951 BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg); 952 for (unsigned CRfield : MustSaveCRs) 953 MIB.addReg(CRfield, RegState::ImplicitKill); 954 } 955 }; 956 957 // If we need to spill the CR and the LR but we don't have two separate 958 // registers available, we must spill them one at a time 959 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 960 BuildMoveFromCR(); 961 BuildMI(MBB, MBBI, dl, StoreWordInst) 962 .addReg(TempReg, getKillRegState(true)) 963 .addImm(CRSaveOffset) 964 .addReg(SPReg); 965 } 966 967 if (MustSaveLR) 968 BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg); 969 970 if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) 971 BuildMoveFromCR(); 972 973 if (HasRedZone) { 974 if (HasFP) 975 BuildMI(MBB, MBBI, dl, StoreInst) 976 .addReg(FPReg) 977 .addImm(FPOffset) 978 .addReg(SPReg); 979 if (FI->usesPICBase()) 980 BuildMI(MBB, MBBI, dl, StoreInst) 981 .addReg(PPC::R30) 982 .addImm(PBPOffset) 983 .addReg(SPReg); 984 if (HasBP) 985 BuildMI(MBB, MBBI, dl, StoreInst) 986 .addReg(BPReg) 987 .addImm(BPOffset) 988 .addReg(SPReg); 989 } 990 991 if (MustSaveLR) 992 BuildMI(MBB, StackUpdateLoc, dl, StoreInst) 993 .addReg(ScratchReg, getKillRegState(true)) 994 .addImm(LROffset) 995 .addReg(SPReg); 996 997 if (MustSaveCR && 998 !(SingleScratchReg && MustSaveLR)) { 999 assert(HasRedZone && "A red zone is always available on PPC64"); 1000 BuildMI(MBB, MBBI, dl, StoreWordInst) 1001 .addReg(TempReg, getKillRegState(true)) 1002 .addImm(CRSaveOffset) 1003 .addReg(SPReg); 1004 } 1005 1006 // Skip the rest if this is a leaf function & all spills fit in the Red Zone. 1007 if (!FrameSize) 1008 return; 1009 1010 // Adjust stack pointer: r1 += NegFrameSize. 1011 // If there is a preferred stack alignment, align R1 now 1012 1013 if (HasBP && HasRedZone) { 1014 // Save a copy of r1 as the base pointer. 1015 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 1016 .addReg(SPReg) 1017 .addReg(SPReg); 1018 } 1019 1020 // Have we generated a STUX instruction to claim stack frame? If so, 1021 // the negated frame size will be placed in ScratchReg. 1022 bool HasSTUX = false; 1023 1024 // If FrameSize <= TLI.getStackProbeSize(MF), as POWER ABI requires backchain 1025 // pointer is always stored at SP, we will get a free probe due to an essential 1026 // STU(X) instruction. 1027 if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) { 1028 // To be consistent with other targets, a pseudo instruction is emitted and 1029 // will be later expanded in `inlineStackProbe`. 1030 BuildMI(MBB, MBBI, dl, 1031 TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64 1032 : PPC::PROBED_STACKALLOC_32)) 1033 .addDef(ScratchReg) 1034 .addDef(TempReg) // TempReg stores the old sp. 1035 .addImm(NegFrameSize); 1036 // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we 1037 // update the ScratchReg to meet the assumption that ScratchReg contains 1038 // the NegFrameSize. This solution is rather tricky. 1039 if (!HasRedZone) { 1040 BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) 1041 .addReg(TempReg) 1042 .addReg(SPReg); 1043 HasSTUX = true; 1044 } 1045 } else { 1046 // This condition must be kept in sync with canUseAsPrologue. 1047 if (HasBP && MaxAlign > 1) { 1048 if (isPPC64) 1049 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg) 1050 .addReg(SPReg) 1051 .addImm(0) 1052 .addImm(64 - Log2(MaxAlign)); 1053 else // PPC32... 1054 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg) 1055 .addReg(SPReg) 1056 .addImm(0) 1057 .addImm(32 - Log2(MaxAlign)) 1058 .addImm(31); 1059 if (!isLargeFrame) { 1060 BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg) 1061 .addReg(ScratchReg, RegState::Kill) 1062 .addImm(NegFrameSize); 1063 } else { 1064 assert(!SingleScratchReg && "Only a single scratch reg available"); 1065 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg) 1066 .addImm(NegFrameSize >> 16); 1067 BuildMI(MBB, MBBI, dl, OrImmInst, TempReg) 1068 .addReg(TempReg, RegState::Kill) 1069 .addImm(NegFrameSize & 0xFFFF); 1070 BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg) 1071 .addReg(ScratchReg, RegState::Kill) 1072 .addReg(TempReg, RegState::Kill); 1073 } 1074 1075 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 1076 .addReg(SPReg, RegState::Kill) 1077 .addReg(SPReg) 1078 .addReg(ScratchReg); 1079 HasSTUX = true; 1080 1081 } else if (!isLargeFrame) { 1082 BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg) 1083 .addReg(SPReg) 1084 .addImm(NegFrameSize) 1085 .addReg(SPReg); 1086 1087 } else { 1088 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1089 .addImm(NegFrameSize >> 16); 1090 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1091 .addReg(ScratchReg, RegState::Kill) 1092 .addImm(NegFrameSize & 0xFFFF); 1093 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 1094 .addReg(SPReg, RegState::Kill) 1095 .addReg(SPReg) 1096 .addReg(ScratchReg); 1097 HasSTUX = true; 1098 } 1099 } 1100 1101 // Save the TOC register after the stack pointer update if a prologue TOC 1102 // save is required for the function. 1103 if (MustSaveTOC) { 1104 assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2"); 1105 BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD)) 1106 .addReg(TOCReg, getKillRegState(true)) 1107 .addImm(TOCSaveOffset) 1108 .addReg(SPReg); 1109 } 1110 1111 if (!HasRedZone) { 1112 assert(!isPPC64 && "A red zone is always available on PPC64"); 1113 if (HasSTUX) { 1114 // The negated frame size is in ScratchReg, and the SPReg has been 1115 // decremented by the frame size: SPReg = old SPReg + ScratchReg. 1116 // Since FPOffset, PBPOffset, etc. are relative to the beginning of 1117 // the stack frame (i.e. the old SP), ideally, we would put the old 1118 // SP into a register and use it as the base for the stores. The 1119 // problem is that the only available register may be ScratchReg, 1120 // which could be R0, and R0 cannot be used as a base address. 1121 1122 // First, set ScratchReg to the old SP. This may need to be modified 1123 // later. 1124 BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) 1125 .addReg(ScratchReg, RegState::Kill) 1126 .addReg(SPReg); 1127 1128 if (ScratchReg == PPC::R0) { 1129 // R0 cannot be used as a base register, but it can be used as an 1130 // index in a store-indexed. 1131 int LastOffset = 0; 1132 if (HasFP) { 1133 // R0 += (FPOffset-LastOffset). 1134 // Need addic, since addi treats R0 as 0. 1135 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1136 .addReg(ScratchReg) 1137 .addImm(FPOffset-LastOffset); 1138 LastOffset = FPOffset; 1139 // Store FP into *R0. 1140 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1141 .addReg(FPReg, RegState::Kill) // Save FP. 1142 .addReg(PPC::ZERO) 1143 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1144 } 1145 if (FI->usesPICBase()) { 1146 // R0 += (PBPOffset-LastOffset). 1147 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1148 .addReg(ScratchReg) 1149 .addImm(PBPOffset-LastOffset); 1150 LastOffset = PBPOffset; 1151 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1152 .addReg(PPC::R30, RegState::Kill) // Save PIC base pointer. 1153 .addReg(PPC::ZERO) 1154 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1155 } 1156 if (HasBP) { 1157 // R0 += (BPOffset-LastOffset). 1158 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1159 .addReg(ScratchReg) 1160 .addImm(BPOffset-LastOffset); 1161 LastOffset = BPOffset; 1162 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1163 .addReg(BPReg, RegState::Kill) // Save BP. 1164 .addReg(PPC::ZERO) 1165 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1166 // BP = R0-LastOffset 1167 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg) 1168 .addReg(ScratchReg, RegState::Kill) 1169 .addImm(-LastOffset); 1170 } 1171 } else { 1172 // ScratchReg is not R0, so use it as the base register. It is 1173 // already set to the old SP, so we can use the offsets directly. 1174 1175 // Now that the stack frame has been allocated, save all the necessary 1176 // registers using ScratchReg as the base address. 1177 if (HasFP) 1178 BuildMI(MBB, MBBI, dl, StoreInst) 1179 .addReg(FPReg) 1180 .addImm(FPOffset) 1181 .addReg(ScratchReg); 1182 if (FI->usesPICBase()) 1183 BuildMI(MBB, MBBI, dl, StoreInst) 1184 .addReg(PPC::R30) 1185 .addImm(PBPOffset) 1186 .addReg(ScratchReg); 1187 if (HasBP) { 1188 BuildMI(MBB, MBBI, dl, StoreInst) 1189 .addReg(BPReg) 1190 .addImm(BPOffset) 1191 .addReg(ScratchReg); 1192 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 1193 .addReg(ScratchReg, RegState::Kill) 1194 .addReg(ScratchReg); 1195 } 1196 } 1197 } else { 1198 // The frame size is a known 16-bit constant (fitting in the immediate 1199 // field of STWU). To be here we have to be compiling for PPC32. 1200 // Since the SPReg has been decreased by FrameSize, add it back to each 1201 // offset. 1202 if (HasFP) 1203 BuildMI(MBB, MBBI, dl, StoreInst) 1204 .addReg(FPReg) 1205 .addImm(FrameSize + FPOffset) 1206 .addReg(SPReg); 1207 if (FI->usesPICBase()) 1208 BuildMI(MBB, MBBI, dl, StoreInst) 1209 .addReg(PPC::R30) 1210 .addImm(FrameSize + PBPOffset) 1211 .addReg(SPReg); 1212 if (HasBP) { 1213 BuildMI(MBB, MBBI, dl, StoreInst) 1214 .addReg(BPReg) 1215 .addImm(FrameSize + BPOffset) 1216 .addReg(SPReg); 1217 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg) 1218 .addReg(SPReg) 1219 .addImm(FrameSize); 1220 } 1221 } 1222 } 1223 1224 // Add Call Frame Information for the instructions we generated above. 1225 if (needsCFI) { 1226 unsigned CFIIndex; 1227 1228 if (HasBP) { 1229 // Define CFA in terms of BP. Do this in preference to using FP/SP, 1230 // because if the stack needed aligning then CFA won't be at a fixed 1231 // offset from FP/SP. 1232 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1233 CFIIndex = MF.addFrameInst( 1234 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1235 } else { 1236 // Adjust the definition of CFA to account for the change in SP. 1237 assert(NegFrameSize); 1238 CFIIndex = MF.addFrameInst( 1239 MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize)); 1240 } 1241 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1242 .addCFIIndex(CFIIndex); 1243 1244 if (HasFP) { 1245 // Describe where FP was saved, at a fixed offset from CFA. 1246 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1247 CFIIndex = MF.addFrameInst( 1248 MCCFIInstruction::createOffset(nullptr, Reg, FPOffset)); 1249 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1250 .addCFIIndex(CFIIndex); 1251 } 1252 1253 if (FI->usesPICBase()) { 1254 // Describe where FP was saved, at a fixed offset from CFA. 1255 unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true); 1256 CFIIndex = MF.addFrameInst( 1257 MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset)); 1258 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1259 .addCFIIndex(CFIIndex); 1260 } 1261 1262 if (HasBP) { 1263 // Describe where BP was saved, at a fixed offset from CFA. 1264 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1265 CFIIndex = MF.addFrameInst( 1266 MCCFIInstruction::createOffset(nullptr, Reg, BPOffset)); 1267 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1268 .addCFIIndex(CFIIndex); 1269 } 1270 1271 if (MustSaveLR) { 1272 // Describe where LR was saved, at a fixed offset from CFA. 1273 unsigned Reg = MRI->getDwarfRegNum(LRReg, true); 1274 CFIIndex = MF.addFrameInst( 1275 MCCFIInstruction::createOffset(nullptr, Reg, LROffset)); 1276 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1277 .addCFIIndex(CFIIndex); 1278 } 1279 } 1280 1281 // If there is a frame pointer, copy R1 into R31 1282 if (HasFP) { 1283 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1284 .addReg(SPReg) 1285 .addReg(SPReg); 1286 1287 if (!HasBP && needsCFI) { 1288 // Change the definition of CFA from SP+offset to FP+offset, because SP 1289 // will change at every alloca. 1290 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1291 unsigned CFIIndex = MF.addFrameInst( 1292 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1293 1294 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1295 .addCFIIndex(CFIIndex); 1296 } 1297 } 1298 1299 if (needsCFI) { 1300 // Describe where callee saved registers were saved, at fixed offsets from 1301 // CFA. 1302 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 1303 for (unsigned I = 0, E = CSI.size(); I != E; ++I) { 1304 unsigned Reg = CSI[I].getReg(); 1305 if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue; 1306 1307 // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just 1308 // subregisters of CR2. We just need to emit a move of CR2. 1309 if (PPC::CRBITRCRegClass.contains(Reg)) 1310 continue; 1311 1312 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 1313 continue; 1314 1315 // For SVR4, don't emit a move for the CR spill slot if we haven't 1316 // spilled CRs. 1317 if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4) 1318 && !MustSaveCR) 1319 continue; 1320 1321 // For 64-bit SVR4 when we have spilled CRs, the spill location 1322 // is SP+8, not a frame-relative slot. 1323 if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { 1324 // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for 1325 // the whole CR word. In the ELFv2 ABI, every CR that was 1326 // actually saved gets its own CFI record. 1327 unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2; 1328 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 1329 nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset)); 1330 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1331 .addCFIIndex(CFIIndex); 1332 continue; 1333 } 1334 1335 if (CSI[I].isSpilledToReg()) { 1336 unsigned SpilledReg = CSI[I].getDstReg(); 1337 unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister( 1338 nullptr, MRI->getDwarfRegNum(Reg, true), 1339 MRI->getDwarfRegNum(SpilledReg, true))); 1340 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1341 .addCFIIndex(CFIRegister); 1342 } else { 1343 int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx()); 1344 // We have changed the object offset above but we do not want to change 1345 // the actual offsets in the CFI instruction so we have to undo the 1346 // offset change here. 1347 if (MovingStackUpdateDown) 1348 Offset -= NegFrameSize; 1349 1350 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 1351 nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); 1352 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1353 .addCFIIndex(CFIIndex); 1354 } 1355 } 1356 } 1357 } 1358 1359 void PPCFrameLowering::inlineStackProbe(MachineFunction &MF, 1360 MachineBasicBlock &PrologMBB) const { 1361 // TODO: Generate CFI instructions. 1362 bool isPPC64 = Subtarget.isPPC64(); 1363 const PPCTargetLowering &TLI = *Subtarget.getTargetLowering(); 1364 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 1365 MachineFrameInfo &MFI = MF.getFrameInfo(); 1366 MachineModuleInfo &MMI = MF.getMMI(); 1367 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); 1368 // AIX assembler does not support cfi directives. 1369 const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI(); 1370 auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) { 1371 int Opc = MI.getOpcode(); 1372 return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32; 1373 }); 1374 if (StackAllocMIPos == PrologMBB.end()) 1375 return; 1376 const BasicBlock *ProbedBB = PrologMBB.getBasicBlock(); 1377 DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos); 1378 MachineInstr &MI = *StackAllocMIPos; 1379 int64_t NegFrameSize = MI.getOperand(2).getImm(); 1380 int64_t NegProbeSize = -(int64_t)TLI.getStackProbeSize(MF); 1381 assert(isInt<32>(NegProbeSize) && "Unhandled probe size"); 1382 int64_t NumBlocks = NegFrameSize / NegProbeSize; 1383 int64_t NegResidualSize = NegFrameSize % NegProbeSize; 1384 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1; 1385 Register ScratchReg = MI.getOperand(0).getReg(); 1386 Register FPReg = MI.getOperand(1).getReg(); 1387 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1388 bool HasBP = RegInfo->hasBasePointer(MF); 1389 Align MaxAlign = MFI.getMaxAlign(); 1390 // Initialize current frame pointer. 1391 const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR); 1392 BuildMI(PrologMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg); 1393 // Subroutines to generate .cfi_* directives. 1394 auto buildDefCFAReg = [&](MachineBasicBlock &MBB, 1395 MachineBasicBlock::iterator MBBI, Register Reg) { 1396 unsigned RegNum = MRI->getDwarfRegNum(Reg, true); 1397 unsigned CFIIndex = MF.addFrameInst( 1398 MCCFIInstruction::createDefCfaRegister(nullptr, RegNum)); 1399 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1400 .addCFIIndex(CFIIndex); 1401 }; 1402 auto buildDefCFA = [&](MachineBasicBlock &MBB, 1403 MachineBasicBlock::iterator MBBI, Register Reg, 1404 int Offset) { 1405 unsigned RegNum = MRI->getDwarfRegNum(Reg, true); 1406 unsigned CFIIndex = MBB.getParent()->addFrameInst( 1407 MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset)); 1408 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1409 .addCFIIndex(CFIIndex); 1410 }; 1411 // Subroutine to determine if we can use the Imm as part of d-form. 1412 auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; }; 1413 // Subroutine to materialize the Imm into TempReg. 1414 auto MaterializeImm = [&](MachineBasicBlock &MBB, 1415 MachineBasicBlock::iterator MBBI, int64_t Imm, 1416 Register &TempReg) { 1417 assert(isInt<32>(Imm) && "Unhandled imm"); 1418 if (isInt<16>(Imm)) 1419 BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg) 1420 .addImm(Imm); 1421 else { 1422 BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg) 1423 .addImm(Imm >> 16); 1424 BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg) 1425 .addReg(TempReg) 1426 .addImm(Imm & 0xFFFF); 1427 } 1428 }; 1429 // Subroutine to store frame pointer and decrease stack pointer by probe size. 1430 auto allocateAndProbe = [&](MachineBasicBlock &MBB, 1431 MachineBasicBlock::iterator MBBI, int64_t NegSize, 1432 Register NegSizeReg, bool UseDForm) { 1433 if (UseDForm) 1434 BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg) 1435 .addReg(FPReg) 1436 .addImm(NegSize) 1437 .addReg(SPReg); 1438 else 1439 BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg) 1440 .addReg(FPReg) 1441 .addReg(SPReg) 1442 .addReg(NegSizeReg); 1443 }; 1444 // Use FPReg to calculate CFA. 1445 if (needsCFI) 1446 buildDefCFA(PrologMBB, {MI}, FPReg, 0); 1447 // For case HasBP && MaxAlign > 1, we have to align the SP by performing 1448 // SP = SP - SP % MaxAlign. 1449 if (HasBP && MaxAlign > 1) { 1450 if (isPPC64) 1451 BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg) 1452 .addReg(FPReg) 1453 .addImm(0) 1454 .addImm(64 - Log2(MaxAlign)); 1455 else 1456 BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg) 1457 .addReg(FPReg) 1458 .addImm(0) 1459 .addImm(32 - Log2(MaxAlign)) 1460 .addImm(31); 1461 BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBFC8 : PPC::SUBFC), 1462 SPReg) 1463 .addReg(ScratchReg) 1464 .addReg(SPReg); 1465 } 1466 // Probe residual part. 1467 if (NegResidualSize) { 1468 bool ResidualUseDForm = CanUseDForm(NegResidualSize); 1469 if (!ResidualUseDForm) 1470 MaterializeImm(PrologMBB, {MI}, NegResidualSize, ScratchReg); 1471 allocateAndProbe(PrologMBB, {MI}, NegResidualSize, ScratchReg, 1472 ResidualUseDForm); 1473 } 1474 bool UseDForm = CanUseDForm(NegProbeSize); 1475 // If number of blocks is small, just probe them directly. 1476 if (NumBlocks < 3) { 1477 if (!UseDForm) 1478 MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg); 1479 for (int i = 0; i < NumBlocks; ++i) 1480 allocateAndProbe(PrologMBB, {MI}, NegProbeSize, ScratchReg, UseDForm); 1481 if (needsCFI) { 1482 // Restore using SPReg to calculate CFA. 1483 buildDefCFAReg(PrologMBB, {MI}, SPReg); 1484 } 1485 } else { 1486 // Since CTR is a volatile register and current shrinkwrap implementation 1487 // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a 1488 // CTR loop to probe. 1489 // Calculate trip count and stores it in CTRReg. 1490 MaterializeImm(PrologMBB, {MI}, NumBlocks, ScratchReg); 1491 BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR)) 1492 .addReg(ScratchReg, RegState::Kill); 1493 if (!UseDForm) 1494 MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg); 1495 // Create MBBs of the loop. 1496 MachineFunction::iterator MBBInsertPoint = 1497 std::next(PrologMBB.getIterator()); 1498 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB); 1499 MF.insert(MBBInsertPoint, LoopMBB); 1500 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB); 1501 MF.insert(MBBInsertPoint, ExitMBB); 1502 // Synthesize the loop body. 1503 allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg, 1504 UseDForm); 1505 BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)) 1506 .addMBB(LoopMBB); 1507 LoopMBB->addSuccessor(ExitMBB); 1508 LoopMBB->addSuccessor(LoopMBB); 1509 // Synthesize the exit MBB. 1510 ExitMBB->splice(ExitMBB->end(), &PrologMBB, 1511 std::next(MachineBasicBlock::iterator(MI)), 1512 PrologMBB.end()); 1513 ExitMBB->transferSuccessorsAndUpdatePHIs(&PrologMBB); 1514 PrologMBB.addSuccessor(LoopMBB); 1515 if (needsCFI) { 1516 // Restore using SPReg to calculate CFA. 1517 buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg); 1518 } 1519 // Update liveins. 1520 recomputeLiveIns(*LoopMBB); 1521 recomputeLiveIns(*ExitMBB); 1522 } 1523 ++NumPrologProbed; 1524 MI.eraseFromParent(); 1525 } 1526 1527 void PPCFrameLowering::emitEpilogue(MachineFunction &MF, 1528 MachineBasicBlock &MBB) const { 1529 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1530 DebugLoc dl; 1531 1532 if (MBBI != MBB.end()) 1533 dl = MBBI->getDebugLoc(); 1534 1535 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 1536 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1537 1538 // Get alignment info so we know how to restore the SP. 1539 const MachineFrameInfo &MFI = MF.getFrameInfo(); 1540 1541 // Get the number of bytes allocated from the FrameInfo. 1542 int FrameSize = MFI.getStackSize(); 1543 1544 // Get processor type. 1545 bool isPPC64 = Subtarget.isPPC64(); 1546 1547 // Check if the link register (LR) has been saved. 1548 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1549 bool MustSaveLR = FI->mustSaveLR(); 1550 const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs(); 1551 bool MustSaveCR = !MustSaveCRs.empty(); 1552 // Do we have a frame pointer and/or base pointer for this function? 1553 bool HasFP = hasFP(MF); 1554 bool HasBP = RegInfo->hasBasePointer(MF); 1555 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); 1556 1557 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1; 1558 Register BPReg = RegInfo->getBaseRegister(MF); 1559 Register FPReg = isPPC64 ? PPC::X31 : PPC::R31; 1560 Register ScratchReg; 1561 Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 1562 const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8 1563 : PPC::MTLR ); 1564 const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD 1565 : PPC::LWZ ); 1566 const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8 1567 : PPC::LIS ); 1568 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 1569 : PPC::OR ); 1570 const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8 1571 : PPC::ORI ); 1572 const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8 1573 : PPC::ADDI ); 1574 const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8 1575 : PPC::ADD4 ); 1576 const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8 1577 : PPC::LWZ); 1578 const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8 1579 : PPC::MTOCRF); 1580 int LROffset = getReturnSaveOffset(); 1581 1582 int FPOffset = 0; 1583 1584 // Using the same bool variable as below to suppress compiler warnings. 1585 bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg, 1586 &TempReg); 1587 assert(SingleScratchReg && 1588 "Could not find an available scratch register"); 1589 1590 SingleScratchReg = ScratchReg == TempReg; 1591 1592 if (HasFP) { 1593 int FPIndex = FI->getFramePointerSaveIndex(); 1594 assert(FPIndex && "No Frame Pointer Save Slot!"); 1595 FPOffset = MFI.getObjectOffset(FPIndex); 1596 } 1597 1598 int BPOffset = 0; 1599 if (HasBP) { 1600 int BPIndex = FI->getBasePointerSaveIndex(); 1601 assert(BPIndex && "No Base Pointer Save Slot!"); 1602 BPOffset = MFI.getObjectOffset(BPIndex); 1603 } 1604 1605 int PBPOffset = 0; 1606 if (FI->usesPICBase()) { 1607 int PBPIndex = FI->getPICBasePointerSaveIndex(); 1608 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 1609 PBPOffset = MFI.getObjectOffset(PBPIndex); 1610 } 1611 1612 bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn()); 1613 1614 if (IsReturnBlock) { 1615 unsigned RetOpcode = MBBI->getOpcode(); 1616 bool UsesTCRet = RetOpcode == PPC::TCRETURNri || 1617 RetOpcode == PPC::TCRETURNdi || 1618 RetOpcode == PPC::TCRETURNai || 1619 RetOpcode == PPC::TCRETURNri8 || 1620 RetOpcode == PPC::TCRETURNdi8 || 1621 RetOpcode == PPC::TCRETURNai8; 1622 1623 if (UsesTCRet) { 1624 int MaxTCRetDelta = FI->getTailCallSPDelta(); 1625 MachineOperand &StackAdjust = MBBI->getOperand(1); 1626 assert(StackAdjust.isImm() && "Expecting immediate value."); 1627 // Adjust stack pointer. 1628 int StackAdj = StackAdjust.getImm(); 1629 int Delta = StackAdj - MaxTCRetDelta; 1630 assert((Delta >= 0) && "Delta must be positive"); 1631 if (MaxTCRetDelta>0) 1632 FrameSize += (StackAdj +Delta); 1633 else 1634 FrameSize += StackAdj; 1635 } 1636 } 1637 1638 // Frames of 32KB & larger require special handling because they cannot be 1639 // indexed into with a simple LD/LWZ immediate offset operand. 1640 bool isLargeFrame = !isInt<16>(FrameSize); 1641 1642 // On targets without red zone, the SP needs to be restored last, so that 1643 // all live contents of the stack frame are upwards of the SP. This means 1644 // that we cannot restore SP just now, since there may be more registers 1645 // to restore from the stack frame (e.g. R31). If the frame size is not 1646 // a simple immediate value, we will need a spare register to hold the 1647 // restored SP. If the frame size is known and small, we can simply adjust 1648 // the offsets of the registers to be restored, and still use SP to restore 1649 // them. In such case, the final update of SP will be to add the frame 1650 // size to it. 1651 // To simplify the code, set RBReg to the base register used to restore 1652 // values from the stack, and set SPAdd to the value that needs to be added 1653 // to the SP at the end. The default values are as if red zone was present. 1654 unsigned RBReg = SPReg; 1655 unsigned SPAdd = 0; 1656 1657 // Check if we can move the stack update instruction up the epilogue 1658 // past the callee saves. This will allow the move to LR instruction 1659 // to be executed before the restores of the callee saves which means 1660 // that the callee saves can hide the latency from the MTLR instrcution. 1661 MachineBasicBlock::iterator StackUpdateLoc = MBBI; 1662 if (stackUpdateCanBeMoved(MF)) { 1663 const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo(); 1664 for (CalleeSavedInfo CSI : Info) { 1665 int FrIdx = CSI.getFrameIdx(); 1666 // If the frame index is not negative the callee saved info belongs to a 1667 // stack object that is not a fixed stack object. We ignore non-fixed 1668 // stack objects because we won't move the update of the stack pointer 1669 // past them. 1670 if (FrIdx >= 0) 1671 continue; 1672 1673 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) 1674 StackUpdateLoc--; 1675 else { 1676 // Abort the operation as we can't update all CSR restores. 1677 StackUpdateLoc = MBBI; 1678 break; 1679 } 1680 } 1681 } 1682 1683 if (FrameSize) { 1684 // In the prologue, the loaded (or persistent) stack pointer value is 1685 // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red 1686 // zone add this offset back now. 1687 1688 // If this function contained a fastcc call and GuaranteedTailCallOpt is 1689 // enabled (=> hasFastCall()==true) the fastcc call might contain a tail 1690 // call which invalidates the stack pointer value in SP(0). So we use the 1691 // value of R31 in this case. 1692 if (FI->hasFastCall()) { 1693 assert(HasFP && "Expecting a valid frame pointer."); 1694 if (!HasRedZone) 1695 RBReg = FPReg; 1696 if (!isLargeFrame) { 1697 BuildMI(MBB, MBBI, dl, AddImmInst, RBReg) 1698 .addReg(FPReg).addImm(FrameSize); 1699 } else { 1700 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1701 .addImm(FrameSize >> 16); 1702 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1703 .addReg(ScratchReg, RegState::Kill) 1704 .addImm(FrameSize & 0xFFFF); 1705 BuildMI(MBB, MBBI, dl, AddInst) 1706 .addReg(RBReg) 1707 .addReg(FPReg) 1708 .addReg(ScratchReg); 1709 } 1710 } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) { 1711 if (HasRedZone) { 1712 BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg) 1713 .addReg(SPReg) 1714 .addImm(FrameSize); 1715 } else { 1716 // Make sure that adding FrameSize will not overflow the max offset 1717 // size. 1718 assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 && 1719 "Local offsets should be negative"); 1720 SPAdd = FrameSize; 1721 FPOffset += FrameSize; 1722 BPOffset += FrameSize; 1723 PBPOffset += FrameSize; 1724 } 1725 } else { 1726 // We don't want to use ScratchReg as a base register, because it 1727 // could happen to be R0. Use FP instead, but make sure to preserve it. 1728 if (!HasRedZone) { 1729 // If FP is not saved, copy it to ScratchReg. 1730 if (!HasFP) 1731 BuildMI(MBB, MBBI, dl, OrInst, ScratchReg) 1732 .addReg(FPReg) 1733 .addReg(FPReg); 1734 RBReg = FPReg; 1735 } 1736 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg) 1737 .addImm(0) 1738 .addReg(SPReg); 1739 } 1740 } 1741 assert(RBReg != ScratchReg && "Should have avoided ScratchReg"); 1742 // If there is no red zone, ScratchReg may be needed for holding a useful 1743 // value (although not the base register). Make sure it is not overwritten 1744 // too early. 1745 1746 // If we need to restore both the LR and the CR and we only have one 1747 // available scratch register, we must do them one at a time. 1748 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 1749 // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg 1750 // is live here. 1751 assert(HasRedZone && "Expecting red zone"); 1752 BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg) 1753 .addImm(CRSaveOffset) 1754 .addReg(SPReg); 1755 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1756 BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i]) 1757 .addReg(TempReg, getKillRegState(i == e-1)); 1758 } 1759 1760 // Delay restoring of the LR if ScratchReg is needed. This is ok, since 1761 // LR is stored in the caller's stack frame. ScratchReg will be needed 1762 // if RBReg is anything other than SP. We shouldn't use ScratchReg as 1763 // a base register anyway, because it may happen to be R0. 1764 bool LoadedLR = false; 1765 if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) { 1766 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg) 1767 .addImm(LROffset+SPAdd) 1768 .addReg(RBReg); 1769 LoadedLR = true; 1770 } 1771 1772 if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) { 1773 assert(RBReg == SPReg && "Should be using SP as a base register"); 1774 BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg) 1775 .addImm(CRSaveOffset) 1776 .addReg(RBReg); 1777 } 1778 1779 if (HasFP) { 1780 // If there is red zone, restore FP directly, since SP has already been 1781 // restored. Otherwise, restore the value of FP into ScratchReg. 1782 if (HasRedZone || RBReg == SPReg) 1783 BuildMI(MBB, MBBI, dl, LoadInst, FPReg) 1784 .addImm(FPOffset) 1785 .addReg(SPReg); 1786 else 1787 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1788 .addImm(FPOffset) 1789 .addReg(RBReg); 1790 } 1791 1792 if (FI->usesPICBase()) 1793 BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30) 1794 .addImm(PBPOffset) 1795 .addReg(RBReg); 1796 1797 if (HasBP) 1798 BuildMI(MBB, MBBI, dl, LoadInst, BPReg) 1799 .addImm(BPOffset) 1800 .addReg(RBReg); 1801 1802 // There is nothing more to be loaded from the stack, so now we can 1803 // restore SP: SP = RBReg + SPAdd. 1804 if (RBReg != SPReg || SPAdd != 0) { 1805 assert(!HasRedZone && "This should not happen with red zone"); 1806 // If SPAdd is 0, generate a copy. 1807 if (SPAdd == 0) 1808 BuildMI(MBB, MBBI, dl, OrInst, SPReg) 1809 .addReg(RBReg) 1810 .addReg(RBReg); 1811 else 1812 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1813 .addReg(RBReg) 1814 .addImm(SPAdd); 1815 1816 assert(RBReg != ScratchReg && "Should be using FP or SP as base register"); 1817 if (RBReg == FPReg) 1818 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1819 .addReg(ScratchReg) 1820 .addReg(ScratchReg); 1821 1822 // Now load the LR from the caller's stack frame. 1823 if (MustSaveLR && !LoadedLR) 1824 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1825 .addImm(LROffset) 1826 .addReg(SPReg); 1827 } 1828 1829 if (MustSaveCR && 1830 !(SingleScratchReg && MustSaveLR)) 1831 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1832 BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i]) 1833 .addReg(TempReg, getKillRegState(i == e-1)); 1834 1835 if (MustSaveLR) 1836 BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg); 1837 1838 // Callee pop calling convention. Pop parameter/linkage area. Used for tail 1839 // call optimization 1840 if (IsReturnBlock) { 1841 unsigned RetOpcode = MBBI->getOpcode(); 1842 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1843 (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) && 1844 MF.getFunction().getCallingConv() == CallingConv::Fast) { 1845 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1846 unsigned CallerAllocatedAmt = FI->getMinReservedArea(); 1847 1848 if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) { 1849 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1850 .addReg(SPReg).addImm(CallerAllocatedAmt); 1851 } else { 1852 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1853 .addImm(CallerAllocatedAmt >> 16); 1854 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1855 .addReg(ScratchReg, RegState::Kill) 1856 .addImm(CallerAllocatedAmt & 0xFFFF); 1857 BuildMI(MBB, MBBI, dl, AddInst) 1858 .addReg(SPReg) 1859 .addReg(FPReg) 1860 .addReg(ScratchReg); 1861 } 1862 } else { 1863 createTailCallBranchInstr(MBB); 1864 } 1865 } 1866 } 1867 1868 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const { 1869 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1870 1871 // If we got this far a first terminator should exist. 1872 assert(MBBI != MBB.end() && "Failed to find the first terminator."); 1873 1874 DebugLoc dl = MBBI->getDebugLoc(); 1875 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 1876 1877 // Create branch instruction for pseudo tail call return instruction. 1878 // The TCRETURNdi variants are direct calls. Valid targets for those are 1879 // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel 1880 // since we can tail call external functions with PC-Rel (i.e. we don't need 1881 // to worry about different TOC pointers). Some of the external functions will 1882 // be MO_GlobalAddress while others like memcpy for example, are going to 1883 // be MO_ExternalSymbol. 1884 unsigned RetOpcode = MBBI->getOpcode(); 1885 if (RetOpcode == PPC::TCRETURNdi) { 1886 MBBI = MBB.getLastNonDebugInstr(); 1887 MachineOperand &JumpTarget = MBBI->getOperand(0); 1888 if (JumpTarget.isGlobal()) 1889 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). 1890 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); 1891 else if (JumpTarget.isSymbol()) 1892 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). 1893 addExternalSymbol(JumpTarget.getSymbolName()); 1894 else 1895 llvm_unreachable("Expecting Global or External Symbol"); 1896 } else if (RetOpcode == PPC::TCRETURNri) { 1897 MBBI = MBB.getLastNonDebugInstr(); 1898 assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); 1899 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR)); 1900 } else if (RetOpcode == PPC::TCRETURNai) { 1901 MBBI = MBB.getLastNonDebugInstr(); 1902 MachineOperand &JumpTarget = MBBI->getOperand(0); 1903 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm()); 1904 } else if (RetOpcode == PPC::TCRETURNdi8) { 1905 MBBI = MBB.getLastNonDebugInstr(); 1906 MachineOperand &JumpTarget = MBBI->getOperand(0); 1907 if (JumpTarget.isGlobal()) 1908 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). 1909 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); 1910 else if (JumpTarget.isSymbol()) 1911 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). 1912 addExternalSymbol(JumpTarget.getSymbolName()); 1913 else 1914 llvm_unreachable("Expecting Global or External Symbol"); 1915 } else if (RetOpcode == PPC::TCRETURNri8) { 1916 MBBI = MBB.getLastNonDebugInstr(); 1917 assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); 1918 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8)); 1919 } else if (RetOpcode == PPC::TCRETURNai8) { 1920 MBBI = MBB.getLastNonDebugInstr(); 1921 MachineOperand &JumpTarget = MBBI->getOperand(0); 1922 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm()); 1923 } 1924 } 1925 1926 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF, 1927 BitVector &SavedRegs, 1928 RegScavenger *RS) const { 1929 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); 1930 1931 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1932 1933 // Save and clear the LR state. 1934 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1935 unsigned LR = RegInfo->getRARegister(); 1936 FI->setMustSaveLR(MustSaveLR(MF, LR)); 1937 SavedRegs.reset(LR); 1938 1939 // Save R31 if necessary 1940 int FPSI = FI->getFramePointerSaveIndex(); 1941 const bool isPPC64 = Subtarget.isPPC64(); 1942 MachineFrameInfo &MFI = MF.getFrameInfo(); 1943 1944 // If the frame pointer save index hasn't been defined yet. 1945 if (!FPSI && needsFP(MF)) { 1946 // Find out what the fix offset of the frame pointer save area. 1947 int FPOffset = getFramePointerSaveOffset(); 1948 // Allocate the frame index for frame pointer save area. 1949 FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); 1950 // Save the result. 1951 FI->setFramePointerSaveIndex(FPSI); 1952 } 1953 1954 int BPSI = FI->getBasePointerSaveIndex(); 1955 if (!BPSI && RegInfo->hasBasePointer(MF)) { 1956 int BPOffset = getBasePointerSaveOffset(); 1957 // Allocate the frame index for the base pointer save area. 1958 BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true); 1959 // Save the result. 1960 FI->setBasePointerSaveIndex(BPSI); 1961 } 1962 1963 // Reserve stack space for the PIC Base register (R30). 1964 // Only used in SVR4 32-bit. 1965 if (FI->usesPICBase()) { 1966 int PBPSI = MFI.CreateFixedObject(4, -8, true); 1967 FI->setPICBasePointerSaveIndex(PBPSI); 1968 } 1969 1970 // Make sure we don't explicitly spill r31, because, for example, we have 1971 // some inline asm which explicitly clobbers it, when we otherwise have a 1972 // frame pointer and are using r31's spill slot for the prologue/epilogue 1973 // code. Same goes for the base pointer and the PIC base register. 1974 if (needsFP(MF)) 1975 SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31); 1976 if (RegInfo->hasBasePointer(MF)) 1977 SavedRegs.reset(RegInfo->getBaseRegister(MF)); 1978 if (FI->usesPICBase()) 1979 SavedRegs.reset(PPC::R30); 1980 1981 // Reserve stack space to move the linkage area to in case of a tail call. 1982 int TCSPDelta = 0; 1983 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1984 (TCSPDelta = FI->getTailCallSPDelta()) < 0) { 1985 MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true); 1986 } 1987 1988 // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4. 1989 // For 64-bit SVR4, and all flavors of AIX we create a FixedStack 1990 // object at the offset of the CR-save slot in the linkage area. The actual 1991 // save and restore of the condition register will be created as part of the 1992 // prologue and epilogue insertion, but the FixedStack object is needed to 1993 // keep the CalleSavedInfo valid. 1994 if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) || 1995 SavedRegs.test(PPC::CR4))) { 1996 const uint64_t SpillSize = 4; // Condition register is always 4 bytes. 1997 const int64_t SpillOffset = 1998 Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4; 1999 int FrameIdx = 2000 MFI.CreateFixedObject(SpillSize, SpillOffset, 2001 /* IsImmutable */ true, /* IsAliased */ false); 2002 FI->setCRSpillFrameIndex(FrameIdx); 2003 } 2004 } 2005 2006 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, 2007 RegScavenger *RS) const { 2008 // Get callee saved register information. 2009 MachineFrameInfo &MFI = MF.getFrameInfo(); 2010 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 2011 2012 // If the function is shrink-wrapped, and if the function has a tail call, the 2013 // tail call might not be in the new RestoreBlock, so real branch instruction 2014 // won't be generated by emitEpilogue(), because shrink-wrap has chosen new 2015 // RestoreBlock. So we handle this case here. 2016 if (MFI.getSavePoint() && MFI.hasTailCall()) { 2017 MachineBasicBlock *RestoreBlock = MFI.getRestorePoint(); 2018 for (MachineBasicBlock &MBB : MF) { 2019 if (MBB.isReturnBlock() && (&MBB) != RestoreBlock) 2020 createTailCallBranchInstr(MBB); 2021 } 2022 } 2023 2024 // Early exit if no callee saved registers are modified! 2025 if (CSI.empty() && !needsFP(MF)) { 2026 addScavengingSpillSlot(MF, RS); 2027 return; 2028 } 2029 2030 unsigned MinGPR = PPC::R31; 2031 unsigned MinG8R = PPC::X31; 2032 unsigned MinFPR = PPC::F31; 2033 unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31; 2034 2035 bool HasGPSaveArea = false; 2036 bool HasG8SaveArea = false; 2037 bool HasFPSaveArea = false; 2038 bool HasVRSAVESaveArea = false; 2039 bool HasVRSaveArea = false; 2040 2041 SmallVector<CalleeSavedInfo, 18> GPRegs; 2042 SmallVector<CalleeSavedInfo, 18> G8Regs; 2043 SmallVector<CalleeSavedInfo, 18> FPRegs; 2044 SmallVector<CalleeSavedInfo, 18> VRegs; 2045 2046 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2047 unsigned Reg = CSI[i].getReg(); 2048 assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() || 2049 (Reg != PPC::X2 && Reg != PPC::R2)) && 2050 "Not expecting to try to spill R2 in a function that must save TOC"); 2051 if (PPC::GPRCRegClass.contains(Reg)) { 2052 HasGPSaveArea = true; 2053 2054 GPRegs.push_back(CSI[i]); 2055 2056 if (Reg < MinGPR) { 2057 MinGPR = Reg; 2058 } 2059 } else if (PPC::G8RCRegClass.contains(Reg)) { 2060 HasG8SaveArea = true; 2061 2062 G8Regs.push_back(CSI[i]); 2063 2064 if (Reg < MinG8R) { 2065 MinG8R = Reg; 2066 } 2067 } else if (PPC::F8RCRegClass.contains(Reg)) { 2068 HasFPSaveArea = true; 2069 2070 FPRegs.push_back(CSI[i]); 2071 2072 if (Reg < MinFPR) { 2073 MinFPR = Reg; 2074 } 2075 } else if (PPC::CRBITRCRegClass.contains(Reg) || 2076 PPC::CRRCRegClass.contains(Reg)) { 2077 ; // do nothing, as we already know whether CRs are spilled 2078 } else if (PPC::VRSAVERCRegClass.contains(Reg)) { 2079 HasVRSAVESaveArea = true; 2080 } else if (PPC::VRRCRegClass.contains(Reg) || 2081 PPC::SPERCRegClass.contains(Reg)) { 2082 // Altivec and SPE are mutually exclusive, but have the same stack 2083 // alignment requirements, so overload the save area for both cases. 2084 HasVRSaveArea = true; 2085 2086 VRegs.push_back(CSI[i]); 2087 2088 if (Reg < MinVR) { 2089 MinVR = Reg; 2090 } 2091 } else { 2092 llvm_unreachable("Unknown RegisterClass!"); 2093 } 2094 } 2095 2096 PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>(); 2097 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 2098 2099 int64_t LowerBound = 0; 2100 2101 // Take into account stack space reserved for tail calls. 2102 int TCSPDelta = 0; 2103 if (MF.getTarget().Options.GuaranteedTailCallOpt && 2104 (TCSPDelta = PFI->getTailCallSPDelta()) < 0) { 2105 LowerBound = TCSPDelta; 2106 } 2107 2108 // The Floating-point register save area is right below the back chain word 2109 // of the previous stack frame. 2110 if (HasFPSaveArea) { 2111 for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) { 2112 int FI = FPRegs[i].getFrameIdx(); 2113 2114 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2115 } 2116 2117 LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8; 2118 } 2119 2120 // Check whether the frame pointer register is allocated. If so, make sure it 2121 // is spilled to the correct offset. 2122 if (needsFP(MF)) { 2123 int FI = PFI->getFramePointerSaveIndex(); 2124 assert(FI && "No Frame Pointer Save Slot!"); 2125 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2126 // FP is R31/X31, so no need to update MinGPR/MinG8R. 2127 HasGPSaveArea = true; 2128 } 2129 2130 if (PFI->usesPICBase()) { 2131 int FI = PFI->getPICBasePointerSaveIndex(); 2132 assert(FI && "No PIC Base Pointer Save Slot!"); 2133 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2134 2135 MinGPR = std::min<unsigned>(MinGPR, PPC::R30); 2136 HasGPSaveArea = true; 2137 } 2138 2139 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 2140 if (RegInfo->hasBasePointer(MF)) { 2141 int FI = PFI->getBasePointerSaveIndex(); 2142 assert(FI && "No Base Pointer Save Slot!"); 2143 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2144 2145 Register BP = RegInfo->getBaseRegister(MF); 2146 if (PPC::G8RCRegClass.contains(BP)) { 2147 MinG8R = std::min<unsigned>(MinG8R, BP); 2148 HasG8SaveArea = true; 2149 } else if (PPC::GPRCRegClass.contains(BP)) { 2150 MinGPR = std::min<unsigned>(MinGPR, BP); 2151 HasGPSaveArea = true; 2152 } 2153 } 2154 2155 // General register save area starts right below the Floating-point 2156 // register save area. 2157 if (HasGPSaveArea || HasG8SaveArea) { 2158 // Move general register save area spill slots down, taking into account 2159 // the size of the Floating-point register save area. 2160 for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) { 2161 if (!GPRegs[i].isSpilledToReg()) { 2162 int FI = GPRegs[i].getFrameIdx(); 2163 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2164 } 2165 } 2166 2167 // Move general register save area spill slots down, taking into account 2168 // the size of the Floating-point register save area. 2169 for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) { 2170 if (!G8Regs[i].isSpilledToReg()) { 2171 int FI = G8Regs[i].getFrameIdx(); 2172 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2173 } 2174 } 2175 2176 unsigned MinReg = 2177 std::min<unsigned>(TRI->getEncodingValue(MinGPR), 2178 TRI->getEncodingValue(MinG8R)); 2179 2180 const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4; 2181 LowerBound -= (31 - MinReg + 1) * GPRegSize; 2182 } 2183 2184 // For 32-bit only, the CR save area is below the general register 2185 // save area. For 64-bit SVR4, the CR save area is addressed relative 2186 // to the stack pointer and hence does not need an adjustment here. 2187 // Only CR2 (the first nonvolatile spilled) has an associated frame 2188 // index so that we have a single uniform save area. 2189 if (spillsCR(MF) && Subtarget.is32BitELFABI()) { 2190 // Adjust the frame index of the CR spill slot. 2191 for (const auto &CSInfo : CSI) { 2192 if (CSInfo.getReg() == PPC::CR2) { 2193 int FI = CSInfo.getFrameIdx(); 2194 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2195 break; 2196 } 2197 } 2198 2199 LowerBound -= 4; // The CR save area is always 4 bytes long. 2200 } 2201 2202 if (HasVRSAVESaveArea) { 2203 // FIXME SVR4: Is it actually possible to have multiple elements in CSI 2204 // which have the VRSAVE register class? 2205 // Adjust the frame index of the VRSAVE spill slot. 2206 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2207 unsigned Reg = CSI[i].getReg(); 2208 2209 if (PPC::VRSAVERCRegClass.contains(Reg)) { 2210 int FI = CSI[i].getFrameIdx(); 2211 2212 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2213 } 2214 } 2215 2216 LowerBound -= 4; // The VRSAVE save area is always 4 bytes long. 2217 } 2218 2219 // Both Altivec and SPE have the same alignment and padding requirements 2220 // within the stack frame. 2221 if (HasVRSaveArea) { 2222 // Insert alignment padding, we need 16-byte alignment. Note: for positive 2223 // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since 2224 // we are using negative number here (the stack grows downward). We should 2225 // use formula : y = x & (~(n-1)). Where x is the size before aligning, n 2226 // is the alignment size ( n = 16 here) and y is the size after aligning. 2227 assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!"); 2228 LowerBound &= ~(15); 2229 2230 for (unsigned i = 0, e = VRegs.size(); i != e; ++i) { 2231 int FI = VRegs[i].getFrameIdx(); 2232 2233 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2234 } 2235 } 2236 2237 addScavengingSpillSlot(MF, RS); 2238 } 2239 2240 void 2241 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, 2242 RegScavenger *RS) const { 2243 // Reserve a slot closest to SP or frame pointer if we have a dynalloc or 2244 // a large stack, which will require scavenging a register to materialize a 2245 // large offset. 2246 2247 // We need to have a scavenger spill slot for spills if the frame size is 2248 // large. In case there is no free register for large-offset addressing, 2249 // this slot is used for the necessary emergency spill. Also, we need the 2250 // slot for dynamic stack allocations. 2251 2252 // The scavenger might be invoked if the frame offset does not fit into 2253 // the 16-bit immediate. We don't know the complete frame size here 2254 // because we've not yet computed callee-saved register spills or the 2255 // needed alignment padding. 2256 unsigned StackSize = determineFrameLayout(MF, true); 2257 MachineFrameInfo &MFI = MF.getFrameInfo(); 2258 if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) || 2259 hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) { 2260 const TargetRegisterClass &GPRC = PPC::GPRCRegClass; 2261 const TargetRegisterClass &G8RC = PPC::G8RCRegClass; 2262 const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC; 2263 const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo(); 2264 unsigned Size = TRI.getSpillSize(RC); 2265 Align Alignment = TRI.getSpillAlign(RC); 2266 RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false)); 2267 2268 // Might we have over-aligned allocas? 2269 bool HasAlVars = 2270 MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign(); 2271 2272 // These kinds of spills might need two registers. 2273 if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars) 2274 RS->addScavengingFrameIndex( 2275 MFI.CreateStackObject(Size, Alignment, false)); 2276 } 2277 } 2278 2279 // This function checks if a callee saved gpr can be spilled to a volatile 2280 // vector register. This occurs for leaf functions when the option 2281 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers 2282 // which were not spilled to vectors, return false so the target independent 2283 // code can handle them by assigning a FrameIdx to a stack slot. 2284 bool PPCFrameLowering::assignCalleeSavedSpillSlots( 2285 MachineFunction &MF, const TargetRegisterInfo *TRI, 2286 std::vector<CalleeSavedInfo> &CSI) const { 2287 2288 if (CSI.empty()) 2289 return true; // Early exit if no callee saved registers are modified! 2290 2291 // Early exit if cannot spill gprs to volatile vector registers. 2292 MachineFrameInfo &MFI = MF.getFrameInfo(); 2293 if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector()) 2294 return false; 2295 2296 // Build a BitVector of VSRs that can be used for spilling GPRs. 2297 BitVector BVAllocatable = TRI->getAllocatableSet(MF); 2298 BitVector BVCalleeSaved(TRI->getNumRegs()); 2299 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 2300 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); 2301 for (unsigned i = 0; CSRegs[i]; ++i) 2302 BVCalleeSaved.set(CSRegs[i]); 2303 2304 for (unsigned Reg : BVAllocatable.set_bits()) { 2305 // Set to 0 if the register is not a volatile VF/F8 register, or if it is 2306 // used in the function. 2307 if (BVCalleeSaved[Reg] || 2308 (!PPC::F8RCRegClass.contains(Reg) && 2309 !PPC::VFRCRegClass.contains(Reg)) || 2310 (MF.getRegInfo().isPhysRegUsed(Reg))) 2311 BVAllocatable.reset(Reg); 2312 } 2313 2314 bool AllSpilledToReg = true; 2315 for (auto &CS : CSI) { 2316 if (BVAllocatable.none()) 2317 return false; 2318 2319 unsigned Reg = CS.getReg(); 2320 if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) { 2321 AllSpilledToReg = false; 2322 continue; 2323 } 2324 2325 unsigned VolatileVFReg = BVAllocatable.find_first(); 2326 if (VolatileVFReg < BVAllocatable.size()) { 2327 CS.setDstReg(VolatileVFReg); 2328 BVAllocatable.reset(VolatileVFReg); 2329 } else { 2330 AllSpilledToReg = false; 2331 } 2332 } 2333 return AllSpilledToReg; 2334 } 2335 2336 bool PPCFrameLowering::spillCalleeSavedRegisters( 2337 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 2338 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { 2339 2340 MachineFunction *MF = MBB.getParent(); 2341 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 2342 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); 2343 bool MustSaveTOC = FI->mustSaveTOC(); 2344 DebugLoc DL; 2345 bool CRSpilled = false; 2346 MachineInstrBuilder CRMIB; 2347 2348 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2349 unsigned Reg = CSI[i].getReg(); 2350 // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used. 2351 if (Reg == PPC::VRSAVE) 2352 continue; 2353 2354 // CR2 through CR4 are the nonvolatile CR fields. 2355 bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4; 2356 2357 // Add the callee-saved register as live-in; it's killed at the spill. 2358 // Do not do this for callee-saved registers that are live-in to the 2359 // function because they will already be marked live-in and this will be 2360 // adding it for a second time. It is an error to add the same register 2361 // to the set more than once. 2362 const MachineRegisterInfo &MRI = MF->getRegInfo(); 2363 bool IsLiveIn = MRI.isLiveIn(Reg); 2364 if (!IsLiveIn) 2365 MBB.addLiveIn(Reg); 2366 2367 if (CRSpilled && IsCRField) { 2368 CRMIB.addReg(Reg, RegState::ImplicitKill); 2369 continue; 2370 } 2371 2372 // The actual spill will happen in the prologue. 2373 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 2374 continue; 2375 2376 // Insert the spill to the stack frame. 2377 if (IsCRField) { 2378 PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>(); 2379 if (!Subtarget.is32BitELFABI()) { 2380 // The actual spill will happen at the start of the prologue. 2381 FuncInfo->addMustSaveCR(Reg); 2382 } else { 2383 CRSpilled = true; 2384 FuncInfo->setSpillsCR(); 2385 2386 // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have 2387 // the same frame index in PPCRegisterInfo::hasReservedSpillSlot. 2388 CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12) 2389 .addReg(Reg, RegState::ImplicitKill); 2390 2391 MBB.insert(MI, CRMIB); 2392 MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW)) 2393 .addReg(PPC::R12, 2394 getKillRegState(true)), 2395 CSI[i].getFrameIdx())); 2396 } 2397 } else { 2398 if (CSI[i].isSpilledToReg()) { 2399 NumPESpillVSR++; 2400 BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg()) 2401 .addReg(Reg, getKillRegState(true)); 2402 } else { 2403 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 2404 // Use !IsLiveIn for the kill flag. 2405 // We do not want to kill registers that are live in this function 2406 // before their use because they will become undefined registers. 2407 // Functions without NoUnwind need to preserve the order of elements in 2408 // saved vector registers. 2409 if (Subtarget.needsSwapsForVSXMemOps() && 2410 !MF->getFunction().hasFnAttribute(Attribute::NoUnwind)) 2411 TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn, 2412 CSI[i].getFrameIdx(), RC, TRI); 2413 else 2414 TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(), 2415 RC, TRI); 2416 } 2417 } 2418 } 2419 return true; 2420 } 2421 2422 static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled, 2423 bool CR4Spilled, MachineBasicBlock &MBB, 2424 MachineBasicBlock::iterator MI, 2425 ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) { 2426 2427 MachineFunction *MF = MBB.getParent(); 2428 const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo(); 2429 DebugLoc DL; 2430 unsigned MoveReg = PPC::R12; 2431 2432 // 32-bit: FP-relative 2433 MBB.insert(MI, 2434 addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg), 2435 CSI[CSIIndex].getFrameIdx())); 2436 2437 unsigned RestoreOp = PPC::MTOCRF; 2438 if (CR2Spilled) 2439 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2) 2440 .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled))); 2441 2442 if (CR3Spilled) 2443 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3) 2444 .addReg(MoveReg, getKillRegState(!CR4Spilled))); 2445 2446 if (CR4Spilled) 2447 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4) 2448 .addReg(MoveReg, getKillRegState(true))); 2449 } 2450 2451 MachineBasicBlock::iterator PPCFrameLowering:: 2452 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, 2453 MachineBasicBlock::iterator I) const { 2454 const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); 2455 if (MF.getTarget().Options.GuaranteedTailCallOpt && 2456 I->getOpcode() == PPC::ADJCALLSTACKUP) { 2457 // Add (actually subtract) back the amount the callee popped on return. 2458 if (int CalleeAmt = I->getOperand(1).getImm()) { 2459 bool is64Bit = Subtarget.isPPC64(); 2460 CalleeAmt *= -1; 2461 unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1; 2462 unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0; 2463 unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI; 2464 unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4; 2465 unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS; 2466 unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI; 2467 const DebugLoc &dl = I->getDebugLoc(); 2468 2469 if (isInt<16>(CalleeAmt)) { 2470 BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg) 2471 .addReg(StackReg, RegState::Kill) 2472 .addImm(CalleeAmt); 2473 } else { 2474 MachineBasicBlock::iterator MBBI = I; 2475 BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) 2476 .addImm(CalleeAmt >> 16); 2477 BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) 2478 .addReg(TmpReg, RegState::Kill) 2479 .addImm(CalleeAmt & 0xFFFF); 2480 BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg) 2481 .addReg(StackReg, RegState::Kill) 2482 .addReg(TmpReg); 2483 } 2484 } 2485 } 2486 // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. 2487 return MBB.erase(I); 2488 } 2489 2490 static bool isCalleeSavedCR(unsigned Reg) { 2491 return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4; 2492 } 2493 2494 bool PPCFrameLowering::restoreCalleeSavedRegisters( 2495 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 2496 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { 2497 MachineFunction *MF = MBB.getParent(); 2498 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 2499 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); 2500 bool MustSaveTOC = FI->mustSaveTOC(); 2501 bool CR2Spilled = false; 2502 bool CR3Spilled = false; 2503 bool CR4Spilled = false; 2504 unsigned CSIIndex = 0; 2505 2506 // Initialize insertion-point logic; we will be restoring in reverse 2507 // order of spill. 2508 MachineBasicBlock::iterator I = MI, BeforeI = I; 2509 bool AtStart = I == MBB.begin(); 2510 2511 if (!AtStart) 2512 --BeforeI; 2513 2514 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2515 unsigned Reg = CSI[i].getReg(); 2516 2517 // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used. 2518 if (Reg == PPC::VRSAVE) 2519 continue; 2520 2521 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 2522 continue; 2523 2524 // Restore of callee saved condition register field is handled during 2525 // epilogue insertion. 2526 if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI()) 2527 continue; 2528 2529 if (Reg == PPC::CR2) { 2530 CR2Spilled = true; 2531 // The spill slot is associated only with CR2, which is the 2532 // first nonvolatile spilled. Save it here. 2533 CSIIndex = i; 2534 continue; 2535 } else if (Reg == PPC::CR3) { 2536 CR3Spilled = true; 2537 continue; 2538 } else if (Reg == PPC::CR4) { 2539 CR4Spilled = true; 2540 continue; 2541 } else { 2542 // On 32-bit ELF when we first encounter a non-CR register after seeing at 2543 // least one CR register, restore all spilled CRs together. 2544 if (CR2Spilled || CR3Spilled || CR4Spilled) { 2545 bool is31 = needsFP(*MF); 2546 restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, 2547 CSIIndex); 2548 CR2Spilled = CR3Spilled = CR4Spilled = false; 2549 } 2550 2551 if (CSI[i].isSpilledToReg()) { 2552 DebugLoc DL; 2553 NumPEReloadVSR++; 2554 BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg) 2555 .addReg(CSI[i].getDstReg(), getKillRegState(true)); 2556 } else { 2557 // Default behavior for non-CR saves. 2558 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 2559 2560 // Functions without NoUnwind need to preserve the order of elements in 2561 // saved vector registers. 2562 if (Subtarget.needsSwapsForVSXMemOps() && 2563 !MF->getFunction().hasFnAttribute(Attribute::NoUnwind)) 2564 TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC, 2565 TRI); 2566 else 2567 TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI); 2568 2569 assert(I != MBB.begin() && 2570 "loadRegFromStackSlot didn't insert any code!"); 2571 } 2572 } 2573 2574 // Insert in reverse order. 2575 if (AtStart) 2576 I = MBB.begin(); 2577 else { 2578 I = BeforeI; 2579 ++I; 2580 } 2581 } 2582 2583 // If we haven't yet spilled the CRs, do so now. 2584 if (CR2Spilled || CR3Spilled || CR4Spilled) { 2585 assert(Subtarget.is32BitELFABI() && 2586 "Only set CR[2|3|4]Spilled on 32-bit SVR4."); 2587 bool is31 = needsFP(*MF); 2588 restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex); 2589 } 2590 2591 return true; 2592 } 2593 2594 unsigned PPCFrameLowering::getTOCSaveOffset() const { 2595 return TOCSaveOffset; 2596 } 2597 2598 unsigned PPCFrameLowering::getFramePointerSaveOffset() const { 2599 return FramePointerSaveOffset; 2600 } 2601 2602 unsigned PPCFrameLowering::getBasePointerSaveOffset() const { 2603 return BasePointerSaveOffset; 2604 } 2605 2606 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { 2607 if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled()) 2608 return false; 2609 return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() && 2610 MF.getSubtarget<PPCSubtarget>().isPPC64()); 2611 } 2612