1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains the PPC implementation of TargetFrameLowering class. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "PPCFrameLowering.h" 15 #include "PPCInstrBuilder.h" 16 #include "PPCInstrInfo.h" 17 #include "PPCMachineFunctionInfo.h" 18 #include "PPCSubtarget.h" 19 #include "PPCTargetMachine.h" 20 #include "llvm/CodeGen/MachineFrameInfo.h" 21 #include "llvm/CodeGen/MachineFunction.h" 22 #include "llvm/CodeGen/MachineInstrBuilder.h" 23 #include "llvm/CodeGen/MachineModuleInfo.h" 24 #include "llvm/CodeGen/MachineRegisterInfo.h" 25 #include "llvm/CodeGen/RegisterScavenging.h" 26 #include "llvm/IR/Function.h" 27 #include "llvm/Target/TargetOptions.h" 28 29 using namespace llvm; 30 31 /// VRRegNo - Map from a numbered VR register to its enum value. 32 /// 33 static const MCPhysReg VRRegNo[] = { 34 PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 , 35 PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15, 36 PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23, 37 PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31 38 }; 39 40 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) { 41 if (STI.isDarwinABI()) 42 return STI.isPPC64() ? 16 : 8; 43 // SVR4 ABI: 44 return STI.isPPC64() ? 16 : 4; 45 } 46 47 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) { 48 return STI.isELFv2ABI() ? 24 : 40; 49 } 50 51 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) { 52 // For the Darwin ABI: 53 // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area 54 // for saving the frame pointer (if needed.) While the published ABI has 55 // not used this slot since at least MacOSX 10.2, there is older code 56 // around that does use it, and that needs to continue to work. 57 if (STI.isDarwinABI()) 58 return STI.isPPC64() ? -8U : -4U; 59 60 // SVR4 ABI: First slot in the general register save area. 61 return STI.isPPC64() ? -8U : -4U; 62 } 63 64 static unsigned computeLinkageSize(const PPCSubtarget &STI) { 65 if (STI.isDarwinABI() || STI.isPPC64()) 66 return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4); 67 68 // SVR4 ABI: 69 return 8; 70 } 71 72 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) { 73 if (STI.isDarwinABI()) 74 return STI.isPPC64() ? -16U : -8U; 75 76 // SVR4 ABI: First slot in the general register save area. 77 return STI.isPPC64() 78 ? -16U 79 : STI.getTargetMachine().isPositionIndependent() ? -12U : -8U; 80 } 81 82 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI) 83 : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 84 STI.getPlatformStackAlignment(), 0), 85 Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)), 86 TOCSaveOffset(computeTOCSaveOffset(Subtarget)), 87 FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)), 88 LinkageSize(computeLinkageSize(Subtarget)), 89 BasePointerSaveOffset(computeBasePointerSaveOffset(STI)) {} 90 91 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. 92 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots( 93 unsigned &NumEntries) const { 94 if (Subtarget.isDarwinABI()) { 95 NumEntries = 1; 96 if (Subtarget.isPPC64()) { 97 static const SpillSlot darwin64Offsets = {PPC::X31, -8}; 98 return &darwin64Offsets; 99 } else { 100 static const SpillSlot darwinOffsets = {PPC::R31, -4}; 101 return &darwinOffsets; 102 } 103 } 104 105 // Early exit if not using the SVR4 ABI. 106 if (!Subtarget.isSVR4ABI()) { 107 NumEntries = 0; 108 return nullptr; 109 } 110 111 // Note that the offsets here overlap, but this is fixed up in 112 // processFunctionBeforeFrameFinalized. 113 114 static const SpillSlot Offsets[] = { 115 // Floating-point register save area offsets. 116 {PPC::F31, -8}, 117 {PPC::F30, -16}, 118 {PPC::F29, -24}, 119 {PPC::F28, -32}, 120 {PPC::F27, -40}, 121 {PPC::F26, -48}, 122 {PPC::F25, -56}, 123 {PPC::F24, -64}, 124 {PPC::F23, -72}, 125 {PPC::F22, -80}, 126 {PPC::F21, -88}, 127 {PPC::F20, -96}, 128 {PPC::F19, -104}, 129 {PPC::F18, -112}, 130 {PPC::F17, -120}, 131 {PPC::F16, -128}, 132 {PPC::F15, -136}, 133 {PPC::F14, -144}, 134 135 // General register save area offsets. 136 {PPC::R31, -4}, 137 {PPC::R30, -8}, 138 {PPC::R29, -12}, 139 {PPC::R28, -16}, 140 {PPC::R27, -20}, 141 {PPC::R26, -24}, 142 {PPC::R25, -28}, 143 {PPC::R24, -32}, 144 {PPC::R23, -36}, 145 {PPC::R22, -40}, 146 {PPC::R21, -44}, 147 {PPC::R20, -48}, 148 {PPC::R19, -52}, 149 {PPC::R18, -56}, 150 {PPC::R17, -60}, 151 {PPC::R16, -64}, 152 {PPC::R15, -68}, 153 {PPC::R14, -72}, 154 155 // CR save area offset. We map each of the nonvolatile CR fields 156 // to the slot for CR2, which is the first of the nonvolatile CR 157 // fields to be assigned, so that we only allocate one save slot. 158 // See PPCRegisterInfo::hasReservedSpillSlot() for more information. 159 {PPC::CR2, -4}, 160 161 // VRSAVE save area offset. 162 {PPC::VRSAVE, -4}, 163 164 // Vector register save area 165 {PPC::V31, -16}, 166 {PPC::V30, -32}, 167 {PPC::V29, -48}, 168 {PPC::V28, -64}, 169 {PPC::V27, -80}, 170 {PPC::V26, -96}, 171 {PPC::V25, -112}, 172 {PPC::V24, -128}, 173 {PPC::V23, -144}, 174 {PPC::V22, -160}, 175 {PPC::V21, -176}, 176 {PPC::V20, -192}}; 177 178 static const SpillSlot Offsets64[] = { 179 // Floating-point register save area offsets. 180 {PPC::F31, -8}, 181 {PPC::F30, -16}, 182 {PPC::F29, -24}, 183 {PPC::F28, -32}, 184 {PPC::F27, -40}, 185 {PPC::F26, -48}, 186 {PPC::F25, -56}, 187 {PPC::F24, -64}, 188 {PPC::F23, -72}, 189 {PPC::F22, -80}, 190 {PPC::F21, -88}, 191 {PPC::F20, -96}, 192 {PPC::F19, -104}, 193 {PPC::F18, -112}, 194 {PPC::F17, -120}, 195 {PPC::F16, -128}, 196 {PPC::F15, -136}, 197 {PPC::F14, -144}, 198 199 // General register save area offsets. 200 {PPC::X31, -8}, 201 {PPC::X30, -16}, 202 {PPC::X29, -24}, 203 {PPC::X28, -32}, 204 {PPC::X27, -40}, 205 {PPC::X26, -48}, 206 {PPC::X25, -56}, 207 {PPC::X24, -64}, 208 {PPC::X23, -72}, 209 {PPC::X22, -80}, 210 {PPC::X21, -88}, 211 {PPC::X20, -96}, 212 {PPC::X19, -104}, 213 {PPC::X18, -112}, 214 {PPC::X17, -120}, 215 {PPC::X16, -128}, 216 {PPC::X15, -136}, 217 {PPC::X14, -144}, 218 219 // VRSAVE save area offset. 220 {PPC::VRSAVE, -4}, 221 222 // Vector register save area 223 {PPC::V31, -16}, 224 {PPC::V30, -32}, 225 {PPC::V29, -48}, 226 {PPC::V28, -64}, 227 {PPC::V27, -80}, 228 {PPC::V26, -96}, 229 {PPC::V25, -112}, 230 {PPC::V24, -128}, 231 {PPC::V23, -144}, 232 {PPC::V22, -160}, 233 {PPC::V21, -176}, 234 {PPC::V20, -192}}; 235 236 if (Subtarget.isPPC64()) { 237 NumEntries = array_lengthof(Offsets64); 238 239 return Offsets64; 240 } else { 241 NumEntries = array_lengthof(Offsets); 242 243 return Offsets; 244 } 245 } 246 247 /// RemoveVRSaveCode - We have found that this function does not need any code 248 /// to manipulate the VRSAVE register, even though it uses vector registers. 249 /// This can happen when the only registers used are known to be live in or out 250 /// of the function. Remove all of the VRSAVE related code from the function. 251 /// FIXME: The removal of the code results in a compile failure at -O0 when the 252 /// function contains a function call, as the GPR containing original VRSAVE 253 /// contents is spilled and reloaded around the call. Without the prolog code, 254 /// the spill instruction refers to an undefined register. This code needs 255 /// to account for all uses of that GPR. 256 static void RemoveVRSaveCode(MachineInstr *MI) { 257 MachineBasicBlock *Entry = MI->getParent(); 258 MachineFunction *MF = Entry->getParent(); 259 260 // We know that the MTVRSAVE instruction immediately follows MI. Remove it. 261 MachineBasicBlock::iterator MBBI = MI; 262 ++MBBI; 263 assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE); 264 MBBI->eraseFromParent(); 265 266 bool RemovedAllMTVRSAVEs = true; 267 // See if we can find and remove the MTVRSAVE instruction from all of the 268 // epilog blocks. 269 for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) { 270 // If last instruction is a return instruction, add an epilogue 271 if (I->isReturnBlock()) { 272 bool FoundIt = false; 273 for (MBBI = I->end(); MBBI != I->begin(); ) { 274 --MBBI; 275 if (MBBI->getOpcode() == PPC::MTVRSAVE) { 276 MBBI->eraseFromParent(); // remove it. 277 FoundIt = true; 278 break; 279 } 280 } 281 RemovedAllMTVRSAVEs &= FoundIt; 282 } 283 } 284 285 // If we found and removed all MTVRSAVE instructions, remove the read of 286 // VRSAVE as well. 287 if (RemovedAllMTVRSAVEs) { 288 MBBI = MI; 289 assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?"); 290 --MBBI; 291 assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?"); 292 MBBI->eraseFromParent(); 293 } 294 295 // Finally, nuke the UPDATE_VRSAVE. 296 MI->eraseFromParent(); 297 } 298 299 // HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the 300 // instruction selector. Based on the vector registers that have been used, 301 // transform this into the appropriate ORI instruction. 302 static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) { 303 MachineFunction *MF = MI->getParent()->getParent(); 304 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); 305 DebugLoc dl = MI->getDebugLoc(); 306 307 const MachineRegisterInfo &MRI = MF->getRegInfo(); 308 unsigned UsedRegMask = 0; 309 for (unsigned i = 0; i != 32; ++i) 310 if (MRI.isPhysRegModified(VRRegNo[i])) 311 UsedRegMask |= 1 << (31-i); 312 313 // Live in and live out values already must be in the mask, so don't bother 314 // marking them. 315 for (MachineRegisterInfo::livein_iterator 316 I = MF->getRegInfo().livein_begin(), 317 E = MF->getRegInfo().livein_end(); I != E; ++I) { 318 unsigned RegNo = TRI->getEncodingValue(I->first); 319 if (VRRegNo[RegNo] == I->first) // If this really is a vector reg. 320 UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. 321 } 322 323 // Live out registers appear as use operands on return instructions. 324 for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end(); 325 UsedRegMask != 0 && BI != BE; ++BI) { 326 const MachineBasicBlock &MBB = *BI; 327 if (!MBB.isReturnBlock()) 328 continue; 329 const MachineInstr &Ret = MBB.back(); 330 for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) { 331 const MachineOperand &MO = Ret.getOperand(I); 332 if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg())) 333 continue; 334 unsigned RegNo = TRI->getEncodingValue(MO.getReg()); 335 UsedRegMask &= ~(1 << (31-RegNo)); 336 } 337 } 338 339 // If no registers are used, turn this into a copy. 340 if (UsedRegMask == 0) { 341 // Remove all VRSAVE code. 342 RemoveVRSaveCode(MI); 343 return; 344 } 345 346 unsigned SrcReg = MI->getOperand(1).getReg(); 347 unsigned DstReg = MI->getOperand(0).getReg(); 348 349 if ((UsedRegMask & 0xFFFF) == UsedRegMask) { 350 if (DstReg != SrcReg) 351 BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 352 .addReg(SrcReg) 353 .addImm(UsedRegMask); 354 else 355 BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 356 .addReg(SrcReg, RegState::Kill) 357 .addImm(UsedRegMask); 358 } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) { 359 if (DstReg != SrcReg) 360 BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 361 .addReg(SrcReg) 362 .addImm(UsedRegMask >> 16); 363 else 364 BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 365 .addReg(SrcReg, RegState::Kill) 366 .addImm(UsedRegMask >> 16); 367 } else { 368 if (DstReg != SrcReg) 369 BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 370 .addReg(SrcReg) 371 .addImm(UsedRegMask >> 16); 372 else 373 BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 374 .addReg(SrcReg, RegState::Kill) 375 .addImm(UsedRegMask >> 16); 376 377 BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 378 .addReg(DstReg, RegState::Kill) 379 .addImm(UsedRegMask & 0xFFFF); 380 } 381 382 // Remove the old UPDATE_VRSAVE instruction. 383 MI->eraseFromParent(); 384 } 385 386 static bool spillsCR(const MachineFunction &MF) { 387 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 388 return FuncInfo->isCRSpilled(); 389 } 390 391 static bool spillsVRSAVE(const MachineFunction &MF) { 392 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 393 return FuncInfo->isVRSAVESpilled(); 394 } 395 396 static bool hasSpills(const MachineFunction &MF) { 397 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 398 return FuncInfo->hasSpills(); 399 } 400 401 static bool hasNonRISpills(const MachineFunction &MF) { 402 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 403 return FuncInfo->hasNonRISpills(); 404 } 405 406 /// MustSaveLR - Return true if this function requires that we save the LR 407 /// register onto the stack in the prolog and restore it in the epilog of the 408 /// function. 409 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { 410 const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>(); 411 412 // We need a save/restore of LR if there is any def of LR (which is 413 // defined by calls, including the PIC setup sequence), or if there is 414 // some use of the LR stack slot (e.g. for builtin_return_address). 415 // (LR comes in 32 and 64 bit versions.) 416 MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR); 417 return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired(); 418 } 419 420 /// determineFrameLayout - Determine the size of the frame and maximum call 421 /// frame size. 422 unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, 423 bool UpdateMF, 424 bool UseEstimate) const { 425 MachineFrameInfo *MFI = MF.getFrameInfo(); 426 427 // Get the number of bytes to allocate from the FrameInfo 428 unsigned FrameSize = 429 UseEstimate ? MFI->estimateStackSize(MF) : MFI->getStackSize(); 430 431 // Get stack alignments. The frame must be aligned to the greatest of these: 432 unsigned TargetAlign = getStackAlignment(); // alignment required per the ABI 433 unsigned MaxAlign = MFI->getMaxAlignment(); // algmt required by data in frame 434 unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1; 435 436 const PPCRegisterInfo *RegInfo = 437 static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo()); 438 439 // If we are a leaf function, and use up to 224 bytes of stack space, 440 // don't have a frame pointer, calls, or dynamic alloca then we do not need 441 // to adjust the stack pointer (we fit in the Red Zone). 442 // The 32-bit SVR4 ABI has no Red Zone. However, it can still generate 443 // stackless code if all local vars are reg-allocated. 444 bool DisableRedZone = MF.getFunction()->hasFnAttribute(Attribute::NoRedZone); 445 unsigned LR = RegInfo->getRARegister(); 446 if (!DisableRedZone && 447 (Subtarget.isPPC64() || // 32-bit SVR4, no stack- 448 !Subtarget.isSVR4ABI() || // allocated locals. 449 FrameSize == 0) && 450 FrameSize <= 224 && // Fits in red zone. 451 !MFI->hasVarSizedObjects() && // No dynamic alloca. 452 !MFI->adjustsStack() && // No calls. 453 !MustSaveLR(MF, LR) && 454 !RegInfo->hasBasePointer(MF)) { // No special alignment. 455 // No need for frame 456 if (UpdateMF) 457 MFI->setStackSize(0); 458 return 0; 459 } 460 461 // Get the maximum call frame size of all the calls. 462 unsigned maxCallFrameSize = MFI->getMaxCallFrameSize(); 463 464 // Maximum call frame needs to be at least big enough for linkage area. 465 unsigned minCallFrameSize = getLinkageSize(); 466 maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize); 467 468 // If we have dynamic alloca then maxCallFrameSize needs to be aligned so 469 // that allocations will be aligned. 470 if (MFI->hasVarSizedObjects()) 471 maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask; 472 473 // Update maximum call frame size. 474 if (UpdateMF) 475 MFI->setMaxCallFrameSize(maxCallFrameSize); 476 477 // Include call frame size in total. 478 FrameSize += maxCallFrameSize; 479 480 // Make sure the frame is aligned. 481 FrameSize = (FrameSize + AlignMask) & ~AlignMask; 482 483 // Update frame info. 484 if (UpdateMF) 485 MFI->setStackSize(FrameSize); 486 487 return FrameSize; 488 } 489 490 // hasFP - Return true if the specified function actually has a dedicated frame 491 // pointer register. 492 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const { 493 const MachineFrameInfo *MFI = MF.getFrameInfo(); 494 // FIXME: This is pretty much broken by design: hasFP() might be called really 495 // early, before the stack layout was calculated and thus hasFP() might return 496 // true or false here depending on the time of call. 497 return (MFI->getStackSize()) && needsFP(MF); 498 } 499 500 // needsFP - Return true if the specified function should have a dedicated frame 501 // pointer register. This is true if the function has variable sized allocas or 502 // if frame pointer elimination is disabled. 503 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { 504 const MachineFrameInfo *MFI = MF.getFrameInfo(); 505 506 // Naked functions have no stack frame pushed, so we don't have a frame 507 // pointer. 508 if (MF.getFunction()->hasFnAttribute(Attribute::Naked)) 509 return false; 510 511 return MF.getTarget().Options.DisableFramePointerElim(MF) || 512 MFI->hasVarSizedObjects() || 513 MFI->hasStackMap() || MFI->hasPatchPoint() || 514 (MF.getTarget().Options.GuaranteedTailCallOpt && 515 MF.getInfo<PPCFunctionInfo>()->hasFastCall()); 516 } 517 518 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { 519 bool is31 = needsFP(MF); 520 unsigned FPReg = is31 ? PPC::R31 : PPC::R1; 521 unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1; 522 523 const PPCRegisterInfo *RegInfo = 524 static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo()); 525 bool HasBP = RegInfo->hasBasePointer(MF); 526 unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg; 527 unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FPReg; 528 529 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); 530 BI != BE; ++BI) 531 for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) { 532 --MBBI; 533 for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { 534 MachineOperand &MO = MBBI->getOperand(I); 535 if (!MO.isReg()) 536 continue; 537 538 switch (MO.getReg()) { 539 case PPC::FP: 540 MO.setReg(FPReg); 541 break; 542 case PPC::FP8: 543 MO.setReg(FP8Reg); 544 break; 545 case PPC::BP: 546 MO.setReg(BPReg); 547 break; 548 case PPC::BP8: 549 MO.setReg(BP8Reg); 550 break; 551 552 } 553 } 554 } 555 } 556 557 /* This function will do the following: 558 - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12 559 respectively (defaults recommended by the ABI) and return true 560 - If MBB is not an entry block, initialize the register scavenger and look 561 for available registers. 562 - If the defaults (R0/R12) are available, return true 563 - If TwoUniqueRegsRequired is set to true, it looks for two unique 564 registers. Otherwise, look for a single available register. 565 - If the required registers are found, set SR1 and SR2 and return true. 566 - If the required registers are not found, set SR2 or both SR1 and SR2 to 567 PPC::NoRegister and return false. 568 569 Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired 570 is not set, this function will attempt to find two different registers, but 571 still return true if only one register is available (and set SR1 == SR2). 572 */ 573 bool 574 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB, 575 bool UseAtEnd, 576 bool TwoUniqueRegsRequired, 577 unsigned *SR1, 578 unsigned *SR2) const { 579 RegScavenger RS; 580 unsigned R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0; 581 unsigned R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12; 582 583 // Set the defaults for the two scratch registers. 584 if (SR1) 585 *SR1 = R0; 586 587 if (SR2) { 588 assert (SR1 && "Asking for the second scratch register but not the first?"); 589 *SR2 = R12; 590 } 591 592 // If MBB is an entry or exit block, use R0 and R12 as the scratch registers. 593 if ((UseAtEnd && MBB->isReturnBlock()) || 594 (!UseAtEnd && (&MBB->getParent()->front() == MBB))) 595 return true; 596 597 RS.enterBasicBlock(*MBB); 598 599 if (UseAtEnd && !MBB->empty()) { 600 // The scratch register will be used at the end of the block, so must 601 // consider all registers used within the block 602 603 MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator(); 604 // If no terminator, back iterator up to previous instruction. 605 if (MBBI == MBB->end()) 606 MBBI = std::prev(MBBI); 607 608 if (MBBI != MBB->begin()) 609 RS.forward(MBBI); 610 } 611 612 // If the two registers are available, we're all good. 613 // Note that we only return here if both R0 and R12 are available because 614 // although the function may not require two unique registers, it may benefit 615 // from having two so we should try to provide them. 616 if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12)) 617 return true; 618 619 // Get the list of callee-saved registers for the target. 620 const PPCRegisterInfo *RegInfo = 621 static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo()); 622 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent()); 623 624 // Get all the available registers in the block. 625 BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass : 626 &PPC::GPRCRegClass); 627 628 // We shouldn't use callee-saved registers as scratch registers as they may be 629 // available when looking for a candidate block for shrink wrapping but not 630 // available when the actual prologue/epilogue is being emitted because they 631 // were added as live-in to the prologue block by PrologueEpilogueInserter. 632 for (int i = 0; CSRegs[i]; ++i) 633 BV.reset(CSRegs[i]); 634 635 // Set the first scratch register to the first available one. 636 if (SR1) { 637 int FirstScratchReg = BV.find_first(); 638 *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg; 639 } 640 641 // If there is another one available, set the second scratch register to that. 642 // Otherwise, set it to either PPC::NoRegister if this function requires two 643 // or to whatever SR1 is set to if this function doesn't require two. 644 if (SR2) { 645 int SecondScratchReg = BV.find_next(*SR1); 646 if (SecondScratchReg != -1) 647 *SR2 = SecondScratchReg; 648 else 649 *SR2 = TwoUniqueRegsRequired ? (unsigned)PPC::NoRegister : *SR1; 650 } 651 652 // Now that we've done our best to provide both registers, double check 653 // whether we were unable to provide enough. 654 if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U)) 655 return false; 656 657 return true; 658 } 659 660 // We need a scratch register for spilling LR and for spilling CR. By default, 661 // we use two scratch registers to hide latency. However, if only one scratch 662 // register is available, we can adjust for that by not overlapping the spill 663 // code. However, if we need to realign the stack (i.e. have a base pointer) 664 // and the stack frame is large, we need two scratch registers. 665 bool 666 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const { 667 const PPCRegisterInfo *RegInfo = 668 static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo()); 669 MachineFunction &MF = *(MBB->getParent()); 670 bool HasBP = RegInfo->hasBasePointer(MF); 671 unsigned FrameSize = determineFrameLayout(MF, false); 672 int NegFrameSize = -FrameSize; 673 bool IsLargeFrame = !isInt<16>(NegFrameSize); 674 MachineFrameInfo *MFI = MF.getFrameInfo(); 675 unsigned MaxAlign = MFI->getMaxAlignment(); 676 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); 677 678 return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1; 679 } 680 681 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const { 682 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); 683 684 return findScratchRegister(TmpMBB, false, 685 twoUniqueScratchRegsRequired(TmpMBB)); 686 } 687 688 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { 689 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); 690 691 return findScratchRegister(TmpMBB, true); 692 } 693 694 void PPCFrameLowering::emitPrologue(MachineFunction &MF, 695 MachineBasicBlock &MBB) const { 696 MachineBasicBlock::iterator MBBI = MBB.begin(); 697 MachineFrameInfo *MFI = MF.getFrameInfo(); 698 const PPCInstrInfo &TII = 699 *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo()); 700 const PPCRegisterInfo *RegInfo = 701 static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo()); 702 703 MachineModuleInfo &MMI = MF.getMMI(); 704 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); 705 DebugLoc dl; 706 bool needsCFI = MMI.hasDebugInfo() || 707 MF.getFunction()->needsUnwindTableEntry(); 708 709 // Get processor type. 710 bool isPPC64 = Subtarget.isPPC64(); 711 // Get the ABI. 712 bool isSVR4ABI = Subtarget.isSVR4ABI(); 713 bool isELFv2ABI = Subtarget.isELFv2ABI(); 714 assert((Subtarget.isDarwinABI() || isSVR4ABI) && 715 "Currently only Darwin and SVR4 ABIs are supported for PowerPC."); 716 717 // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it, 718 // process it. 719 if (!isSVR4ABI) 720 for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) { 721 if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) { 722 HandleVRSaveUpdate(MBBI, TII); 723 break; 724 } 725 } 726 727 // Move MBBI back to the beginning of the prologue block. 728 MBBI = MBB.begin(); 729 730 // Work out frame sizes. 731 unsigned FrameSize = determineFrameLayout(MF); 732 int NegFrameSize = -FrameSize; 733 if (!isInt<32>(NegFrameSize)) 734 llvm_unreachable("Unhandled stack size!"); 735 736 if (MFI->isFrameAddressTaken()) 737 replaceFPWithRealFP(MF); 738 739 // Check if the link register (LR) must be saved. 740 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 741 bool MustSaveLR = FI->mustSaveLR(); 742 const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs(); 743 bool MustSaveCR = !MustSaveCRs.empty(); 744 // Do we have a frame pointer and/or base pointer for this function? 745 bool HasFP = hasFP(MF); 746 bool HasBP = RegInfo->hasBasePointer(MF); 747 bool HasRedZone = isPPC64 || !isSVR4ABI; 748 749 unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1; 750 unsigned BPReg = RegInfo->getBaseRegister(MF); 751 unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; 752 unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR; 753 unsigned ScratchReg = 0; 754 unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 755 // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.) 756 const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8 757 : PPC::MFLR ); 758 const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD 759 : PPC::STW ); 760 const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU 761 : PPC::STWU ); 762 const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX 763 : PPC::STWUX); 764 const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8 765 : PPC::LIS ); 766 const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8 767 : PPC::ORI ); 768 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 769 : PPC::OR ); 770 const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8 771 : PPC::SUBFC); 772 const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8 773 : PPC::SUBFIC); 774 775 // Regarding this assert: Even though LR is saved in the caller's frame (i.e., 776 // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no 777 // Red Zone, an asynchronous event (a form of "callee") could claim a frame & 778 // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR. 779 assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) && 780 "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4."); 781 782 // Using the same bool variable as below to supress compiler warnings. 783 bool SingleScratchReg = 784 findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB), 785 &ScratchReg, &TempReg); 786 assert(SingleScratchReg && 787 "Required number of registers not available in this block"); 788 789 SingleScratchReg = ScratchReg == TempReg; 790 791 int LROffset = getReturnSaveOffset(); 792 793 int FPOffset = 0; 794 if (HasFP) { 795 if (isSVR4ABI) { 796 MachineFrameInfo *FFI = MF.getFrameInfo(); 797 int FPIndex = FI->getFramePointerSaveIndex(); 798 assert(FPIndex && "No Frame Pointer Save Slot!"); 799 FPOffset = FFI->getObjectOffset(FPIndex); 800 } else { 801 FPOffset = getFramePointerSaveOffset(); 802 } 803 } 804 805 int BPOffset = 0; 806 if (HasBP) { 807 if (isSVR4ABI) { 808 MachineFrameInfo *FFI = MF.getFrameInfo(); 809 int BPIndex = FI->getBasePointerSaveIndex(); 810 assert(BPIndex && "No Base Pointer Save Slot!"); 811 BPOffset = FFI->getObjectOffset(BPIndex); 812 } else { 813 BPOffset = getBasePointerSaveOffset(); 814 } 815 } 816 817 int PBPOffset = 0; 818 if (FI->usesPICBase()) { 819 MachineFrameInfo *FFI = MF.getFrameInfo(); 820 int PBPIndex = FI->getPICBasePointerSaveIndex(); 821 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 822 PBPOffset = FFI->getObjectOffset(PBPIndex); 823 } 824 825 // Get stack alignments. 826 unsigned MaxAlign = MFI->getMaxAlignment(); 827 if (HasBP && MaxAlign > 1) 828 assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) && 829 "Invalid alignment!"); 830 831 // Frames of 32KB & larger require special handling because they cannot be 832 // indexed into with a simple STDU/STWU/STD/STW immediate offset operand. 833 bool isLargeFrame = !isInt<16>(NegFrameSize); 834 835 assert((isPPC64 || !MustSaveCR) && 836 "Prologue CR saving supported only in 64-bit mode"); 837 838 // If we need to spill the CR and the LR but we don't have two separate 839 // registers available, we must spill them one at a time 840 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 841 // In the ELFv2 ABI, we are not required to save all CR fields. 842 // If only one or two CR fields are clobbered, it is more efficient to use 843 // mfocrf to selectively save just those fields, because mfocrf has short 844 // latency compares to mfcr. 845 unsigned MfcrOpcode = PPC::MFCR8; 846 unsigned CrState = RegState::ImplicitKill; 847 if (isELFv2ABI && MustSaveCRs.size() == 1) { 848 MfcrOpcode = PPC::MFOCRF8; 849 CrState = RegState::Kill; 850 } 851 MachineInstrBuilder MIB = 852 BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg); 853 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 854 MIB.addReg(MustSaveCRs[i], CrState); 855 BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) 856 .addReg(TempReg, getKillRegState(true)) 857 .addImm(8) 858 .addReg(SPReg); 859 } 860 861 if (MustSaveLR) 862 BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg); 863 864 if (MustSaveCR && 865 !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64 866 // In the ELFv2 ABI, we are not required to save all CR fields. 867 // If only one or two CR fields are clobbered, it is more efficient to use 868 // mfocrf to selectively save just those fields, because mfocrf has short 869 // latency compares to mfcr. 870 unsigned MfcrOpcode = PPC::MFCR8; 871 unsigned CrState = RegState::ImplicitKill; 872 if (isELFv2ABI && MustSaveCRs.size() == 1) { 873 MfcrOpcode = PPC::MFOCRF8; 874 CrState = RegState::Kill; 875 } 876 MachineInstrBuilder MIB = 877 BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg); 878 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 879 MIB.addReg(MustSaveCRs[i], CrState); 880 } 881 882 if (HasRedZone) { 883 if (HasFP) 884 BuildMI(MBB, MBBI, dl, StoreInst) 885 .addReg(FPReg) 886 .addImm(FPOffset) 887 .addReg(SPReg); 888 if (FI->usesPICBase()) 889 BuildMI(MBB, MBBI, dl, StoreInst) 890 .addReg(PPC::R30) 891 .addImm(PBPOffset) 892 .addReg(SPReg); 893 if (HasBP) 894 BuildMI(MBB, MBBI, dl, StoreInst) 895 .addReg(BPReg) 896 .addImm(BPOffset) 897 .addReg(SPReg); 898 } 899 900 if (MustSaveLR) 901 BuildMI(MBB, MBBI, dl, StoreInst) 902 .addReg(ScratchReg, getKillRegState(true)) 903 .addImm(LROffset) 904 .addReg(SPReg); 905 906 if (MustSaveCR && 907 !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64 908 assert(HasRedZone && "A red zone is always available on PPC64"); 909 BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) 910 .addReg(TempReg, getKillRegState(true)) 911 .addImm(8) 912 .addReg(SPReg); 913 } 914 915 // Skip the rest if this is a leaf function & all spills fit in the Red Zone. 916 if (!FrameSize) 917 return; 918 919 // Adjust stack pointer: r1 += NegFrameSize. 920 // If there is a preferred stack alignment, align R1 now 921 922 if (HasBP && HasRedZone) { 923 // Save a copy of r1 as the base pointer. 924 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 925 .addReg(SPReg) 926 .addReg(SPReg); 927 } 928 929 // Have we generated a STUX instruction to claim stack frame? If so, 930 // the negated frame size will be placed in ScratchReg. 931 bool HasSTUX = false; 932 933 // This condition must be kept in sync with canUseAsPrologue. 934 if (HasBP && MaxAlign > 1) { 935 if (isPPC64) 936 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg) 937 .addReg(SPReg) 938 .addImm(0) 939 .addImm(64 - Log2_32(MaxAlign)); 940 else // PPC32... 941 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg) 942 .addReg(SPReg) 943 .addImm(0) 944 .addImm(32 - Log2_32(MaxAlign)) 945 .addImm(31); 946 if (!isLargeFrame) { 947 BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg) 948 .addReg(ScratchReg, RegState::Kill) 949 .addImm(NegFrameSize); 950 } else { 951 assert(!SingleScratchReg && "Only a single scratch reg available"); 952 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg) 953 .addImm(NegFrameSize >> 16); 954 BuildMI(MBB, MBBI, dl, OrImmInst, TempReg) 955 .addReg(TempReg, RegState::Kill) 956 .addImm(NegFrameSize & 0xFFFF); 957 BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg) 958 .addReg(ScratchReg, RegState::Kill) 959 .addReg(TempReg, RegState::Kill); 960 } 961 962 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 963 .addReg(SPReg, RegState::Kill) 964 .addReg(SPReg) 965 .addReg(ScratchReg); 966 HasSTUX = true; 967 968 } else if (!isLargeFrame) { 969 BuildMI(MBB, MBBI, dl, StoreUpdtInst, SPReg) 970 .addReg(SPReg) 971 .addImm(NegFrameSize) 972 .addReg(SPReg); 973 974 } else { 975 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 976 .addImm(NegFrameSize >> 16); 977 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 978 .addReg(ScratchReg, RegState::Kill) 979 .addImm(NegFrameSize & 0xFFFF); 980 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 981 .addReg(SPReg, RegState::Kill) 982 .addReg(SPReg) 983 .addReg(ScratchReg); 984 HasSTUX = true; 985 } 986 987 if (!HasRedZone) { 988 assert(!isPPC64 && "A red zone is always available on PPC64"); 989 if (HasSTUX) { 990 // The negated frame size is in ScratchReg, and the SPReg has been 991 // decremented by the frame size: SPReg = old SPReg + ScratchReg. 992 // Since FPOffset, PBPOffset, etc. are relative to the beginning of 993 // the stack frame (i.e. the old SP), ideally, we would put the old 994 // SP into a register and use it as the base for the stores. The 995 // problem is that the only available register may be ScratchReg, 996 // which could be R0, and R0 cannot be used as a base address. 997 998 // First, set ScratchReg to the old SP. This may need to be modified 999 // later. 1000 BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) 1001 .addReg(ScratchReg, RegState::Kill) 1002 .addReg(SPReg); 1003 1004 if (ScratchReg == PPC::R0) { 1005 // R0 cannot be used as a base register, but it can be used as an 1006 // index in a store-indexed. 1007 int LastOffset = 0; 1008 if (HasFP) { 1009 // R0 += (FPOffset-LastOffset). 1010 // Need addic, since addi treats R0 as 0. 1011 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1012 .addReg(ScratchReg) 1013 .addImm(FPOffset-LastOffset); 1014 LastOffset = FPOffset; 1015 // Store FP into *R0. 1016 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1017 .addReg(FPReg, RegState::Kill) // Save FP. 1018 .addReg(PPC::ZERO) 1019 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1020 } 1021 if (FI->usesPICBase()) { 1022 // R0 += (PBPOffset-LastOffset). 1023 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1024 .addReg(ScratchReg) 1025 .addImm(PBPOffset-LastOffset); 1026 LastOffset = PBPOffset; 1027 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1028 .addReg(PPC::R30, RegState::Kill) // Save PIC base pointer. 1029 .addReg(PPC::ZERO) 1030 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1031 } 1032 if (HasBP) { 1033 // R0 += (BPOffset-LastOffset). 1034 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1035 .addReg(ScratchReg) 1036 .addImm(BPOffset-LastOffset); 1037 LastOffset = BPOffset; 1038 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1039 .addReg(BPReg, RegState::Kill) // Save BP. 1040 .addReg(PPC::ZERO) 1041 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1042 // BP = R0-LastOffset 1043 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg) 1044 .addReg(ScratchReg, RegState::Kill) 1045 .addImm(-LastOffset); 1046 } 1047 } else { 1048 // ScratchReg is not R0, so use it as the base register. It is 1049 // already set to the old SP, so we can use the offsets directly. 1050 1051 // Now that the stack frame has been allocated, save all the necessary 1052 // registers using ScratchReg as the base address. 1053 if (HasFP) 1054 BuildMI(MBB, MBBI, dl, StoreInst) 1055 .addReg(FPReg) 1056 .addImm(FPOffset) 1057 .addReg(ScratchReg); 1058 if (FI->usesPICBase()) 1059 BuildMI(MBB, MBBI, dl, StoreInst) 1060 .addReg(PPC::R30) 1061 .addImm(PBPOffset) 1062 .addReg(ScratchReg); 1063 if (HasBP) { 1064 BuildMI(MBB, MBBI, dl, StoreInst) 1065 .addReg(BPReg) 1066 .addImm(BPOffset) 1067 .addReg(ScratchReg); 1068 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 1069 .addReg(ScratchReg, RegState::Kill) 1070 .addReg(ScratchReg); 1071 } 1072 } 1073 } else { 1074 // The frame size is a known 16-bit constant (fitting in the immediate 1075 // field of STWU). To be here we have to be compiling for PPC32. 1076 // Since the SPReg has been decreased by FrameSize, add it back to each 1077 // offset. 1078 if (HasFP) 1079 BuildMI(MBB, MBBI, dl, StoreInst) 1080 .addReg(FPReg) 1081 .addImm(FrameSize + FPOffset) 1082 .addReg(SPReg); 1083 if (FI->usesPICBase()) 1084 BuildMI(MBB, MBBI, dl, StoreInst) 1085 .addReg(PPC::R30) 1086 .addImm(FrameSize + PBPOffset) 1087 .addReg(SPReg); 1088 if (HasBP) { 1089 BuildMI(MBB, MBBI, dl, StoreInst) 1090 .addReg(BPReg) 1091 .addImm(FrameSize + BPOffset) 1092 .addReg(SPReg); 1093 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg) 1094 .addReg(SPReg) 1095 .addImm(FrameSize); 1096 } 1097 } 1098 } 1099 1100 // Add Call Frame Information for the instructions we generated above. 1101 if (needsCFI) { 1102 unsigned CFIIndex; 1103 1104 if (HasBP) { 1105 // Define CFA in terms of BP. Do this in preference to using FP/SP, 1106 // because if the stack needed aligning then CFA won't be at a fixed 1107 // offset from FP/SP. 1108 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1109 CFIIndex = MMI.addFrameInst( 1110 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1111 } else { 1112 // Adjust the definition of CFA to account for the change in SP. 1113 assert(NegFrameSize); 1114 CFIIndex = MMI.addFrameInst( 1115 MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize)); 1116 } 1117 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1118 .addCFIIndex(CFIIndex); 1119 1120 if (HasFP) { 1121 // Describe where FP was saved, at a fixed offset from CFA. 1122 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1123 CFIIndex = MMI.addFrameInst( 1124 MCCFIInstruction::createOffset(nullptr, Reg, FPOffset)); 1125 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1126 .addCFIIndex(CFIIndex); 1127 } 1128 1129 if (FI->usesPICBase()) { 1130 // Describe where FP was saved, at a fixed offset from CFA. 1131 unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true); 1132 CFIIndex = MMI.addFrameInst( 1133 MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset)); 1134 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1135 .addCFIIndex(CFIIndex); 1136 } 1137 1138 if (HasBP) { 1139 // Describe where BP was saved, at a fixed offset from CFA. 1140 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1141 CFIIndex = MMI.addFrameInst( 1142 MCCFIInstruction::createOffset(nullptr, Reg, BPOffset)); 1143 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1144 .addCFIIndex(CFIIndex); 1145 } 1146 1147 if (MustSaveLR) { 1148 // Describe where LR was saved, at a fixed offset from CFA. 1149 unsigned Reg = MRI->getDwarfRegNum(LRReg, true); 1150 CFIIndex = MMI.addFrameInst( 1151 MCCFIInstruction::createOffset(nullptr, Reg, LROffset)); 1152 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1153 .addCFIIndex(CFIIndex); 1154 } 1155 } 1156 1157 // If there is a frame pointer, copy R1 into R31 1158 if (HasFP) { 1159 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1160 .addReg(SPReg) 1161 .addReg(SPReg); 1162 1163 if (!HasBP && needsCFI) { 1164 // Change the definition of CFA from SP+offset to FP+offset, because SP 1165 // will change at every alloca. 1166 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1167 unsigned CFIIndex = MMI.addFrameInst( 1168 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1169 1170 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1171 .addCFIIndex(CFIIndex); 1172 } 1173 } 1174 1175 if (needsCFI) { 1176 // Describe where callee saved registers were saved, at fixed offsets from 1177 // CFA. 1178 const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); 1179 for (unsigned I = 0, E = CSI.size(); I != E; ++I) { 1180 unsigned Reg = CSI[I].getReg(); 1181 if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue; 1182 1183 // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just 1184 // subregisters of CR2. We just need to emit a move of CR2. 1185 if (PPC::CRBITRCRegClass.contains(Reg)) 1186 continue; 1187 1188 // For SVR4, don't emit a move for the CR spill slot if we haven't 1189 // spilled CRs. 1190 if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4) 1191 && !MustSaveCR) 1192 continue; 1193 1194 // For 64-bit SVR4 when we have spilled CRs, the spill location 1195 // is SP+8, not a frame-relative slot. 1196 if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { 1197 // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for 1198 // the whole CR word. In the ELFv2 ABI, every CR that was 1199 // actually saved gets its own CFI record. 1200 unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2; 1201 unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( 1202 nullptr, MRI->getDwarfRegNum(CRReg, true), 8)); 1203 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1204 .addCFIIndex(CFIIndex); 1205 continue; 1206 } 1207 1208 int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx()); 1209 unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( 1210 nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); 1211 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1212 .addCFIIndex(CFIIndex); 1213 } 1214 } 1215 } 1216 1217 void PPCFrameLowering::emitEpilogue(MachineFunction &MF, 1218 MachineBasicBlock &MBB) const { 1219 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1220 DebugLoc dl; 1221 1222 if (MBBI != MBB.end()) 1223 dl = MBBI->getDebugLoc(); 1224 1225 const PPCInstrInfo &TII = 1226 *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo()); 1227 const PPCRegisterInfo *RegInfo = 1228 static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo()); 1229 1230 // Get alignment info so we know how to restore the SP. 1231 const MachineFrameInfo *MFI = MF.getFrameInfo(); 1232 1233 // Get the number of bytes allocated from the FrameInfo. 1234 int FrameSize = MFI->getStackSize(); 1235 1236 // Get processor type. 1237 bool isPPC64 = Subtarget.isPPC64(); 1238 // Get the ABI. 1239 bool isSVR4ABI = Subtarget.isSVR4ABI(); 1240 1241 // Check if the link register (LR) has been saved. 1242 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1243 bool MustSaveLR = FI->mustSaveLR(); 1244 const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs(); 1245 bool MustSaveCR = !MustSaveCRs.empty(); 1246 // Do we have a frame pointer and/or base pointer for this function? 1247 bool HasFP = hasFP(MF); 1248 bool HasBP = RegInfo->hasBasePointer(MF); 1249 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); 1250 1251 unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1; 1252 unsigned BPReg = RegInfo->getBaseRegister(MF); 1253 unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; 1254 unsigned ScratchReg = 0; 1255 unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 1256 const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8 1257 : PPC::MTLR ); 1258 const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD 1259 : PPC::LWZ ); 1260 const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8 1261 : PPC::LIS ); 1262 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 1263 : PPC::OR ); 1264 const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8 1265 : PPC::ORI ); 1266 const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8 1267 : PPC::ADDI ); 1268 const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8 1269 : PPC::ADD4 ); 1270 1271 int LROffset = getReturnSaveOffset(); 1272 1273 int FPOffset = 0; 1274 1275 // Using the same bool variable as below to supress compiler warnings. 1276 bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg, 1277 &TempReg); 1278 assert(SingleScratchReg && 1279 "Could not find an available scratch register"); 1280 1281 SingleScratchReg = ScratchReg == TempReg; 1282 1283 if (HasFP) { 1284 if (isSVR4ABI) { 1285 int FPIndex = FI->getFramePointerSaveIndex(); 1286 assert(FPIndex && "No Frame Pointer Save Slot!"); 1287 FPOffset = MFI->getObjectOffset(FPIndex); 1288 } else { 1289 FPOffset = getFramePointerSaveOffset(); 1290 } 1291 } 1292 1293 int BPOffset = 0; 1294 if (HasBP) { 1295 if (isSVR4ABI) { 1296 int BPIndex = FI->getBasePointerSaveIndex(); 1297 assert(BPIndex && "No Base Pointer Save Slot!"); 1298 BPOffset = MFI->getObjectOffset(BPIndex); 1299 } else { 1300 BPOffset = getBasePointerSaveOffset(); 1301 } 1302 } 1303 1304 int PBPOffset = 0; 1305 if (FI->usesPICBase()) { 1306 int PBPIndex = FI->getPICBasePointerSaveIndex(); 1307 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 1308 PBPOffset = MFI->getObjectOffset(PBPIndex); 1309 } 1310 1311 bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn()); 1312 1313 if (IsReturnBlock) { 1314 unsigned RetOpcode = MBBI->getOpcode(); 1315 bool UsesTCRet = RetOpcode == PPC::TCRETURNri || 1316 RetOpcode == PPC::TCRETURNdi || 1317 RetOpcode == PPC::TCRETURNai || 1318 RetOpcode == PPC::TCRETURNri8 || 1319 RetOpcode == PPC::TCRETURNdi8 || 1320 RetOpcode == PPC::TCRETURNai8; 1321 1322 if (UsesTCRet) { 1323 int MaxTCRetDelta = FI->getTailCallSPDelta(); 1324 MachineOperand &StackAdjust = MBBI->getOperand(1); 1325 assert(StackAdjust.isImm() && "Expecting immediate value."); 1326 // Adjust stack pointer. 1327 int StackAdj = StackAdjust.getImm(); 1328 int Delta = StackAdj - MaxTCRetDelta; 1329 assert((Delta >= 0) && "Delta must be positive"); 1330 if (MaxTCRetDelta>0) 1331 FrameSize += (StackAdj +Delta); 1332 else 1333 FrameSize += StackAdj; 1334 } 1335 } 1336 1337 // Frames of 32KB & larger require special handling because they cannot be 1338 // indexed into with a simple LD/LWZ immediate offset operand. 1339 bool isLargeFrame = !isInt<16>(FrameSize); 1340 1341 // On targets without red zone, the SP needs to be restored last, so that 1342 // all live contents of the stack frame are upwards of the SP. This means 1343 // that we cannot restore SP just now, since there may be more registers 1344 // to restore from the stack frame (e.g. R31). If the frame size is not 1345 // a simple immediate value, we will need a spare register to hold the 1346 // restored SP. If the frame size is known and small, we can simply adjust 1347 // the offsets of the registers to be restored, and still use SP to restore 1348 // them. In such case, the final update of SP will be to add the frame 1349 // size to it. 1350 // To simplify the code, set RBReg to the base register used to restore 1351 // values from the stack, and set SPAdd to the value that needs to be added 1352 // to the SP at the end. The default values are as if red zone was present. 1353 unsigned RBReg = SPReg; 1354 unsigned SPAdd = 0; 1355 1356 if (FrameSize) { 1357 // In the prologue, the loaded (or persistent) stack pointer value is 1358 // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red 1359 // zone add this offset back now. 1360 1361 // If this function contained a fastcc call and GuaranteedTailCallOpt is 1362 // enabled (=> hasFastCall()==true) the fastcc call might contain a tail 1363 // call which invalidates the stack pointer value in SP(0). So we use the 1364 // value of R31 in this case. 1365 if (FI->hasFastCall()) { 1366 assert(HasFP && "Expecting a valid frame pointer."); 1367 if (!HasRedZone) 1368 RBReg = FPReg; 1369 if (!isLargeFrame) { 1370 BuildMI(MBB, MBBI, dl, AddImmInst, RBReg) 1371 .addReg(FPReg).addImm(FrameSize); 1372 } else { 1373 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1374 .addImm(FrameSize >> 16); 1375 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1376 .addReg(ScratchReg, RegState::Kill) 1377 .addImm(FrameSize & 0xFFFF); 1378 BuildMI(MBB, MBBI, dl, AddInst) 1379 .addReg(RBReg) 1380 .addReg(FPReg) 1381 .addReg(ScratchReg); 1382 } 1383 } else if (!isLargeFrame && !HasBP && !MFI->hasVarSizedObjects()) { 1384 if (HasRedZone) { 1385 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1386 .addReg(SPReg) 1387 .addImm(FrameSize); 1388 } else { 1389 // Make sure that adding FrameSize will not overflow the max offset 1390 // size. 1391 assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 && 1392 "Local offsets should be negative"); 1393 SPAdd = FrameSize; 1394 FPOffset += FrameSize; 1395 BPOffset += FrameSize; 1396 PBPOffset += FrameSize; 1397 } 1398 } else { 1399 // We don't want to use ScratchReg as a base register, because it 1400 // could happen to be R0. Use FP instead, but make sure to preserve it. 1401 if (!HasRedZone) { 1402 // If FP is not saved, copy it to ScratchReg. 1403 if (!HasFP) 1404 BuildMI(MBB, MBBI, dl, OrInst, ScratchReg) 1405 .addReg(FPReg) 1406 .addReg(FPReg); 1407 RBReg = FPReg; 1408 } 1409 BuildMI(MBB, MBBI, dl, LoadInst, RBReg) 1410 .addImm(0) 1411 .addReg(SPReg); 1412 } 1413 } 1414 assert(RBReg != ScratchReg && "Should have avoided ScratchReg"); 1415 // If there is no red zone, ScratchReg may be needed for holding a useful 1416 // value (although not the base register). Make sure it is not overwritten 1417 // too early. 1418 1419 assert((isPPC64 || !MustSaveCR) && 1420 "Epilogue CR restoring supported only in 64-bit mode"); 1421 1422 // If we need to restore both the LR and the CR and we only have one 1423 // available scratch register, we must do them one at a time. 1424 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 1425 // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg 1426 // is live here. 1427 assert(HasRedZone && "Expecting red zone"); 1428 BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg) 1429 .addImm(8) 1430 .addReg(SPReg); 1431 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1432 BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i]) 1433 .addReg(TempReg, getKillRegState(i == e-1)); 1434 } 1435 1436 // Delay restoring of the LR if ScratchReg is needed. This is ok, since 1437 // LR is stored in the caller's stack frame. ScratchReg will be needed 1438 // if RBReg is anything other than SP. We shouldn't use ScratchReg as 1439 // a base register anyway, because it may happen to be R0. 1440 bool LoadedLR = false; 1441 if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) { 1442 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1443 .addImm(LROffset+SPAdd) 1444 .addReg(RBReg); 1445 LoadedLR = true; 1446 } 1447 1448 if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) { 1449 // This will only occur for PPC64. 1450 assert(isPPC64 && "Expecting 64-bit mode"); 1451 assert(RBReg == SPReg && "Should be using SP as a base register"); 1452 BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg) 1453 .addImm(8) 1454 .addReg(RBReg); 1455 } 1456 1457 if (HasFP) { 1458 // If there is red zone, restore FP directly, since SP has already been 1459 // restored. Otherwise, restore the value of FP into ScratchReg. 1460 if (HasRedZone || RBReg == SPReg) 1461 BuildMI(MBB, MBBI, dl, LoadInst, FPReg) 1462 .addImm(FPOffset) 1463 .addReg(SPReg); 1464 else 1465 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1466 .addImm(FPOffset) 1467 .addReg(RBReg); 1468 } 1469 1470 if (FI->usesPICBase()) 1471 BuildMI(MBB, MBBI, dl, LoadInst) 1472 .addReg(PPC::R30) 1473 .addImm(PBPOffset) 1474 .addReg(RBReg); 1475 1476 if (HasBP) 1477 BuildMI(MBB, MBBI, dl, LoadInst, BPReg) 1478 .addImm(BPOffset) 1479 .addReg(RBReg); 1480 1481 // There is nothing more to be loaded from the stack, so now we can 1482 // restore SP: SP = RBReg + SPAdd. 1483 if (RBReg != SPReg || SPAdd != 0) { 1484 assert(!HasRedZone && "This should not happen with red zone"); 1485 // If SPAdd is 0, generate a copy. 1486 if (SPAdd == 0) 1487 BuildMI(MBB, MBBI, dl, OrInst, SPReg) 1488 .addReg(RBReg) 1489 .addReg(RBReg); 1490 else 1491 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1492 .addReg(RBReg) 1493 .addImm(SPAdd); 1494 1495 assert(RBReg != ScratchReg && "Should be using FP or SP as base register"); 1496 if (RBReg == FPReg) 1497 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1498 .addReg(ScratchReg) 1499 .addReg(ScratchReg); 1500 1501 // Now load the LR from the caller's stack frame. 1502 if (MustSaveLR && !LoadedLR) 1503 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1504 .addImm(LROffset) 1505 .addReg(SPReg); 1506 } 1507 1508 if (MustSaveCR && 1509 !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64 1510 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1511 BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i]) 1512 .addReg(TempReg, getKillRegState(i == e-1)); 1513 1514 if (MustSaveLR) 1515 BuildMI(MBB, MBBI, dl, MTLRInst).addReg(ScratchReg); 1516 1517 // Callee pop calling convention. Pop parameter/linkage area. Used for tail 1518 // call optimization 1519 if (IsReturnBlock) { 1520 unsigned RetOpcode = MBBI->getOpcode(); 1521 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1522 (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) && 1523 MF.getFunction()->getCallingConv() == CallingConv::Fast) { 1524 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1525 unsigned CallerAllocatedAmt = FI->getMinReservedArea(); 1526 1527 if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) { 1528 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1529 .addReg(SPReg).addImm(CallerAllocatedAmt); 1530 } else { 1531 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1532 .addImm(CallerAllocatedAmt >> 16); 1533 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1534 .addReg(ScratchReg, RegState::Kill) 1535 .addImm(CallerAllocatedAmt & 0xFFFF); 1536 BuildMI(MBB, MBBI, dl, AddInst) 1537 .addReg(SPReg) 1538 .addReg(FPReg) 1539 .addReg(ScratchReg); 1540 } 1541 } else { 1542 createTailCallBranchInstr(MBB); 1543 } 1544 } 1545 } 1546 1547 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const { 1548 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1549 DebugLoc dl; 1550 1551 if (MBBI != MBB.end()) 1552 dl = MBBI->getDebugLoc(); 1553 1554 const PPCInstrInfo &TII = 1555 *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo()); 1556 1557 // Create branch instruction for pseudo tail call return instruction 1558 unsigned RetOpcode = MBBI->getOpcode(); 1559 if (RetOpcode == PPC::TCRETURNdi) { 1560 MBBI = MBB.getLastNonDebugInstr(); 1561 MachineOperand &JumpTarget = MBBI->getOperand(0); 1562 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). 1563 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); 1564 } else if (RetOpcode == PPC::TCRETURNri) { 1565 MBBI = MBB.getLastNonDebugInstr(); 1566 assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); 1567 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR)); 1568 } else if (RetOpcode == PPC::TCRETURNai) { 1569 MBBI = MBB.getLastNonDebugInstr(); 1570 MachineOperand &JumpTarget = MBBI->getOperand(0); 1571 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm()); 1572 } else if (RetOpcode == PPC::TCRETURNdi8) { 1573 MBBI = MBB.getLastNonDebugInstr(); 1574 MachineOperand &JumpTarget = MBBI->getOperand(0); 1575 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). 1576 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); 1577 } else if (RetOpcode == PPC::TCRETURNri8) { 1578 MBBI = MBB.getLastNonDebugInstr(); 1579 assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); 1580 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8)); 1581 } else if (RetOpcode == PPC::TCRETURNai8) { 1582 MBBI = MBB.getLastNonDebugInstr(); 1583 MachineOperand &JumpTarget = MBBI->getOperand(0); 1584 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm()); 1585 } 1586 } 1587 1588 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF, 1589 BitVector &SavedRegs, 1590 RegScavenger *RS) const { 1591 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); 1592 1593 const PPCRegisterInfo *RegInfo = 1594 static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo()); 1595 1596 // Save and clear the LR state. 1597 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1598 unsigned LR = RegInfo->getRARegister(); 1599 FI->setMustSaveLR(MustSaveLR(MF, LR)); 1600 SavedRegs.reset(LR); 1601 1602 // Save R31 if necessary 1603 int FPSI = FI->getFramePointerSaveIndex(); 1604 bool isPPC64 = Subtarget.isPPC64(); 1605 bool isDarwinABI = Subtarget.isDarwinABI(); 1606 MachineFrameInfo *MFI = MF.getFrameInfo(); 1607 1608 // If the frame pointer save index hasn't been defined yet. 1609 if (!FPSI && needsFP(MF)) { 1610 // Find out what the fix offset of the frame pointer save area. 1611 int FPOffset = getFramePointerSaveOffset(); 1612 // Allocate the frame index for frame pointer save area. 1613 FPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); 1614 // Save the result. 1615 FI->setFramePointerSaveIndex(FPSI); 1616 } 1617 1618 int BPSI = FI->getBasePointerSaveIndex(); 1619 if (!BPSI && RegInfo->hasBasePointer(MF)) { 1620 int BPOffset = getBasePointerSaveOffset(); 1621 // Allocate the frame index for the base pointer save area. 1622 BPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, BPOffset, true); 1623 // Save the result. 1624 FI->setBasePointerSaveIndex(BPSI); 1625 } 1626 1627 // Reserve stack space for the PIC Base register (R30). 1628 // Only used in SVR4 32-bit. 1629 if (FI->usesPICBase()) { 1630 int PBPSI = MFI->CreateFixedObject(4, -8, true); 1631 FI->setPICBasePointerSaveIndex(PBPSI); 1632 } 1633 1634 // Make sure we don't explicitly spill r31, because, for example, we have 1635 // some inline asm which explicity clobbers it, when we otherwise have a 1636 // frame pointer and are using r31's spill slot for the prologue/epilogue 1637 // code. Same goes for the base pointer and the PIC base register. 1638 if (needsFP(MF)) 1639 SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31); 1640 if (RegInfo->hasBasePointer(MF)) 1641 SavedRegs.reset(RegInfo->getBaseRegister(MF)); 1642 if (FI->usesPICBase()) 1643 SavedRegs.reset(PPC::R30); 1644 1645 // Reserve stack space to move the linkage area to in case of a tail call. 1646 int TCSPDelta = 0; 1647 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1648 (TCSPDelta = FI->getTailCallSPDelta()) < 0) { 1649 MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true); 1650 } 1651 1652 // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the 1653 // function uses CR 2, 3, or 4. 1654 if (!isPPC64 && !isDarwinABI && 1655 (SavedRegs.test(PPC::CR2) || 1656 SavedRegs.test(PPC::CR3) || 1657 SavedRegs.test(PPC::CR4))) { 1658 int FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true); 1659 FI->setCRSpillFrameIndex(FrameIdx); 1660 } 1661 } 1662 1663 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, 1664 RegScavenger *RS) const { 1665 // Early exit if not using the SVR4 ABI. 1666 if (!Subtarget.isSVR4ABI()) { 1667 addScavengingSpillSlot(MF, RS); 1668 return; 1669 } 1670 1671 // Get callee saved register information. 1672 MachineFrameInfo *FFI = MF.getFrameInfo(); 1673 const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo(); 1674 1675 // If the function is shrink-wrapped, and if the function has a tail call, the 1676 // tail call might not be in the new RestoreBlock, so real branch instruction 1677 // won't be generated by emitEpilogue(), because shrink-wrap has chosen new 1678 // RestoreBlock. So we handle this case here. 1679 if (FFI->getSavePoint() && FFI->hasTailCall()) { 1680 MachineBasicBlock *RestoreBlock = FFI->getRestorePoint(); 1681 for (MachineBasicBlock &MBB : MF) { 1682 if (MBB.isReturnBlock() && (&MBB) != RestoreBlock) 1683 createTailCallBranchInstr(MBB); 1684 } 1685 } 1686 1687 // Early exit if no callee saved registers are modified! 1688 if (CSI.empty() && !needsFP(MF)) { 1689 addScavengingSpillSlot(MF, RS); 1690 return; 1691 } 1692 1693 unsigned MinGPR = PPC::R31; 1694 unsigned MinG8R = PPC::X31; 1695 unsigned MinFPR = PPC::F31; 1696 unsigned MinVR = PPC::V31; 1697 1698 bool HasGPSaveArea = false; 1699 bool HasG8SaveArea = false; 1700 bool HasFPSaveArea = false; 1701 bool HasVRSAVESaveArea = false; 1702 bool HasVRSaveArea = false; 1703 1704 SmallVector<CalleeSavedInfo, 18> GPRegs; 1705 SmallVector<CalleeSavedInfo, 18> G8Regs; 1706 SmallVector<CalleeSavedInfo, 18> FPRegs; 1707 SmallVector<CalleeSavedInfo, 18> VRegs; 1708 1709 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 1710 unsigned Reg = CSI[i].getReg(); 1711 if (PPC::GPRCRegClass.contains(Reg)) { 1712 HasGPSaveArea = true; 1713 1714 GPRegs.push_back(CSI[i]); 1715 1716 if (Reg < MinGPR) { 1717 MinGPR = Reg; 1718 } 1719 } else if (PPC::G8RCRegClass.contains(Reg)) { 1720 HasG8SaveArea = true; 1721 1722 G8Regs.push_back(CSI[i]); 1723 1724 if (Reg < MinG8R) { 1725 MinG8R = Reg; 1726 } 1727 } else if (PPC::F8RCRegClass.contains(Reg)) { 1728 HasFPSaveArea = true; 1729 1730 FPRegs.push_back(CSI[i]); 1731 1732 if (Reg < MinFPR) { 1733 MinFPR = Reg; 1734 } 1735 } else if (PPC::CRBITRCRegClass.contains(Reg) || 1736 PPC::CRRCRegClass.contains(Reg)) { 1737 ; // do nothing, as we already know whether CRs are spilled 1738 } else if (PPC::VRSAVERCRegClass.contains(Reg)) { 1739 HasVRSAVESaveArea = true; 1740 } else if (PPC::VRRCRegClass.contains(Reg)) { 1741 HasVRSaveArea = true; 1742 1743 VRegs.push_back(CSI[i]); 1744 1745 if (Reg < MinVR) { 1746 MinVR = Reg; 1747 } 1748 } else { 1749 llvm_unreachable("Unknown RegisterClass!"); 1750 } 1751 } 1752 1753 PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>(); 1754 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 1755 1756 int64_t LowerBound = 0; 1757 1758 // Take into account stack space reserved for tail calls. 1759 int TCSPDelta = 0; 1760 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1761 (TCSPDelta = PFI->getTailCallSPDelta()) < 0) { 1762 LowerBound = TCSPDelta; 1763 } 1764 1765 // The Floating-point register save area is right below the back chain word 1766 // of the previous stack frame. 1767 if (HasFPSaveArea) { 1768 for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) { 1769 int FI = FPRegs[i].getFrameIdx(); 1770 1771 FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); 1772 } 1773 1774 LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8; 1775 } 1776 1777 // Check whether the frame pointer register is allocated. If so, make sure it 1778 // is spilled to the correct offset. 1779 if (needsFP(MF)) { 1780 HasGPSaveArea = true; 1781 1782 int FI = PFI->getFramePointerSaveIndex(); 1783 assert(FI && "No Frame Pointer Save Slot!"); 1784 1785 FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); 1786 } 1787 1788 if (PFI->usesPICBase()) { 1789 HasGPSaveArea = true; 1790 1791 int FI = PFI->getPICBasePointerSaveIndex(); 1792 assert(FI && "No PIC Base Pointer Save Slot!"); 1793 1794 FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); 1795 } 1796 1797 const PPCRegisterInfo *RegInfo = 1798 static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo()); 1799 if (RegInfo->hasBasePointer(MF)) { 1800 HasGPSaveArea = true; 1801 1802 int FI = PFI->getBasePointerSaveIndex(); 1803 assert(FI && "No Base Pointer Save Slot!"); 1804 1805 FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); 1806 } 1807 1808 // General register save area starts right below the Floating-point 1809 // register save area. 1810 if (HasGPSaveArea || HasG8SaveArea) { 1811 // Move general register save area spill slots down, taking into account 1812 // the size of the Floating-point register save area. 1813 for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) { 1814 int FI = GPRegs[i].getFrameIdx(); 1815 1816 FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); 1817 } 1818 1819 // Move general register save area spill slots down, taking into account 1820 // the size of the Floating-point register save area. 1821 for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) { 1822 int FI = G8Regs[i].getFrameIdx(); 1823 1824 FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); 1825 } 1826 1827 unsigned MinReg = 1828 std::min<unsigned>(TRI->getEncodingValue(MinGPR), 1829 TRI->getEncodingValue(MinG8R)); 1830 1831 if (Subtarget.isPPC64()) { 1832 LowerBound -= (31 - MinReg + 1) * 8; 1833 } else { 1834 LowerBound -= (31 - MinReg + 1) * 4; 1835 } 1836 } 1837 1838 // For 32-bit only, the CR save area is below the general register 1839 // save area. For 64-bit SVR4, the CR save area is addressed relative 1840 // to the stack pointer and hence does not need an adjustment here. 1841 // Only CR2 (the first nonvolatile spilled) has an associated frame 1842 // index so that we have a single uniform save area. 1843 if (spillsCR(MF) && !(Subtarget.isPPC64() && Subtarget.isSVR4ABI())) { 1844 // Adjust the frame index of the CR spill slot. 1845 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 1846 unsigned Reg = CSI[i].getReg(); 1847 1848 if ((Subtarget.isSVR4ABI() && Reg == PPC::CR2) 1849 // Leave Darwin logic as-is. 1850 || (!Subtarget.isSVR4ABI() && 1851 (PPC::CRBITRCRegClass.contains(Reg) || 1852 PPC::CRRCRegClass.contains(Reg)))) { 1853 int FI = CSI[i].getFrameIdx(); 1854 1855 FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); 1856 } 1857 } 1858 1859 LowerBound -= 4; // The CR save area is always 4 bytes long. 1860 } 1861 1862 if (HasVRSAVESaveArea) { 1863 // FIXME SVR4: Is it actually possible to have multiple elements in CSI 1864 // which have the VRSAVE register class? 1865 // Adjust the frame index of the VRSAVE spill slot. 1866 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 1867 unsigned Reg = CSI[i].getReg(); 1868 1869 if (PPC::VRSAVERCRegClass.contains(Reg)) { 1870 int FI = CSI[i].getFrameIdx(); 1871 1872 FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); 1873 } 1874 } 1875 1876 LowerBound -= 4; // The VRSAVE save area is always 4 bytes long. 1877 } 1878 1879 if (HasVRSaveArea) { 1880 // Insert alignment padding, we need 16-byte alignment. 1881 LowerBound = (LowerBound - 15) & ~(15); 1882 1883 for (unsigned i = 0, e = VRegs.size(); i != e; ++i) { 1884 int FI = VRegs[i].getFrameIdx(); 1885 1886 FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); 1887 } 1888 } 1889 1890 addScavengingSpillSlot(MF, RS); 1891 } 1892 1893 void 1894 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, 1895 RegScavenger *RS) const { 1896 // Reserve a slot closest to SP or frame pointer if we have a dynalloc or 1897 // a large stack, which will require scavenging a register to materialize a 1898 // large offset. 1899 1900 // We need to have a scavenger spill slot for spills if the frame size is 1901 // large. In case there is no free register for large-offset addressing, 1902 // this slot is used for the necessary emergency spill. Also, we need the 1903 // slot for dynamic stack allocations. 1904 1905 // The scavenger might be invoked if the frame offset does not fit into 1906 // the 16-bit immediate. We don't know the complete frame size here 1907 // because we've not yet computed callee-saved register spills or the 1908 // needed alignment padding. 1909 unsigned StackSize = determineFrameLayout(MF, false, true); 1910 MachineFrameInfo *MFI = MF.getFrameInfo(); 1911 if (MFI->hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) || 1912 hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) { 1913 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; 1914 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; 1915 const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC; 1916 RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), 1917 RC->getAlignment(), 1918 false)); 1919 1920 // Might we have over-aligned allocas? 1921 bool HasAlVars = MFI->hasVarSizedObjects() && 1922 MFI->getMaxAlignment() > getStackAlignment(); 1923 1924 // These kinds of spills might need two registers. 1925 if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars) 1926 RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), 1927 RC->getAlignment(), 1928 false)); 1929 1930 } 1931 } 1932 1933 bool 1934 PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, 1935 MachineBasicBlock::iterator MI, 1936 const std::vector<CalleeSavedInfo> &CSI, 1937 const TargetRegisterInfo *TRI) const { 1938 1939 // Currently, this function only handles SVR4 32- and 64-bit ABIs. 1940 // Return false otherwise to maintain pre-existing behavior. 1941 if (!Subtarget.isSVR4ABI()) 1942 return false; 1943 1944 MachineFunction *MF = MBB.getParent(); 1945 const PPCInstrInfo &TII = 1946 *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo()); 1947 DebugLoc DL; 1948 bool CRSpilled = false; 1949 MachineInstrBuilder CRMIB; 1950 1951 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 1952 unsigned Reg = CSI[i].getReg(); 1953 // Only Darwin actually uses the VRSAVE register, but it can still appear 1954 // here if, for example, @llvm.eh.unwind.init() is used. If we're not on 1955 // Darwin, ignore it. 1956 if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI()) 1957 continue; 1958 1959 // CR2 through CR4 are the nonvolatile CR fields. 1960 bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4; 1961 1962 // Add the callee-saved register as live-in; it's killed at the spill. 1963 MBB.addLiveIn(Reg); 1964 1965 if (CRSpilled && IsCRField) { 1966 CRMIB.addReg(Reg, RegState::ImplicitKill); 1967 continue; 1968 } 1969 1970 // Insert the spill to the stack frame. 1971 if (IsCRField) { 1972 PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>(); 1973 if (Subtarget.isPPC64()) { 1974 // The actual spill will happen at the start of the prologue. 1975 FuncInfo->addMustSaveCR(Reg); 1976 } else { 1977 CRSpilled = true; 1978 FuncInfo->setSpillsCR(); 1979 1980 // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have 1981 // the same frame index in PPCRegisterInfo::hasReservedSpillSlot. 1982 CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12) 1983 .addReg(Reg, RegState::ImplicitKill); 1984 1985 MBB.insert(MI, CRMIB); 1986 MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW)) 1987 .addReg(PPC::R12, 1988 getKillRegState(true)), 1989 CSI[i].getFrameIdx())); 1990 } 1991 } else { 1992 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 1993 TII.storeRegToStackSlot(MBB, MI, Reg, true, 1994 CSI[i].getFrameIdx(), RC, TRI); 1995 } 1996 } 1997 return true; 1998 } 1999 2000 static void 2001 restoreCRs(bool isPPC64, bool is31, 2002 bool CR2Spilled, bool CR3Spilled, bool CR4Spilled, 2003 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 2004 const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) { 2005 2006 MachineFunction *MF = MBB.getParent(); 2007 const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo(); 2008 DebugLoc DL; 2009 unsigned RestoreOp, MoveReg; 2010 2011 if (isPPC64) 2012 // This is handled during epilogue generation. 2013 return; 2014 else { 2015 // 32-bit: FP-relative 2016 MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), 2017 PPC::R12), 2018 CSI[CSIIndex].getFrameIdx())); 2019 RestoreOp = PPC::MTOCRF; 2020 MoveReg = PPC::R12; 2021 } 2022 2023 if (CR2Spilled) 2024 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2) 2025 .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled))); 2026 2027 if (CR3Spilled) 2028 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3) 2029 .addReg(MoveReg, getKillRegState(!CR4Spilled))); 2030 2031 if (CR4Spilled) 2032 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4) 2033 .addReg(MoveReg, getKillRegState(true))); 2034 } 2035 2036 MachineBasicBlock::iterator PPCFrameLowering:: 2037 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, 2038 MachineBasicBlock::iterator I) const { 2039 const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); 2040 if (MF.getTarget().Options.GuaranteedTailCallOpt && 2041 I->getOpcode() == PPC::ADJCALLSTACKUP) { 2042 // Add (actually subtract) back the amount the callee popped on return. 2043 if (int CalleeAmt = I->getOperand(1).getImm()) { 2044 bool is64Bit = Subtarget.isPPC64(); 2045 CalleeAmt *= -1; 2046 unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1; 2047 unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0; 2048 unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI; 2049 unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4; 2050 unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS; 2051 unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI; 2052 MachineInstr *MI = I; 2053 const DebugLoc &dl = MI->getDebugLoc(); 2054 2055 if (isInt<16>(CalleeAmt)) { 2056 BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg) 2057 .addReg(StackReg, RegState::Kill) 2058 .addImm(CalleeAmt); 2059 } else { 2060 MachineBasicBlock::iterator MBBI = I; 2061 BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) 2062 .addImm(CalleeAmt >> 16); 2063 BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) 2064 .addReg(TmpReg, RegState::Kill) 2065 .addImm(CalleeAmt & 0xFFFF); 2066 BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg) 2067 .addReg(StackReg, RegState::Kill) 2068 .addReg(TmpReg); 2069 } 2070 } 2071 } 2072 // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. 2073 return MBB.erase(I); 2074 } 2075 2076 bool 2077 PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, 2078 MachineBasicBlock::iterator MI, 2079 const std::vector<CalleeSavedInfo> &CSI, 2080 const TargetRegisterInfo *TRI) const { 2081 2082 // Currently, this function only handles SVR4 32- and 64-bit ABIs. 2083 // Return false otherwise to maintain pre-existing behavior. 2084 if (!Subtarget.isSVR4ABI()) 2085 return false; 2086 2087 MachineFunction *MF = MBB.getParent(); 2088 const PPCInstrInfo &TII = 2089 *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo()); 2090 bool CR2Spilled = false; 2091 bool CR3Spilled = false; 2092 bool CR4Spilled = false; 2093 unsigned CSIIndex = 0; 2094 2095 // Initialize insertion-point logic; we will be restoring in reverse 2096 // order of spill. 2097 MachineBasicBlock::iterator I = MI, BeforeI = I; 2098 bool AtStart = I == MBB.begin(); 2099 2100 if (!AtStart) 2101 --BeforeI; 2102 2103 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2104 unsigned Reg = CSI[i].getReg(); 2105 2106 // Only Darwin actually uses the VRSAVE register, but it can still appear 2107 // here if, for example, @llvm.eh.unwind.init() is used. If we're not on 2108 // Darwin, ignore it. 2109 if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI()) 2110 continue; 2111 2112 if (Reg == PPC::CR2) { 2113 CR2Spilled = true; 2114 // The spill slot is associated only with CR2, which is the 2115 // first nonvolatile spilled. Save it here. 2116 CSIIndex = i; 2117 continue; 2118 } else if (Reg == PPC::CR3) { 2119 CR3Spilled = true; 2120 continue; 2121 } else if (Reg == PPC::CR4) { 2122 CR4Spilled = true; 2123 continue; 2124 } else { 2125 // When we first encounter a non-CR register after seeing at 2126 // least one CR register, restore all spilled CRs together. 2127 if ((CR2Spilled || CR3Spilled || CR4Spilled) 2128 && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) { 2129 bool is31 = needsFP(*MF); 2130 restoreCRs(Subtarget.isPPC64(), is31, 2131 CR2Spilled, CR3Spilled, CR4Spilled, 2132 MBB, I, CSI, CSIIndex); 2133 CR2Spilled = CR3Spilled = CR4Spilled = false; 2134 } 2135 2136 // Default behavior for non-CR saves. 2137 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 2138 TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), 2139 RC, TRI); 2140 assert(I != MBB.begin() && 2141 "loadRegFromStackSlot didn't insert any code!"); 2142 } 2143 2144 // Insert in reverse order. 2145 if (AtStart) 2146 I = MBB.begin(); 2147 else { 2148 I = BeforeI; 2149 ++I; 2150 } 2151 } 2152 2153 // If we haven't yet spilled the CRs, do so now. 2154 if (CR2Spilled || CR3Spilled || CR4Spilled) { 2155 bool is31 = needsFP(*MF); 2156 restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled, 2157 MBB, I, CSI, CSIIndex); 2158 } 2159 2160 return true; 2161 } 2162 2163 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { 2164 return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() && 2165 MF.getSubtarget<PPCSubtarget>().isPPC64()); 2166 } 2167