//===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the PPC implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/PPCPredicates.h"
#include "PPCFrameLowering.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Function.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define DEBUG_TYPE "framelowering"
STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
STATISTIC(NumPrologProbed, "Number of prologues probed");

static cl::opt<bool>
EnablePEVectorSpills("ppc-enable-pe-vector-spills",
                     cl::desc("Enable spills in prologue to vector registers."),
                     cl::init(false), cl::Hidden);

static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
  if (STI.isAIXABI())
    return STI.isPPC64() ? 16 : 8;
  // SVR4 ABI:
  return STI.isPPC64() ? 16 : 4;
}

static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
  if (STI.isAIXABI())
    return STI.isPPC64() ? 40 : 20;
  return STI.isELFv2ABI() ? 24 : 40;
}

static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
  // First slot in the general register save area.
  return STI.isPPC64() ? -8U : -4U;
}

static unsigned computeLinkageSize(const PPCSubtarget &STI) {
  if (STI.isAIXABI() || STI.isPPC64())
    return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);

  // 32-bit SVR4 ABI:
  return 8;
}

static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
  // Third slot in the general purpose register save area.
  if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent())
    return -12U;

  // Second slot in the general purpose register save area.
  return STI.isPPC64() ? -16U : -8U;
}

static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
  return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
}

PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
                          STI.getPlatformStackAlignment(), 0),
      Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
      TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
      FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
      LinkageSize(computeLinkageSize(Subtarget)),
      BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
      CRSaveOffset(computeCRSaveOffset(Subtarget)) {}

// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
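// For illustration (taken from the tables below): roughly speaking, on 64-bit
// targets X31 is saved 8 bytes below the stack pointer on entry, X30 at -16,
// and so on down to X14 at -144; the FPR, VR and SPE areas use analogous fixed
// slots. Their ranges overlap and are disambiguated later (see the note about
// processFunctionBeforeFrameFinalized inside the function).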
const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
    unsigned &NumEntries) const {

// Floating-point register save area offsets.
#define CALLEE_SAVED_FPRS \
  {PPC::F31, -8},         \
  {PPC::F30, -16},        \
  {PPC::F29, -24},        \
  {PPC::F28, -32},        \
  {PPC::F27, -40},        \
  {PPC::F26, -48},        \
  {PPC::F25, -56},        \
  {PPC::F24, -64},        \
  {PPC::F23, -72},        \
  {PPC::F22, -80},        \
  {PPC::F21, -88},        \
  {PPC::F20, -96},        \
  {PPC::F19, -104},       \
  {PPC::F18, -112},       \
  {PPC::F17, -120},       \
  {PPC::F16, -128},       \
  {PPC::F15, -136},       \
  {PPC::F14, -144}

// 32-bit general purpose register save area offsets shared by ELF and
// AIX. AIX has an extra CSR with r13.
#define CALLEE_SAVED_GPRS32 \
  {PPC::R31, -4},           \
  {PPC::R30, -8},           \
  {PPC::R29, -12},          \
  {PPC::R28, -16},          \
  {PPC::R27, -20},          \
  {PPC::R26, -24},          \
  {PPC::R25, -28},          \
  {PPC::R24, -32},          \
  {PPC::R23, -36},          \
  {PPC::R22, -40},          \
  {PPC::R21, -44},          \
  {PPC::R20, -48},          \
  {PPC::R19, -52},          \
  {PPC::R18, -56},          \
  {PPC::R17, -60},          \
  {PPC::R16, -64},          \
  {PPC::R15, -68},          \
  {PPC::R14, -72}

// 64-bit general purpose register save area offsets.
#define CALLEE_SAVED_GPRS64 \
  {PPC::X31, -8},           \
  {PPC::X30, -16},          \
  {PPC::X29, -24},          \
  {PPC::X28, -32},          \
  {PPC::X27, -40},          \
  {PPC::X26, -48},          \
  {PPC::X25, -56},          \
  {PPC::X24, -64},          \
  {PPC::X23, -72},          \
  {PPC::X22, -80},          \
  {PPC::X21, -88},          \
  {PPC::X20, -96},          \
  {PPC::X19, -104},         \
  {PPC::X18, -112},         \
  {PPC::X17, -120},         \
  {PPC::X16, -128},         \
  {PPC::X15, -136},         \
  {PPC::X14, -144}

// Vector register save area offsets.
#define CALLEE_SAVED_VRS \
  {PPC::V31, -16},       \
  {PPC::V30, -32},       \
  {PPC::V29, -48},       \
  {PPC::V28, -64},       \
  {PPC::V27, -80},       \
  {PPC::V26, -96},       \
  {PPC::V25, -112},      \
  {PPC::V24, -128},      \
  {PPC::V23, -144},      \
  {PPC::V22, -160},      \
  {PPC::V21, -176},      \
  {PPC::V20, -192}

  // Note that the offsets here overlap, but this is fixed up in
  // processFunctionBeforeFrameFinalized.

  static const SpillSlot ELFOffsets32[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS32,

      // CR save area offset.  We map each of the nonvolatile CR fields
      // to the slot for CR2, which is the first of the nonvolatile CR
      // fields to be assigned, so that we only allocate one save slot.
      // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
      {PPC::CR2, -4},

      // VRSAVE save area offset.
      {PPC::VRSAVE, -4},

      CALLEE_SAVED_VRS,

      // SPE register save area (overlaps Vector save area).
      {PPC::S31, -8},
      {PPC::S30, -16},
      {PPC::S29, -24},
      {PPC::S28, -32},
      {PPC::S27, -40},
      {PPC::S26, -48},
      {PPC::S25, -56},
      {PPC::S24, -64},
      {PPC::S23, -72},
      {PPC::S22, -80},
      {PPC::S21, -88},
      {PPC::S20, -96},
      {PPC::S19, -104},
      {PPC::S18, -112},
      {PPC::S17, -120},
      {PPC::S16, -128},
      {PPC::S15, -136},
      {PPC::S14, -144}};

  static const SpillSlot ELFOffsets64[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS64,

      // VRSAVE save area offset.
      {PPC::VRSAVE, -4},
      CALLEE_SAVED_VRS
  };

  static const SpillSlot AIXOffsets32[] = {CALLEE_SAVED_FPRS,
                                           CALLEE_SAVED_GPRS32,
                                           // Add AIX's extra CSR.
                                           {PPC::R13, -76},
                                           CALLEE_SAVED_VRS};

  static const SpillSlot AIXOffsets64[] = {
      CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS64, CALLEE_SAVED_VRS};

  if (Subtarget.is64BitELFABI()) {
    NumEntries = array_lengthof(ELFOffsets64);
    return ELFOffsets64;
  }

  if (Subtarget.is32BitELFABI()) {
    NumEntries = array_lengthof(ELFOffsets32);
    return ELFOffsets32;
  }

  assert(Subtarget.isAIXABI() && "Unexpected ABI.");

  if (Subtarget.isPPC64()) {
    NumEntries = array_lengthof(AIXOffsets64);
    return AIXOffsets64;
  }

  NumEntries = array_lengthof(AIXOffsets32);
  return AIXOffsets32;
}

static bool spillsCR(const MachineFunction &MF) {
  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  return FuncInfo->isCRSpilled();
}

static bool hasSpills(const MachineFunction &MF) {
  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  return FuncInfo->hasSpills();
}

static bool hasNonRISpills(const MachineFunction &MF) {
  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  return FuncInfo->hasNonRISpills();
}

/// MustSaveLR - Return true if this function requires that we save the LR
/// register onto the stack in the prolog and restore it in the epilog of the
/// function.
static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
  const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();

  // We need a save/restore of LR if there is any def of LR (which is
  // defined by calls, including the PIC setup sequence), or if there is
  // some use of the LR stack slot (e.g. for builtin_return_address).
  // (LR comes in 32 and 64 bit versions.)
  MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
  return RI != MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
}

/// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
/// call frame size. Update the MachineFunction object with the stack size.
unsigned
PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
                                                bool UseEstimate) const {
  unsigned NewMaxCallFrameSize = 0;
  unsigned FrameSize = determineFrameLayout(MF, UseEstimate,
                                            &NewMaxCallFrameSize);
  MF.getFrameInfo().setStackSize(FrameSize);
  MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
  return FrameSize;
}

/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
unsigned
PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
                                       bool UseEstimate,
                                       unsigned *NewMaxCallFrameSize) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();

  // Get the number of bytes to allocate from the FrameInfo.
  unsigned FrameSize =
      UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();

  // Get stack alignments. The frame must be aligned to the greatest of these:
  Align TargetAlign = getStackAlign(); // alignment required per the ABI
  Align MaxAlign = MFI.getMaxAlign();  // alignment required by data in frame
  Align Alignment = std::max(TargetAlign, MaxAlign);

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  unsigned LR = RegInfo->getRARegister();
  bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
  bool CanUseRedZone = !MFI.hasVarSizedObjects() &&  // No dynamic alloca.
                       !MFI.adjustsStack() &&        // No calls.
                       !MustSaveLR(MF, LR) &&        // No need to save LR.
                       !FI->mustSaveTOC() &&         // No need to save TOC.
                       !RegInfo->hasBasePointer(MF); // No special alignment.

  // Note: for PPC32 SVR4ABI, we can still generate stackless
  // code if all local vars are reg-allocated.
  bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();

  // Check whether we can skip adjusting the stack pointer (by using red zone).
  if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
    // No need for frame
    return 0;
  }

  // Get the maximum call frame size of all the calls.
  unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();

  // Maximum call frame needs to be at least big enough for linkage area.
  unsigned minCallFrameSize = getLinkageSize();
  maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);

  // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
  // that allocations will be aligned.
  if (MFI.hasVarSizedObjects())
    maxCallFrameSize = alignTo(maxCallFrameSize, Alignment);

  // Update the new max call frame size if the caller passes in a valid pointer.
  if (NewMaxCallFrameSize)
    *NewMaxCallFrameSize = maxCallFrameSize;

  // Include call frame size in total.
  FrameSize += maxCallFrameSize;

  // Make sure the frame is aligned.
  FrameSize = alignTo(FrameSize, Alignment);

  return FrameSize;
}

// hasFP - Return true if the specified function actually has a dedicated frame
// pointer register.
bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  // FIXME: This is pretty much broken by design: hasFP() might be called really
  // early, before the stack layout was calculated and thus hasFP() might return
  // true or false here depending on the time of call.
  return (MFI.getStackSize()) && needsFP(MF);
}

// needsFP - Return true if the specified function should have a dedicated frame
// pointer register. This is true if the function has variable sized allocas or
// if frame pointer elimination is disabled.
bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Naked functions have no stack frame pushed, so we don't have a frame
  // pointer.
  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
    return false;

  return MF.getTarget().Options.DisableFramePointerElim(MF) ||
         MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
         (MF.getTarget().Options.GuaranteedTailCallOpt &&
          MF.getInfo<PPCFunctionInfo>()->hasFastCall());
}

void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
  bool is31 = needsFP(MF);
  unsigned FPReg  = is31 ? PPC::R31 : PPC::R1;
  unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  bool HasBP = RegInfo->hasBasePointer(MF);
  unsigned BPReg  = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
  unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI)
    for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
      --MBBI;
      for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
        MachineOperand &MO = MBBI->getOperand(I);
        if (!MO.isReg())
          continue;

        switch (MO.getReg()) {
        case PPC::FP:
          MO.setReg(FPReg);
          break;
        case PPC::FP8:
          MO.setReg(FP8Reg);
          break;
        case PPC::BP:
          MO.setReg(BPReg);
          break;
        case PPC::BP8:
          MO.setReg(BP8Reg);
          break;
        }
      }
    }
}

/* This function will do the following:
   - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
     respectively (defaults recommended by the ABI) and return true
   - If MBB is not an entry block, initialize the register scavenger and look
     for available registers.
   - If the defaults (R0/R12) are available, return true
   - If TwoUniqueRegsRequired is set to true, it looks for two unique
     registers. Otherwise, look for a single available register.
   - If the required registers are found, set SR1 and SR2 and return true.
   - If the required registers are not found, set SR2 or both SR1 and SR2 to
     PPC::NoRegister and return false.

   Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
   is not set, this function will attempt to find two different registers, but
   still return true if only one register is available (and set SR1 == SR2).
*/
bool
PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
                                      bool UseAtEnd,
                                      bool TwoUniqueRegsRequired,
                                      Register *SR1,
                                      Register *SR2) const {
  RegScavenger RS;
  Register R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
  Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;

  // Set the defaults for the two scratch registers.
  if (SR1)
    *SR1 = R0;

  if (SR2) {
    assert(SR1 && "Asking for the second scratch register but not the first?");
    *SR2 = R12;
  }

  // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
  if ((UseAtEnd && MBB->isReturnBlock()) ||
      (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
    return true;

  RS.enterBasicBlock(*MBB);

  if (UseAtEnd && !MBB->empty()) {
    // The scratch register will be used at the end of the block, so we must
    // consider all registers used within the block.

    MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
    // If no terminator, back iterator up to previous instruction.
    if (MBBI == MBB->end())
      MBBI = std::prev(MBBI);

    if (MBBI != MBB->begin())
      RS.forward(MBBI);
  }

  // If the two registers are available, we're all good.
  // Note that we only return here if both R0 and R12 are available because
  // although the function may not require two unique registers, it may benefit
  // from having two so we should try to provide them.
  if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
    return true;

  // Get the list of callee-saved registers for the target.
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());

  // Get all the available registers in the block.
  BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass
                                                         : &PPC::GPRCRegClass);

  // We shouldn't use callee-saved registers as scratch registers as they may be
  // available when looking for a candidate block for shrink wrapping but not
  // available when the actual prologue/epilogue is being emitted because they
  // were added as live-in to the prologue block by PrologueEpilogueInserter.
  for (int i = 0; CSRegs[i]; ++i)
    BV.reset(CSRegs[i]);

  // Set the first scratch register to the first available one.
  if (SR1) {
    int FirstScratchReg = BV.find_first();
    *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
  }

  // If there is another one available, set the second scratch register to that.
  // Otherwise, set it to either PPC::NoRegister if this function requires two
  // or to whatever SR1 is set to if this function doesn't require two.
  if (SR2) {
    int SecondScratchReg = BV.find_next(*SR1);
    if (SecondScratchReg != -1)
      *SR2 = SecondScratchReg;
    else
      *SR2 = TwoUniqueRegsRequired ? Register() : *SR1;
  }

  // Now that we've done our best to provide both registers, double check
  // whether we were unable to provide enough.
  if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
    return false;

  return true;
}

// We need a scratch register for spilling LR and for spilling CR. By default,
// we use two scratch registers to hide latency. However, if only one scratch
// register is available, we can adjust for that by not overlapping the spill
// code. However, if we need to realign the stack (i.e. have a base pointer)
// and the stack frame is large, we need two scratch registers.
// Also, an inline stack probe requires two scratch registers: one for the old
// SP, one for large frames and large probe sizes.
bool
PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  MachineFunction &MF = *(MBB->getParent());
  bool HasBP = RegInfo->hasBasePointer(MF);
  unsigned FrameSize = determineFrameLayout(MF);
  int NegFrameSize = -FrameSize;
  bool IsLargeFrame = !isInt<16>(NegFrameSize);
  MachineFrameInfo &MFI = MF.getFrameInfo();
  Align MaxAlign = MFI.getMaxAlign();
  bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
  const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();

  return ((IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1) ||
         TLI.hasInlineStackProbe(MF);
}

bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);

  return findScratchRegister(TmpMBB, false,
                             twoUniqueScratchRegsRequired(TmpMBB));
}

bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);

  return findScratchRegister(TmpMBB, true);
}

bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();

  // Abort if there is no register info or function info.
  if (!RegInfo || !FI)
    return false;

  // Only move the stack update on ELFv2 ABI and PPC64.
  if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
    return false;

  // Check the frame size first and return false if it does not fit the
  // requirements.
  // We need a non-zero frame size as well as a frame that will fit in the red
  // zone. This is because by moving the stack pointer update we are now storing
  // to the red zone until the stack pointer is updated. If we get an interrupt
  // inside the prologue but before the stack update we now have a number of
  // stores to the red zone and those stores must all fit.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned FrameSize = MFI.getStackSize();
  if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
    return false;

  // Frame pointers and base pointers complicate matters so don't do anything
  // if we have them. For example having a frame pointer will sometimes require
  // a copy of r1 into r31 and that makes keeping track of updates to r1 more
  // difficult.
  if (hasFP(MF) || RegInfo->hasBasePointer(MF))
    return false;

  // Calls to fast_cc functions use different rules for passing parameters on
  // the stack from the ABI and using PIC base in the function imposes
  // similar restrictions to using the base pointer. It is not generally safe
  // to move the stack pointer update in these situations.
  if (FI->hasFastCall() || FI->usesPICBase())
    return false;

  // Finally we can move the stack update if we do not require register
  // scavenging. Register scavenging can introduce more spills and so
  // may make the frame size larger than we have computed.
  return !RegInfo->requiresFrameIndexScavenging(MF);
}

void PPCFrameLowering::emitPrologue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();

  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  DebugLoc dl;
  // AIX assembler does not support cfi directives.
  const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();

  // Get processor type.
  bool isPPC64 = Subtarget.isPPC64();
  // Get the ABI.
  bool isSVR4ABI = Subtarget.isSVR4ABI();
  bool isELFv2ABI = Subtarget.isELFv2ABI();
  assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI.");

  // Work out frame sizes.
  unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
  int NegFrameSize = -FrameSize;
  if (!isInt<32>(NegFrameSize))
    llvm_unreachable("Unhandled stack size!");

  if (MFI.isFrameAddressTaken())
    replaceFPWithRealFP(MF);

  // Check if the link register (LR) must be saved.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  bool MustSaveLR = FI->mustSaveLR();
  bool MustSaveTOC = FI->mustSaveTOC();
  const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
  bool MustSaveCR = !MustSaveCRs.empty();
  // Do we have a frame pointer and/or base pointer for this function?
  bool HasFP = hasFP(MF);
  bool HasBP = RegInfo->hasBasePointer(MF);
  bool HasRedZone = isPPC64 || !isSVR4ABI;

  Register SPReg  = isPPC64 ? PPC::X1  : PPC::R1;
  Register BPReg  = RegInfo->getBaseRegister(MF);
  Register FPReg  = isPPC64 ? PPC::X31 : PPC::R31;
  Register LRReg  = isPPC64 ? PPC::LR8 : PPC::LR;
  Register TOCReg = isPPC64 ? PPC::X2  : PPC::R2;
  Register ScratchReg;
  Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
  //  ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
  const MCInstrDesc &MFLRInst = TII.get(isPPC64 ? PPC::MFLR8 : PPC::MFLR);
  const MCInstrDesc &StoreInst = TII.get(isPPC64 ? PPC::STD : PPC::STW);
  const MCInstrDesc &StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU : PPC::STWU);
  const MCInstrDesc &StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX);
  const MCInstrDesc &LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS);
  const MCInstrDesc &OrImmInst = TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI);
  const MCInstrDesc &OrInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
  const MCInstrDesc &SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8 : PPC::SUBFC);
  const MCInstrDesc &SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8 : PPC::SUBFIC);
  const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8 : PPC::MFCR);
  const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW);

  // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
  // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
  // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
  // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
  assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
         "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");

  // Using the same bool variable as below to suppress compiler warnings.
  bool SingleScratchReg = findScratchRegister(
      &MBB, false, twoUniqueScratchRegsRequired(&MBB), &ScratchReg, &TempReg);
  assert(SingleScratchReg &&
         "Required number of registers not available in this block");

  SingleScratchReg = ScratchReg == TempReg;

  int LROffset = getReturnSaveOffset();

  int FPOffset = 0;
  if (HasFP) {
    MachineFrameInfo &MFI = MF.getFrameInfo();
    int FPIndex = FI->getFramePointerSaveIndex();
    assert(FPIndex && "No Frame Pointer Save Slot!");
    FPOffset = MFI.getObjectOffset(FPIndex);
  }

  int BPOffset = 0;
  if (HasBP) {
    MachineFrameInfo &MFI = MF.getFrameInfo();
    int BPIndex = FI->getBasePointerSaveIndex();
    assert(BPIndex && "No Base Pointer Save Slot!");
    BPOffset = MFI.getObjectOffset(BPIndex);
  }

  int PBPOffset = 0;
  if (FI->usesPICBase()) {
    MachineFrameInfo &MFI = MF.getFrameInfo();
    int PBPIndex = FI->getPICBasePointerSaveIndex();
    assert(PBPIndex && "No PIC Base Pointer Save Slot!");
    PBPOffset = MFI.getObjectOffset(PBPIndex);
  }

  // Get stack alignments.
  Align MaxAlign = MFI.getMaxAlign();
  if (HasBP && MaxAlign > 1)
    assert(Log2(MaxAlign) < 16 && "Invalid alignment!");

  // Frames of 32KB & larger require special handling because they cannot be
  // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
  bool isLargeFrame = !isInt<16>(NegFrameSize);

  // Check if we can move the stack update instruction (stdu) down the prologue
  // past the callee saves. Hopefully this will avoid the situation where the
  // saves are waiting for the update on the store with update to complete.
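  // An illustrative ELFv2 example (register numbers and offsets are purely for
  // exposition): instead of
  //   stdu r1, -N(r1); std r31, N-8(r1); std r30, N-16(r1)
  // we would rather emit
  //   std r31, -8(r1); std r30, -16(r1); stdu r1, -N(r1)
  // storing the callee saves into the red zone first, so they do not have to
  // wait for the store-with-update to complete.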
  MachineBasicBlock::iterator StackUpdateLoc = MBBI;
  bool MovingStackUpdateDown = false;

  // Check if we can move the stack update.
  if (stackUpdateCanBeMoved(MF)) {
    const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
    for (CalleeSavedInfo CSI : Info) {
      int FrIdx = CSI.getFrameIdx();
      // If the frame index is not negative the callee saved info belongs to a
      // stack object that is not a fixed stack object. We ignore non-fixed
      // stack objects because we won't move the stack update pointer past them.
      if (FrIdx >= 0)
        continue;

      if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
        StackUpdateLoc++;
        MovingStackUpdateDown = true;
      } else {
        // We need all of the Frame Indices to meet these conditions.
        // If they do not, abort the whole operation.
        StackUpdateLoc = MBBI;
        MovingStackUpdateDown = false;
        break;
      }
    }

    // If the operation was not aborted then update the object offset.
    if (MovingStackUpdateDown) {
      for (CalleeSavedInfo CSI : Info) {
        int FrIdx = CSI.getFrameIdx();
        if (FrIdx < 0)
          MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
      }
    }
  }

  // Where in the prologue we move the CR fields depends on how many scratch
  // registers we have, and if we need to save the link register or not. This
  // lambda is to avoid duplicating the logic in 2 places.
  auto BuildMoveFromCR = [&]() {
    if (isELFv2ABI && MustSaveCRs.size() == 1) {
      // In the ELFv2 ABI, we are not required to save all CR fields.
      // If only one CR field is clobbered, it is more efficient to use
      // mfocrf to selectively save just that field, because mfocrf has short
      // latency compared to mfcr.
      assert(isPPC64 && "V2 ABI is 64-bit only.");
      MachineInstrBuilder MIB =
          BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg);
      MIB.addReg(MustSaveCRs[0], RegState::Kill);
    } else {
      MachineInstrBuilder MIB =
          BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg);
      for (unsigned CRfield : MustSaveCRs)
        MIB.addReg(CRfield, RegState::ImplicitKill);
    }
  };

  // If we need to spill the CR and the LR but we don't have two separate
  // registers available, we must spill them one at a time.
  if (MustSaveCR && SingleScratchReg && MustSaveLR) {
    BuildMoveFromCR();
    BuildMI(MBB, MBBI, dl, StoreWordInst)
        .addReg(TempReg, getKillRegState(true))
        .addImm(CRSaveOffset)
        .addReg(SPReg);
  }

  if (MustSaveLR)
    BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);

  if (MustSaveCR && !(SingleScratchReg && MustSaveLR))
    BuildMoveFromCR();

  if (HasRedZone) {
    if (HasFP)
      BuildMI(MBB, MBBI, dl, StoreInst)
          .addReg(FPReg)
          .addImm(FPOffset)
          .addReg(SPReg);
    if (FI->usesPICBase())
      BuildMI(MBB, MBBI, dl, StoreInst)
          .addReg(PPC::R30)
          .addImm(PBPOffset)
          .addReg(SPReg);
    if (HasBP)
      BuildMI(MBB, MBBI, dl, StoreInst)
          .addReg(BPReg)
          .addImm(BPOffset)
          .addReg(SPReg);
  }

  if (MustSaveLR)
    BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
        .addReg(ScratchReg, getKillRegState(true))
        .addImm(LROffset)
        .addReg(SPReg);

  if (MustSaveCR &&
      !(SingleScratchReg && MustSaveLR)) {
    assert(HasRedZone && "A red zone is always available on PPC64");
    BuildMI(MBB, MBBI, dl, StoreWordInst)
        .addReg(TempReg, getKillRegState(true))
        .addImm(CRSaveOffset)
        .addReg(SPReg);
  }

  // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
  if (!FrameSize)
    return;

  // Adjust stack pointer: r1 += NegFrameSize.
  // If there is a preferred stack alignment, align R1 now.

  if (HasBP && HasRedZone) {
    // Save a copy of r1 as the base pointer.
    BuildMI(MBB, MBBI, dl, OrInst, BPReg)
        .addReg(SPReg)
        .addReg(SPReg);
  }

  // Have we generated a STUX instruction to claim stack frame? If so,
  // the negated frame size will be placed in ScratchReg.
  bool HasSTUX = false;

  // If FrameSize <= TLI.getStackProbeSize(MF), we get a free probe: the PowerPC
  // ABI requires the backchain pointer to always be stored at *SP, so the
  // mandatory STU(X) instruction touches the newly allocated stack.
  if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) {
    // To be consistent with other targets, a pseudo instruction is emitted and
    // will be later expanded in `inlineStackProbe`.
    BuildMI(MBB, MBBI, dl,
            TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
                            : PPC::PROBED_STACKALLOC_32))
        .addDef(ScratchReg)
        .addDef(TempReg) // TempReg stores the old sp.
        .addImm(NegFrameSize);
    // FIXME: HasSTUX is only read if HasRedZone is not set; in that case, we
    // update the ScratchReg to meet the assumption that ScratchReg contains
    // the NegFrameSize. This solution is rather tricky.
    if (!HasRedZone) {
      BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
          .addReg(TempReg)
          .addReg(SPReg);
      HasSTUX = true;
    }
  } else {
    // This condition must be kept in sync with canUseAsPrologue.
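    // For illustration only (not emitted literally): with a 16-bit frame size
    // and MaxAlign == 64 on PPC64, the realigning allocation below is roughly
    //   rldicl r0, r1, 0, 58        ; r0 = SP & 63
    //   subfic r0, r0, -FrameSize   ; r0 = -FrameSize - (SP & 63)
    //   stdux  r1, r1, r0           ; allocate, realign, store the back chain
    // where r0 stands in for whatever ScratchReg was found.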
    if (HasBP && MaxAlign > 1) {
      if (isPPC64)
        BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
            .addReg(SPReg)
            .addImm(0)
            .addImm(64 - Log2(MaxAlign));
      else // PPC32...
        BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
            .addReg(SPReg)
            .addImm(0)
            .addImm(32 - Log2(MaxAlign))
            .addImm(31);
      if (!isLargeFrame) {
        BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
            .addReg(ScratchReg, RegState::Kill)
            .addImm(NegFrameSize);
      } else {
        assert(!SingleScratchReg && "Only a single scratch reg available");
        BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
            .addImm(NegFrameSize >> 16);
        BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
            .addReg(TempReg, RegState::Kill)
            .addImm(NegFrameSize & 0xFFFF);
        BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
            .addReg(ScratchReg, RegState::Kill)
            .addReg(TempReg, RegState::Kill);
      }

      BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
          .addReg(SPReg, RegState::Kill)
          .addReg(SPReg)
          .addReg(ScratchReg);
      HasSTUX = true;

    } else if (!isLargeFrame) {
      BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
          .addReg(SPReg)
          .addImm(NegFrameSize)
          .addReg(SPReg);

    } else {
      BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
          .addImm(NegFrameSize >> 16);
      BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
          .addReg(ScratchReg, RegState::Kill)
          .addImm(NegFrameSize & 0xFFFF);
      BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
          .addReg(SPReg, RegState::Kill)
          .addReg(SPReg)
          .addReg(ScratchReg);
      HasSTUX = true;
    }
  }

  // Save the TOC register after the stack pointer update if a prologue TOC
  // save is required for the function.
  if (MustSaveTOC) {
    assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
    BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
        .addReg(TOCReg, getKillRegState(true))
        .addImm(TOCSaveOffset)
        .addReg(SPReg);
  }

  if (!HasRedZone) {
    assert(!isPPC64 && "A red zone is always available on PPC64");
    if (HasSTUX) {
      // The negated frame size is in ScratchReg, and the SPReg has been
      // decremented by the frame size: SPReg = old SPReg + ScratchReg.
      // Since FPOffset, PBPOffset, etc. are relative to the beginning of
      // the stack frame (i.e. the old SP), ideally, we would put the old
      // SP into a register and use it as the base for the stores. The
      // problem is that the only available register may be ScratchReg,
      // which could be R0, and R0 cannot be used as a base address.

      // First, set ScratchReg to the old SP. This may need to be modified
      // later.
      BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
          .addReg(ScratchReg, RegState::Kill)
          .addReg(SPReg);

      if (ScratchReg == PPC::R0) {
        // R0 cannot be used as a base register, but it can be used as an
        // index in a store-indexed.
        int LastOffset = 0;
        if (HasFP) {
          // R0 += (FPOffset-LastOffset).
          // Need addic, since addi treats R0 as 0.
          BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
              .addReg(ScratchReg)
              .addImm(FPOffset-LastOffset);
          LastOffset = FPOffset;
          // Store FP into *R0.
          BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
              .addReg(FPReg, RegState::Kill) // Save FP.
              .addReg(PPC::ZERO)
              .addReg(ScratchReg); // This will be the index (R0 is ok here).
        }
        if (FI->usesPICBase()) {
          // R0 += (PBPOffset-LastOffset).
          BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
              .addReg(ScratchReg)
              .addImm(PBPOffset-LastOffset);
          LastOffset = PBPOffset;
          BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
              .addReg(PPC::R30, RegState::Kill) // Save PIC base pointer.
              .addReg(PPC::ZERO)
              .addReg(ScratchReg); // This will be the index (R0 is ok here).
        }
        if (HasBP) {
          // R0 += (BPOffset-LastOffset).
          BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
              .addReg(ScratchReg)
              .addImm(BPOffset-LastOffset);
          LastOffset = BPOffset;
          BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
              .addReg(BPReg, RegState::Kill) // Save BP.
              .addReg(PPC::ZERO)
              .addReg(ScratchReg); // This will be the index (R0 is ok here).
          // BP = R0-LastOffset
          BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
              .addReg(ScratchReg, RegState::Kill)
              .addImm(-LastOffset);
        }
      } else {
        // ScratchReg is not R0, so use it as the base register. It is
        // already set to the old SP, so we can use the offsets directly.

        // Now that the stack frame has been allocated, save all the necessary
        // registers using ScratchReg as the base address.
        if (HasFP)
          BuildMI(MBB, MBBI, dl, StoreInst)
              .addReg(FPReg)
              .addImm(FPOffset)
              .addReg(ScratchReg);
        if (FI->usesPICBase())
          BuildMI(MBB, MBBI, dl, StoreInst)
              .addReg(PPC::R30)
              .addImm(PBPOffset)
              .addReg(ScratchReg);
        if (HasBP) {
          BuildMI(MBB, MBBI, dl, StoreInst)
              .addReg(BPReg)
              .addImm(BPOffset)
              .addReg(ScratchReg);
          BuildMI(MBB, MBBI, dl, OrInst, BPReg)
              .addReg(ScratchReg, RegState::Kill)
              .addReg(ScratchReg);
        }
      }
    } else {
      // The frame size is a known 16-bit constant (fitting in the immediate
      // field of STWU). To be here we have to be compiling for PPC32.
      // Since the SPReg has been decreased by FrameSize, add it back to each
      // offset.
      if (HasFP)
        BuildMI(MBB, MBBI, dl, StoreInst)
            .addReg(FPReg)
            .addImm(FrameSize + FPOffset)
            .addReg(SPReg);
      if (FI->usesPICBase())
        BuildMI(MBB, MBBI, dl, StoreInst)
            .addReg(PPC::R30)
            .addImm(FrameSize + PBPOffset)
            .addReg(SPReg);
      if (HasBP) {
        BuildMI(MBB, MBBI, dl, StoreInst)
            .addReg(BPReg)
            .addImm(FrameSize + BPOffset)
            .addReg(SPReg);
        BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
            .addReg(SPReg)
            .addImm(FrameSize);
      }
    }
  }

  // Add Call Frame Information for the instructions we generated above.
  if (needsCFI) {
    unsigned CFIIndex;

    if (HasBP) {
      // Define CFA in terms of BP. Do this in preference to using FP/SP,
      // because if the stack needed aligning then CFA won't be at a fixed
      // offset from FP/SP.
      unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
      CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
    } else {
      // Adjust the definition of CFA to account for the change in SP.
      assert(NegFrameSize);
      CFIIndex = MF.addFrameInst(
          MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize));
    }
    BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);

    if (HasFP) {
      // Describe where FP was saved, at a fixed offset from CFA.
      unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
      CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    if (FI->usesPICBase()) {
      // Describe where the PIC base pointer (R30) was saved, at a fixed offset
      // from CFA.
      unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
      CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    if (HasBP) {
      // Describe where BP was saved, at a fixed offset from CFA.
      unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
      CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    if (MustSaveLR) {
      // Describe where LR was saved, at a fixed offset from CFA.
      unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
      CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }
  }

  // If there is a frame pointer, copy R1 into R31.
  if (HasFP) {
    BuildMI(MBB, MBBI, dl, OrInst, FPReg)
        .addReg(SPReg)
        .addReg(SPReg);

    if (!HasBP && needsCFI) {
      // Change the definition of CFA from SP+offset to FP+offset, because SP
      // will change at every alloca.
      unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
      unsigned CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createDefCfaRegister(nullptr, Reg));

      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }
  }

  if (needsCFI) {
    // Describe where callee saved registers were saved, at fixed offsets from
    // CFA.
    const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
    for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
      unsigned Reg = CSI[I].getReg();
      if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;

      // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
      // subregisters of CR2. We just need to emit a move of CR2.
      if (PPC::CRBITRCRegClass.contains(Reg))
        continue;

      if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
        continue;

      // For SVR4, don't emit a move for the CR spill slot if we haven't
      // spilled CRs.
      if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
          && !MustSaveCR)
        continue;

      // For 64-bit SVR4 when we have spilled CRs, the spill location
      // is SP+8, not a frame-relative slot.
      if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
        // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
        // the whole CR word. In the ELFv2 ABI, every CR that was
        // actually saved gets its own CFI record.
        unsigned CRReg = isELFv2ABI ? Reg : (unsigned) PPC::CR2;
        unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
            nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset));
        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex);
        continue;
      }

      if (CSI[I].isSpilledToReg()) {
        unsigned SpilledReg = CSI[I].getDstReg();
        unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
            nullptr, MRI->getDwarfRegNum(Reg, true),
            MRI->getDwarfRegNum(SpilledReg, true)));
        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIRegister);
      } else {
        int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
        // We have changed the object offset above but we do not want to change
        // the actual offsets in the CFI instruction so we have to undo the
        // offset change here.
        if (MovingStackUpdateDown)
          Offset -= NegFrameSize;

        unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
            nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex);
      }
    }
  }
}

void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
                                        MachineBasicBlock &PrologMBB) const {
  // TODO: Generate CFI instructions.
  bool isPPC64 = Subtarget.isPPC64();
  const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  // AIX assembler does not support cfi directives.
  const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
  auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
    int Opc = MI.getOpcode();
    return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32;
  });
  if (StackAllocMIPos == PrologMBB.end())
    return;
  const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
  MachineBasicBlock *CurrentMBB = &PrologMBB;
  DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
  MachineInstr &MI = *StackAllocMIPos;
  int64_t NegFrameSize = MI.getOperand(2).getImm();
  unsigned ProbeSize = TLI.getStackProbeSize(MF);
  int64_t NegProbeSize = -(int64_t)ProbeSize;
  assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
  int64_t NumBlocks = NegFrameSize / NegProbeSize;
  int64_t NegResidualSize = NegFrameSize % NegProbeSize;
  Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
  Register ScratchReg = MI.getOperand(0).getReg();
  Register FPReg = MI.getOperand(1).getReg();
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  bool HasBP = RegInfo->hasBasePointer(MF);
  Register BPReg = RegInfo->getBaseRegister(MF);
  Align MaxAlign = MFI.getMaxAlign();
  const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
  // Subroutines to generate .cfi_* directives.
  auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI, Register Reg) {
    unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
    unsigned CFIIndex = MF.addFrameInst(
        MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  };
  auto buildDefCFA = [&](MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, Register Reg,
                         int Offset) {
    unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
    unsigned CFIIndex = MBB.getParent()->addFrameInst(
        MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset));
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  };
  // Subroutine to determine if we can use the Imm as part of d-form.
  auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; };
  // Subroutine to materialize the Imm into TempReg.
  auto MaterializeImm = [&](MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI, int64_t Imm,
                            Register &TempReg) {
    assert(isInt<32>(Imm) && "Unhandled imm");
    if (isInt<16>(Imm))
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg)
          .addImm(Imm);
    else {
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
          .addImm(Imm >> 16);
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg)
          .addReg(TempReg)
          .addImm(Imm & 0xFFFF);
    }
  };
  // Subroutine to store frame pointer and decrease stack pointer by probe size.
  auto allocateAndProbe = [&](MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MBBI, int64_t NegSize,
                              Register NegSizeReg, bool UseDForm,
                              Register StoreReg) {
    if (UseDForm)
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
          .addReg(StoreReg)
          .addImm(NegSize)
          .addReg(SPReg);
    else
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
          .addReg(StoreReg)
          .addReg(SPReg)
          .addReg(NegSizeReg);
  };
  // Used to probe the realignment gap [stackptr - (stackptr % align), stackptr)
  // when HasBP && isPPC64. In that scenario, normally we have r0, r1, r12 and
  // r30 available, and r1 has already been copied to r30, which is BPReg. So
  // BPReg holds the value of stackptr.
  // First we have to probe the tail interval, whose size is less than probesize,
  // i.e. [stackptr - (stackptr % align) % probesize, stackptr). At this stage,
  // ScratchReg holds the value of ((stackptr % align) % probesize). Then we
  // probe blocks of probesize bytes each until stackptr reaches
  // (stackptr - (stackptr % align)). At this stage, ScratchReg is materialized
  // as negprobesize. At both stages, TempReg holds the value of
  // (stackptr - (stackptr % align)).
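  // A worked example (purely illustrative): with ProbeSize = 4096 and
  // MaxAlign = 32768, if stackptr % 32768 == 20992 then the tail interval is
  // 20992 % 4096 == 512 bytes, probed by bb.1 below, and the loop in bb.3 then
  // probes five more 4096-byte blocks until r1 reaches the aligned boundary
  // kept in TempReg.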
  auto dynamicProbe = [&](MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI, Register ScratchReg,
                          Register TempReg) {
    assert(HasBP && isPPC64 && "Probe alignment part not available");
    assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2");
    // ScratchReg = stackptr % align
    BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg)
        .addReg(BPReg)
        .addImm(0)
        .addImm(64 - Log2(MaxAlign));
    // TempReg = stackptr - (stackptr % align)
    BuildMI(MBB, MBBI, DL, TII.get(PPC::SUBFC8), TempReg)
        .addReg(ScratchReg)
        .addReg(BPReg);
    // ScratchReg = (stackptr % align) % probesize
    BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg)
        .addReg(ScratchReg)
        .addImm(0)
        .addImm(64 - Log2(ProbeSize));
    Register CRReg = PPC::CR0;
    // If (stackptr % align) % probesize == 0, we should not generate probe
    // code. Layout of output assembly kinda like:
    // bb.0:
    //   ...
    //   cmpldi $scratchreg, 0
    //   beq bb.2
    // bb.1: # Probe tail interval
    //   neg $scratchreg, $scratchreg
    //   stdux $bpreg, r1, $scratchreg
    // bb.2:
    //   <materialize negprobesize into $scratchreg>
    //   cmpd r1, $tempreg
    //   beq bb.4
    // bb.3: # Loop to probe each block
    //   stdux $bpreg, r1, $scratchreg
    //   cmpd r1, $tempreg
    //   bne bb.3
    // bb.4:
    //   ...
    MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
    MachineBasicBlock *ProbeResidualMBB = MF.CreateMachineBasicBlock(ProbedBB);
    MF.insert(MBBInsertPoint, ProbeResidualMBB);
    MachineBasicBlock *ProbeLoopPreHeaderMBB =
        MF.CreateMachineBasicBlock(ProbedBB);
    MF.insert(MBBInsertPoint, ProbeLoopPreHeaderMBB);
    MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB);
    MF.insert(MBBInsertPoint, ProbeLoopBodyMBB);
    MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
    MF.insert(MBBInsertPoint, ProbeExitMBB);
    // bb.4
    ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
    ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
    // bb.0
    BuildMI(&MBB, DL, TII.get(PPC::CMPDI), CRReg).addReg(ScratchReg).addImm(0);
    BuildMI(&MBB, DL, TII.get(PPC::BCC))
        .addImm(PPC::PRED_EQ)
        .addReg(CRReg)
        .addMBB(ProbeLoopPreHeaderMBB);
    MBB.addSuccessor(ProbeResidualMBB);
    MBB.addSuccessor(ProbeLoopPreHeaderMBB);
    // bb.1
    BuildMI(ProbeResidualMBB, DL, TII.get(PPC::NEG8), ScratchReg)
        .addReg(ScratchReg);
    allocateAndProbe(*ProbeResidualMBB, ProbeResidualMBB->end(), 0, ScratchReg,
                     false, BPReg);
    ProbeResidualMBB->addSuccessor(ProbeLoopPreHeaderMBB);
    // bb.2
    MaterializeImm(*ProbeLoopPreHeaderMBB, ProbeLoopPreHeaderMBB->end(),
                   NegProbeSize, ScratchReg);
    BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::CMPD), CRReg)
        .addReg(SPReg)
        .addReg(TempReg);
    BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::BCC))
        .addImm(PPC::PRED_EQ)
        .addReg(CRReg)
        .addMBB(ProbeExitMBB);
    ProbeLoopPreHeaderMBB->addSuccessor(ProbeLoopBodyMBB);
    ProbeLoopPreHeaderMBB->addSuccessor(ProbeExitMBB);
    // bb.3
    allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), 0, ScratchReg,
                     false, BPReg);
    BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::CMPD), CRReg)
        .addReg(SPReg)
        .addReg(TempReg);
    BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
        .addImm(PPC::PRED_NE)
        .addReg(CRReg)
        .addMBB(ProbeLoopBodyMBB);
    ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
    ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
    // Update liveins.
    recomputeLiveIns(*ProbeResidualMBB);
    recomputeLiveIns(*ProbeLoopPreHeaderMBB);
    recomputeLiveIns(*ProbeLoopBodyMBB);
    recomputeLiveIns(*ProbeExitMBB);
    return ProbeExitMBB;
  };
  // For the case HasBP && MaxAlign > 1, we have to realign the SP by performing
  // SP = SP - SP % MaxAlign.
  if (HasBP && MaxAlign > 1) {
    // FIXME: Currently only probe the gap [stackptr & alignmask, stackptr) in
    // 64-bit mode.
    if (isPPC64) {
      // Use BPReg to calculate CFA.
      if (needsCFI)
        buildDefCFA(*CurrentMBB, {MI}, BPReg, 0);
      // Since we have SPReg copied to BPReg at the moment, FPReg can be used as
      // TempReg.
      Register TempReg = FPReg;
      CurrentMBB = dynamicProbe(*CurrentMBB, {MI}, ScratchReg, TempReg);
      // Copy BPReg to FPReg to meet the definition of PROBED_STACKALLOC_64.
      BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg)
          .addReg(BPReg)
          .addReg(BPReg);
    } else {
      // Initialize current frame pointer.
      BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg)
          .addReg(SPReg)
          .addReg(SPReg);
      // Use FPReg to calculate CFA.
      if (needsCFI)
        buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
      BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
          .addReg(FPReg)
          .addImm(0)
          .addImm(32 - Log2(MaxAlign))
          .addImm(31);
      BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::SUBFC), SPReg)
          .addReg(ScratchReg)
          .addReg(SPReg);
    }
  } else {
    // Initialize current frame pointer.
    BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
    // Use FPReg to calculate CFA.
    if (needsCFI)
      buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
  }
  // Probe residual part.
  if (NegResidualSize) {
    bool ResidualUseDForm = CanUseDForm(NegResidualSize);
    if (!ResidualUseDForm)
      MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg);
    allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg,
                     ResidualUseDForm, FPReg);
  }
  bool UseDForm = CanUseDForm(NegProbeSize);
  // If the number of blocks is small, just probe them directly.
  if (NumBlocks < 3) {
    if (!UseDForm)
      MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
    for (int i = 0; i < NumBlocks; ++i)
      allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm,
                       FPReg);
    if (needsCFI) {
      // Restore using SPReg to calculate CFA.
      buildDefCFAReg(*CurrentMBB, {MI}, SPReg);
    }
  } else {
    // Since CTR is a volatile register and the current shrink-wrapping
    // implementation won't choose an MBB inside a loop as the PrologMBB, it's
    // safe to synthesize a CTR loop to probe.
    // Calculate the trip count and store it in CTR.
    MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg);
    BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
        .addReg(ScratchReg, RegState::Kill);
    if (!UseDForm)
      MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
    // Create MBBs of the loop.
    MachineFunction::iterator MBBInsertPoint =
        std::next(CurrentMBB->getIterator());
    MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
    MF.insert(MBBInsertPoint, LoopMBB);
    MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
    MF.insert(MBBInsertPoint, ExitMBB);
    // Synthesize the loop body.
    allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
                     UseDForm, FPReg);
    BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
        .addMBB(LoopMBB);
    LoopMBB->addSuccessor(ExitMBB);
    LoopMBB->addSuccessor(LoopMBB);
    // Synthesize the exit MBB.
    ExitMBB->splice(ExitMBB->end(), CurrentMBB,
                    std::next(MachineBasicBlock::iterator(MI)),
                    CurrentMBB->end());
    ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB);
    CurrentMBB->addSuccessor(LoopMBB);
    if (needsCFI) {
      // Restore using SPReg to calculate CFA.
      buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
    }
    // Update liveins.
    recomputeLiveIns(*LoopMBB);
    recomputeLiveIns(*ExitMBB);
  }
  ++NumPrologProbed;
  MI.eraseFromParent();
}

void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  DebugLoc dl;

  if (MBBI != MBB.end())
    dl = MBBI->getDebugLoc();

  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  // Get alignment info so we know how to restore the SP.
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Get the number of bytes allocated from the FrameInfo.
  int FrameSize = MFI.getStackSize();

  // Get processor type.
  bool isPPC64 = Subtarget.isPPC64();

  // Check if the link register (LR) has been saved.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  bool MustSaveLR = FI->mustSaveLR();
  const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
  bool MustSaveCR = !MustSaveCRs.empty();
  // Do we have a frame pointer and/or base pointer for this function?
  bool HasFP = hasFP(MF);
  bool HasBP = RegInfo->hasBasePointer(MF);
  bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();

  Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
  Register BPReg = RegInfo->getBaseRegister(MF);
  Register FPReg = isPPC64 ? PPC::X31 : PPC::R31;
  Register ScratchReg;
  Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
  const MCInstrDesc &MTLRInst = TII.get(isPPC64 ? PPC::MTLR8 : PPC::MTLR);
  const MCInstrDesc &LoadInst = TII.get(isPPC64 ? PPC::LD : PPC::LWZ);
  const MCInstrDesc &LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS);
  const MCInstrDesc &OrInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
  const MCInstrDesc &OrImmInst = TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI);
  const MCInstrDesc &AddImmInst = TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI);
  const MCInstrDesc &AddInst = TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4);
  const MCInstrDesc &LoadWordInst = TII.get(isPPC64 ? PPC::LWZ8 : PPC::LWZ);
  const MCInstrDesc &MoveToCRInst = TII.get(isPPC64 ? PPC::MTOCRF8 : PPC::MTOCRF);
  int LROffset = getReturnSaveOffset();

  int FPOffset = 0;

  // Using the same bool variable as below to suppress compiler warnings.
1543 bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg, 1544 &TempReg); 1545 assert(SingleScratchReg && 1546 "Could not find an available scratch register"); 1547 1548 SingleScratchReg = ScratchReg == TempReg; 1549 1550 if (HasFP) { 1551 int FPIndex = FI->getFramePointerSaveIndex(); 1552 assert(FPIndex && "No Frame Pointer Save Slot!"); 1553 FPOffset = MFI.getObjectOffset(FPIndex); 1554 } 1555 1556 int BPOffset = 0; 1557 if (HasBP) { 1558 int BPIndex = FI->getBasePointerSaveIndex(); 1559 assert(BPIndex && "No Base Pointer Save Slot!"); 1560 BPOffset = MFI.getObjectOffset(BPIndex); 1561 } 1562 1563 int PBPOffset = 0; 1564 if (FI->usesPICBase()) { 1565 int PBPIndex = FI->getPICBasePointerSaveIndex(); 1566 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 1567 PBPOffset = MFI.getObjectOffset(PBPIndex); 1568 } 1569 1570 bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn()); 1571 1572 if (IsReturnBlock) { 1573 unsigned RetOpcode = MBBI->getOpcode(); 1574 bool UsesTCRet = RetOpcode == PPC::TCRETURNri || 1575 RetOpcode == PPC::TCRETURNdi || 1576 RetOpcode == PPC::TCRETURNai || 1577 RetOpcode == PPC::TCRETURNri8 || 1578 RetOpcode == PPC::TCRETURNdi8 || 1579 RetOpcode == PPC::TCRETURNai8; 1580 1581 if (UsesTCRet) { 1582 int MaxTCRetDelta = FI->getTailCallSPDelta(); 1583 MachineOperand &StackAdjust = MBBI->getOperand(1); 1584 assert(StackAdjust.isImm() && "Expecting immediate value."); 1585 // Adjust stack pointer. 1586 int StackAdj = StackAdjust.getImm(); 1587 int Delta = StackAdj - MaxTCRetDelta; 1588 assert((Delta >= 0) && "Delta must be non-negative"); 1589 if (MaxTCRetDelta > 0) 1590 FrameSize += (StackAdj + Delta); 1591 else 1592 FrameSize += StackAdj; 1593 } 1594 } 1595 1596 // Frames of 32KB & larger require special handling because they cannot be 1597 // indexed into with a simple LD/LWZ immediate offset operand. 1598 bool isLargeFrame = !isInt<16>(FrameSize); 1599 1600 // On targets without a red zone, the SP needs to be restored last, so that 1601 // all live contents of the stack frame are upwards of the SP. This means 1602 // that we cannot restore SP just now, since there may be more registers 1603 // to restore from the stack frame (e.g. R31). If the frame size is not 1604 // a simple immediate value, we will need a spare register to hold the 1605 // restored SP. If the frame size is known and small, we can simply adjust 1606 // the offsets of the registers to be restored, and still use SP to restore 1607 // them. In such a case, the final update of SP will be to add the frame 1608 // size to it. 1609 // To simplify the code, set RBReg to the base register used to restore 1610 // values from the stack, and set SPAdd to the value that needs to be added 1611 // to the SP at the end. The default values are as if a red zone was present. 1612 unsigned RBReg = SPReg; 1613 unsigned SPAdd = 0; 1614 1615 // Check if we can move the stack update instruction up the epilogue 1616 // past the callee saves. This will allow the move to LR instruction 1617 // to be executed before the restores of the callee saves, which means 1618 // that the callee saves can hide the latency of the MTLR instruction. 1619 MachineBasicBlock::iterator StackUpdateLoc = MBBI; 1620 if (stackUpdateCanBeMoved(MF)) { 1621 const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo(); 1622 for (CalleeSavedInfo CSI : Info) { 1623 int FrIdx = CSI.getFrameIdx(); 1624 // If the frame index is not negative, the callee saved info belongs to a 1625 // stack object that is not a fixed stack object.
We ignore non-fixed 1626 // stack objects because we won't move the update of the stack pointer 1627 // past them. 1628 if (FrIdx >= 0) 1629 continue; 1630 1631 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) 1632 StackUpdateLoc--; 1633 else { 1634 // Abort the operation as we can't update all CSR restores. 1635 StackUpdateLoc = MBBI; 1636 break; 1637 } 1638 } 1639 } 1640 1641 if (FrameSize) { 1642 // In the prologue, the loaded (or persistent) stack pointer value is 1643 // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red 1644 // zone add this offset back now. 1645 1646 // If this function contained a fastcc call and GuaranteedTailCallOpt is 1647 // enabled (=> hasFastCall()==true) the fastcc call might contain a tail 1648 // call which invalidates the stack pointer value in SP(0). So we use the 1649 // value of R31 in this case. 1650 if (FI->hasFastCall()) { 1651 assert(HasFP && "Expecting a valid frame pointer."); 1652 if (!HasRedZone) 1653 RBReg = FPReg; 1654 if (!isLargeFrame) { 1655 BuildMI(MBB, MBBI, dl, AddImmInst, RBReg) 1656 .addReg(FPReg).addImm(FrameSize); 1657 } else { 1658 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1659 .addImm(FrameSize >> 16); 1660 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1661 .addReg(ScratchReg, RegState::Kill) 1662 .addImm(FrameSize & 0xFFFF); 1663 BuildMI(MBB, MBBI, dl, AddInst) 1664 .addReg(RBReg) 1665 .addReg(FPReg) 1666 .addReg(ScratchReg); 1667 } 1668 } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) { 1669 if (HasRedZone) { 1670 BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg) 1671 .addReg(SPReg) 1672 .addImm(FrameSize); 1673 } else { 1674 // Make sure that adding FrameSize will not overflow the max offset 1675 // size. 1676 assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 && 1677 "Local offsets should be negative"); 1678 SPAdd = FrameSize; 1679 FPOffset += FrameSize; 1680 BPOffset += FrameSize; 1681 PBPOffset += FrameSize; 1682 } 1683 } else { 1684 // We don't want to use ScratchReg as a base register, because it 1685 // could happen to be R0. Use FP instead, but make sure to preserve it. 1686 if (!HasRedZone) { 1687 // If FP is not saved, copy it to ScratchReg. 1688 if (!HasFP) 1689 BuildMI(MBB, MBBI, dl, OrInst, ScratchReg) 1690 .addReg(FPReg) 1691 .addReg(FPReg); 1692 RBReg = FPReg; 1693 } 1694 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg) 1695 .addImm(0) 1696 .addReg(SPReg); 1697 } 1698 } 1699 assert(RBReg != ScratchReg && "Should have avoided ScratchReg"); 1700 // If there is no red zone, ScratchReg may be needed for holding a useful 1701 // value (although not the base register). Make sure it is not overwritten 1702 // too early. 1703 1704 // If we need to restore both the LR and the CR and we only have one 1705 // available scratch register, we must do them one at a time. 1706 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 1707 // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg 1708 // is live here. 1709 assert(HasRedZone && "Expecting red zone"); 1710 BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg) 1711 .addImm(CRSaveOffset) 1712 .addReg(SPReg); 1713 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1714 BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i]) 1715 .addReg(TempReg, getKillRegState(i == e-1)); 1716 } 1717 1718 // Delay restoring of the LR if ScratchReg is needed. This is ok, since 1719 // LR is stored in the caller's stack frame. ScratchReg will be needed 1720 // if RBReg is anything other than SP. 
We shouldn't use ScratchReg as 1721 // a base register anyway, because it may happen to be R0. 1722 bool LoadedLR = false; 1723 if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) { 1724 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg) 1725 .addImm(LROffset+SPAdd) 1726 .addReg(RBReg); 1727 LoadedLR = true; 1728 } 1729 1730 if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) { 1731 assert(RBReg == SPReg && "Should be using SP as a base register"); 1732 BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg) 1733 .addImm(CRSaveOffset) 1734 .addReg(RBReg); 1735 } 1736 1737 if (HasFP) { 1738 // If there is red zone, restore FP directly, since SP has already been 1739 // restored. Otherwise, restore the value of FP into ScratchReg. 1740 if (HasRedZone || RBReg == SPReg) 1741 BuildMI(MBB, MBBI, dl, LoadInst, FPReg) 1742 .addImm(FPOffset) 1743 .addReg(SPReg); 1744 else 1745 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1746 .addImm(FPOffset) 1747 .addReg(RBReg); 1748 } 1749 1750 if (FI->usesPICBase()) 1751 BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30) 1752 .addImm(PBPOffset) 1753 .addReg(RBReg); 1754 1755 if (HasBP) 1756 BuildMI(MBB, MBBI, dl, LoadInst, BPReg) 1757 .addImm(BPOffset) 1758 .addReg(RBReg); 1759 1760 // There is nothing more to be loaded from the stack, so now we can 1761 // restore SP: SP = RBReg + SPAdd. 1762 if (RBReg != SPReg || SPAdd != 0) { 1763 assert(!HasRedZone && "This should not happen with red zone"); 1764 // If SPAdd is 0, generate a copy. 1765 if (SPAdd == 0) 1766 BuildMI(MBB, MBBI, dl, OrInst, SPReg) 1767 .addReg(RBReg) 1768 .addReg(RBReg); 1769 else 1770 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1771 .addReg(RBReg) 1772 .addImm(SPAdd); 1773 1774 assert(RBReg != ScratchReg && "Should be using FP or SP as base register"); 1775 if (RBReg == FPReg) 1776 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1777 .addReg(ScratchReg) 1778 .addReg(ScratchReg); 1779 1780 // Now load the LR from the caller's stack frame. 1781 if (MustSaveLR && !LoadedLR) 1782 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1783 .addImm(LROffset) 1784 .addReg(SPReg); 1785 } 1786 1787 if (MustSaveCR && 1788 !(SingleScratchReg && MustSaveLR)) 1789 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1790 BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i]) 1791 .addReg(TempReg, getKillRegState(i == e-1)); 1792 1793 if (MustSaveLR) 1794 BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg); 1795 1796 // Callee pop calling convention. Pop parameter/linkage area. 
Used for tail 1797 // call optimization 1798 if (IsReturnBlock) { 1799 unsigned RetOpcode = MBBI->getOpcode(); 1800 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1801 (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) && 1802 MF.getFunction().getCallingConv() == CallingConv::Fast) { 1803 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1804 unsigned CallerAllocatedAmt = FI->getMinReservedArea(); 1805 1806 if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) { 1807 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1808 .addReg(SPReg).addImm(CallerAllocatedAmt); 1809 } else { 1810 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1811 .addImm(CallerAllocatedAmt >> 16); 1812 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1813 .addReg(ScratchReg, RegState::Kill) 1814 .addImm(CallerAllocatedAmt & 0xFFFF); 1815 BuildMI(MBB, MBBI, dl, AddInst) 1816 .addReg(SPReg) 1817 .addReg(FPReg) 1818 .addReg(ScratchReg); 1819 } 1820 } else { 1821 createTailCallBranchInstr(MBB); 1822 } 1823 } 1824 } 1825 1826 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const { 1827 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1828 1829 // If we got this far a first terminator should exist. 1830 assert(MBBI != MBB.end() && "Failed to find the first terminator."); 1831 1832 DebugLoc dl = MBBI->getDebugLoc(); 1833 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 1834 1835 // Create branch instruction for pseudo tail call return instruction. 1836 // The TCRETURNdi variants are direct calls. Valid targets for those are 1837 // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel 1838 // since we can tail call external functions with PC-Rel (i.e. we don't need 1839 // to worry about different TOC pointers). Some of the external functions will 1840 // be MO_GlobalAddress while others like memcpy for example, are going to 1841 // be MO_ExternalSymbol. 1842 unsigned RetOpcode = MBBI->getOpcode(); 1843 if (RetOpcode == PPC::TCRETURNdi) { 1844 MBBI = MBB.getLastNonDebugInstr(); 1845 MachineOperand &JumpTarget = MBBI->getOperand(0); 1846 if (JumpTarget.isGlobal()) 1847 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). 1848 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); 1849 else if (JumpTarget.isSymbol()) 1850 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). 1851 addExternalSymbol(JumpTarget.getSymbolName()); 1852 else 1853 llvm_unreachable("Expecting Global or External Symbol"); 1854 } else if (RetOpcode == PPC::TCRETURNri) { 1855 MBBI = MBB.getLastNonDebugInstr(); 1856 assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); 1857 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR)); 1858 } else if (RetOpcode == PPC::TCRETURNai) { 1859 MBBI = MBB.getLastNonDebugInstr(); 1860 MachineOperand &JumpTarget = MBBI->getOperand(0); 1861 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm()); 1862 } else if (RetOpcode == PPC::TCRETURNdi8) { 1863 MBBI = MBB.getLastNonDebugInstr(); 1864 MachineOperand &JumpTarget = MBBI->getOperand(0); 1865 if (JumpTarget.isGlobal()) 1866 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). 1867 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); 1868 else if (JumpTarget.isSymbol()) 1869 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). 
1870 addExternalSymbol(JumpTarget.getSymbolName()); 1871 else 1872 llvm_unreachable("Expecting Global or External Symbol"); 1873 } else if (RetOpcode == PPC::TCRETURNri8) { 1874 MBBI = MBB.getLastNonDebugInstr(); 1875 assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); 1876 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8)); 1877 } else if (RetOpcode == PPC::TCRETURNai8) { 1878 MBBI = MBB.getLastNonDebugInstr(); 1879 MachineOperand &JumpTarget = MBBI->getOperand(0); 1880 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm()); 1881 } 1882 } 1883 1884 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF, 1885 BitVector &SavedRegs, 1886 RegScavenger *RS) const { 1887 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); 1888 1889 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1890 1891 // Save and clear the LR state. 1892 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1893 unsigned LR = RegInfo->getRARegister(); 1894 FI->setMustSaveLR(MustSaveLR(MF, LR)); 1895 SavedRegs.reset(LR); 1896 1897 // Save R31 if necessary. 1898 int FPSI = FI->getFramePointerSaveIndex(); 1899 const bool isPPC64 = Subtarget.isPPC64(); 1900 MachineFrameInfo &MFI = MF.getFrameInfo(); 1901 1902 // If the frame pointer save index hasn't been defined yet. 1903 if (!FPSI && needsFP(MF)) { 1904 // Find out the fixed offset of the frame pointer save area. 1905 int FPOffset = getFramePointerSaveOffset(); 1906 // Allocate the frame index for the frame pointer save area. 1907 FPSI = MFI.CreateFixedObject(isPPC64 ? 8 : 4, FPOffset, true); 1908 // Save the result. 1909 FI->setFramePointerSaveIndex(FPSI); 1910 } 1911 1912 int BPSI = FI->getBasePointerSaveIndex(); 1913 if (!BPSI && RegInfo->hasBasePointer(MF)) { 1914 int BPOffset = getBasePointerSaveOffset(); 1915 // Allocate the frame index for the base pointer save area. 1916 BPSI = MFI.CreateFixedObject(isPPC64 ? 8 : 4, BPOffset, true); 1917 // Save the result. 1918 FI->setBasePointerSaveIndex(BPSI); 1919 } 1920 1921 // Reserve stack space for the PIC Base register (R30). 1922 // Only used in SVR4 32-bit. 1923 if (FI->usesPICBase()) { 1924 int PBPSI = MFI.CreateFixedObject(4, -8, true); 1925 FI->setPICBasePointerSaveIndex(PBPSI); 1926 } 1927 1928 // Make sure we don't explicitly spill r31, because, for example, we have 1929 // some inline asm which explicitly clobbers it, when we otherwise have a 1930 // frame pointer and are using r31's spill slot for the prologue/epilogue 1931 // code. Same goes for the base pointer and the PIC base register. 1932 if (needsFP(MF)) 1933 SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31); 1934 if (RegInfo->hasBasePointer(MF)) 1935 SavedRegs.reset(RegInfo->getBaseRegister(MF)); 1936 if (FI->usesPICBase()) 1937 SavedRegs.reset(PPC::R30); 1938 1939 // Reserve stack space to move the linkage area to in case of a tail call. 1940 int TCSPDelta = 0; 1941 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1942 (TCSPDelta = FI->getTailCallSPDelta()) < 0) { 1943 MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true); 1944 } 1945 1946 // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4. 1947 // For 64-bit SVR4 and all flavors of AIX, we create a FixedStack 1948 // object at the offset of the CR-save slot in the linkage area. The actual 1949 // save and restore of the condition register will be created as part of the 1950 // prologue and epilogue insertion, but the FixedStack object is needed to 1951 // keep the CalleeSavedInfo valid.
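// The slot is placed at the CR save word in the caller's linkage area on
// 64-bit targets (offset 8) and on 32-bit AIX (offset 4); the 32-bit SVR4
// linkage area has no CR save word, so the slot goes below the back chain
// at offset -4 instead.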
1952 if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) || 1953 SavedRegs.test(PPC::CR4))) { 1954 const uint64_t SpillSize = 4; // Condition register is always 4 bytes. 1955 const int64_t SpillOffset = 1956 Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4; 1957 int FrameIdx = 1958 MFI.CreateFixedObject(SpillSize, SpillOffset, 1959 /* IsImmutable */ true, /* IsAliased */ false); 1960 FI->setCRSpillFrameIndex(FrameIdx); 1961 } 1962 } 1963 1964 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, 1965 RegScavenger *RS) const { 1966 // Get callee saved register information. 1967 MachineFrameInfo &MFI = MF.getFrameInfo(); 1968 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 1969 1970 // If the function is shrink-wrapped and has a tail call, the tail call might 1971 // not be in the new RestoreBlock chosen by shrink-wrapping, so the real branch 1972 // instruction for it won't be generated by emitEpilogue(). We handle this case 1973 // here. 1974 if (MFI.getSavePoint() && MFI.hasTailCall()) { 1975 MachineBasicBlock *RestoreBlock = MFI.getRestorePoint(); 1976 for (MachineBasicBlock &MBB : MF) { 1977 if (MBB.isReturnBlock() && (&MBB) != RestoreBlock) 1978 createTailCallBranchInstr(MBB); 1979 } 1980 } 1981 1982 // Early exit if no callee saved registers are modified! 1983 if (CSI.empty() && !needsFP(MF)) { 1984 addScavengingSpillSlot(MF, RS); 1985 return; 1986 } 1987 1988 unsigned MinGPR = PPC::R31; 1989 unsigned MinG8R = PPC::X31; 1990 unsigned MinFPR = PPC::F31; 1991 unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31; 1992 1993 bool HasGPSaveArea = false; 1994 bool HasG8SaveArea = false; 1995 bool HasFPSaveArea = false; 1996 bool HasVRSaveArea = false; 1997 1998 SmallVector<CalleeSavedInfo, 18> GPRegs; 1999 SmallVector<CalleeSavedInfo, 18> G8Regs; 2000 SmallVector<CalleeSavedInfo, 18> FPRegs; 2001 SmallVector<CalleeSavedInfo, 18> VRegs; 2002 2003 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2004 unsigned Reg = CSI[i].getReg(); 2005 assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() || 2006 (Reg != PPC::X2 && Reg != PPC::R2)) && 2007 "Not expecting to try to spill R2 in a function that must save TOC"); 2008 if (PPC::GPRCRegClass.contains(Reg)) { 2009 HasGPSaveArea = true; 2010 2011 GPRegs.push_back(CSI[i]); 2012 2013 if (Reg < MinGPR) { 2014 MinGPR = Reg; 2015 } 2016 } else if (PPC::G8RCRegClass.contains(Reg)) { 2017 HasG8SaveArea = true; 2018 2019 G8Regs.push_back(CSI[i]); 2020 2021 if (Reg < MinG8R) { 2022 MinG8R = Reg; 2023 } 2024 } else if (PPC::F8RCRegClass.contains(Reg)) { 2025 HasFPSaveArea = true; 2026 2027 FPRegs.push_back(CSI[i]); 2028 2029 if (Reg < MinFPR) { 2030 MinFPR = Reg; 2031 } 2032 } else if (PPC::CRBITRCRegClass.contains(Reg) || 2033 PPC::CRRCRegClass.contains(Reg)) { 2034 ; // do nothing, as we already know whether CRs are spilled 2035 } else if (PPC::VRRCRegClass.contains(Reg) || 2036 PPC::SPERCRegClass.contains(Reg)) { 2037 // Altivec and SPE are mutually exclusive, but have the same stack 2038 // alignment requirements, so overload the save area for both cases.
2039 HasVRSaveArea = true; 2040 2041 VRegs.push_back(CSI[i]); 2042 2043 if (Reg < MinVR) { 2044 MinVR = Reg; 2045 } 2046 } else { 2047 llvm_unreachable("Unknown RegisterClass!"); 2048 } 2049 } 2050 2051 PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>(); 2052 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 2053 2054 int64_t LowerBound = 0; 2055 2056 // Take into account stack space reserved for tail calls. 2057 int TCSPDelta = 0; 2058 if (MF.getTarget().Options.GuaranteedTailCallOpt && 2059 (TCSPDelta = PFI->getTailCallSPDelta()) < 0) { 2060 LowerBound = TCSPDelta; 2061 } 2062 2063 // The Floating-point register save area is right below the back chain word 2064 // of the previous stack frame. 2065 if (HasFPSaveArea) { 2066 for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) { 2067 int FI = FPRegs[i].getFrameIdx(); 2068 2069 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2070 } 2071 2072 LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8; 2073 } 2074 2075 // Check whether the frame pointer register is allocated. If so, make sure it 2076 // is spilled to the correct offset. 2077 if (needsFP(MF)) { 2078 int FI = PFI->getFramePointerSaveIndex(); 2079 assert(FI && "No Frame Pointer Save Slot!"); 2080 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2081 // FP is R31/X31, so no need to update MinGPR/MinG8R. 2082 HasGPSaveArea = true; 2083 } 2084 2085 if (PFI->usesPICBase()) { 2086 int FI = PFI->getPICBasePointerSaveIndex(); 2087 assert(FI && "No PIC Base Pointer Save Slot!"); 2088 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2089 2090 MinGPR = std::min<unsigned>(MinGPR, PPC::R30); 2091 HasGPSaveArea = true; 2092 } 2093 2094 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 2095 if (RegInfo->hasBasePointer(MF)) { 2096 int FI = PFI->getBasePointerSaveIndex(); 2097 assert(FI && "No Base Pointer Save Slot!"); 2098 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2099 2100 Register BP = RegInfo->getBaseRegister(MF); 2101 if (PPC::G8RCRegClass.contains(BP)) { 2102 MinG8R = std::min<unsigned>(MinG8R, BP); 2103 HasG8SaveArea = true; 2104 } else if (PPC::GPRCRegClass.contains(BP)) { 2105 MinGPR = std::min<unsigned>(MinGPR, BP); 2106 HasGPSaveArea = true; 2107 } 2108 } 2109 2110 // General register save area starts right below the Floating-point 2111 // register save area. 2112 if (HasGPSaveArea || HasG8SaveArea) { 2113 // Move general register save area spill slots down, taking into account 2114 // the size of the Floating-point register save area. 2115 for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) { 2116 if (!GPRegs[i].isSpilledToReg()) { 2117 int FI = GPRegs[i].getFrameIdx(); 2118 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2119 } 2120 } 2121 2122 // Move general register save area spill slots down, taking into account 2123 // the size of the Floating-point register save area. 2124 for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) { 2125 if (!G8Regs[i].isSpilledToReg()) { 2126 int FI = G8Regs[i].getFrameIdx(); 2127 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2128 } 2129 } 2130 2131 unsigned MinReg = 2132 std::min<unsigned>(TRI->getEncodingValue(MinGPR), 2133 TRI->getEncodingValue(MinG8R)); 2134 2135 const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4; 2136 LowerBound -= (31 - MinReg + 1) * GPRegSize; 2137 } 2138 2139 // For 32-bit only, the CR save area is below the general register 2140 // save area. 
For 64-bit SVR4, the CR save area is addressed relative 2141 // to the stack pointer and hence does not need an adjustment here. 2142 // Only CR2 (the first nonvolatile spilled) has an associated frame 2143 // index so that we have a single uniform save area. 2144 if (spillsCR(MF) && Subtarget.is32BitELFABI()) { 2145 // Adjust the frame index of the CR spill slot. 2146 for (const auto &CSInfo : CSI) { 2147 if (CSInfo.getReg() == PPC::CR2) { 2148 int FI = CSInfo.getFrameIdx(); 2149 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2150 break; 2151 } 2152 } 2153 2154 LowerBound -= 4; // The CR save area is always 4 bytes long. 2155 } 2156 2157 // Both Altivec and SPE have the same alignment and padding requirements 2158 // within the stack frame. 2159 if (HasVRSaveArea) { 2160 // Insert alignment padding; we need 16-byte alignment. Note: for a positive 2161 // value the alignment formula is y = (x + (n-1)) & ~(n-1), but since we are 2162 // working with a negative value here (the stack grows downward), we should use 2163 // y = x & ~(n-1), where x is the size before aligning, n is the alignment size 2164 // (n = 16 here), and y is the size after aligning; for example, x = -200 aligns to y = -208. 2165 assert(LowerBound <= 0 && "Expect LowerBound to have a non-positive value!"); 2166 LowerBound &= ~(15); 2167 2168 for (unsigned i = 0, e = VRegs.size(); i != e; ++i) { 2169 int FI = VRegs[i].getFrameIdx(); 2170 2171 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2172 } 2173 } 2174 2175 addScavengingSpillSlot(MF, RS); 2176 } 2177 2178 void 2179 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, 2180 RegScavenger *RS) const { 2181 // Reserve a slot closest to SP or frame pointer if we have a dynalloc or 2182 // a large stack, which will require scavenging a register to materialize a 2183 // large offset. 2184 2185 // We need to have a scavenger spill slot for spills if the frame size is 2186 // large. In case there is no free register for large-offset addressing, 2187 // this slot is used for the necessary emergency spill. Also, we need the 2188 // slot for dynamic stack allocations. 2189 2190 // The scavenger might be invoked if the frame offset does not fit into 2191 // the 16-bit immediate. We don't know the complete frame size here 2192 // because we've not yet computed callee-saved register spills or the 2193 // needed alignment padding. 2194 unsigned StackSize = determineFrameLayout(MF, true); 2195 MachineFrameInfo &MFI = MF.getFrameInfo(); 2196 if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) || 2197 (hasSpills(MF) && !isInt<16>(StackSize))) { 2198 const TargetRegisterClass &GPRC = PPC::GPRCRegClass; 2199 const TargetRegisterClass &G8RC = PPC::G8RCRegClass; 2200 const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC; 2201 const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo(); 2202 unsigned Size = TRI.getSpillSize(RC); 2203 Align Alignment = TRI.getSpillAlign(RC); 2204 RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false)); 2205 2206 // Might we have over-aligned allocas? 2207 bool HasAlVars = 2208 MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign(); 2209 2210 // These kinds of spills might need two registers. 2211 if (spillsCR(MF) || HasAlVars) 2212 RS->addScavengingFrameIndex( 2213 MFI.CreateStackObject(Size, Alignment, false)); 2214 } 2215 } 2216 2217 // This function checks if a callee-saved GPR can be spilled to a volatile 2218 // vector register.
This occurs for leaf functions when the option 2219 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers 2220 // which were not spilled to vectors, return false so the target independent 2221 // code can handle them by assigning a FrameIdx to a stack slot. 2222 bool PPCFrameLowering::assignCalleeSavedSpillSlots( 2223 MachineFunction &MF, const TargetRegisterInfo *TRI, 2224 std::vector<CalleeSavedInfo> &CSI) const { 2225 2226 if (CSI.empty()) 2227 return true; // Early exit if no callee saved registers are modified! 2228 2229 // Early exit if cannot spill gprs to volatile vector registers. 2230 MachineFrameInfo &MFI = MF.getFrameInfo(); 2231 if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector()) 2232 return false; 2233 2234 // Build a BitVector of VSRs that can be used for spilling GPRs. 2235 BitVector BVAllocatable = TRI->getAllocatableSet(MF); 2236 BitVector BVCalleeSaved(TRI->getNumRegs()); 2237 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 2238 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); 2239 for (unsigned i = 0; CSRegs[i]; ++i) 2240 BVCalleeSaved.set(CSRegs[i]); 2241 2242 for (unsigned Reg : BVAllocatable.set_bits()) { 2243 // Set to 0 if the register is not a volatile VF/F8 register, or if it is 2244 // used in the function. 2245 if (BVCalleeSaved[Reg] || 2246 (!PPC::F8RCRegClass.contains(Reg) && 2247 !PPC::VFRCRegClass.contains(Reg)) || 2248 (MF.getRegInfo().isPhysRegUsed(Reg))) 2249 BVAllocatable.reset(Reg); 2250 } 2251 2252 bool AllSpilledToReg = true; 2253 for (auto &CS : CSI) { 2254 if (BVAllocatable.none()) 2255 return false; 2256 2257 unsigned Reg = CS.getReg(); 2258 if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) { 2259 AllSpilledToReg = false; 2260 continue; 2261 } 2262 2263 unsigned VolatileVFReg = BVAllocatable.find_first(); 2264 if (VolatileVFReg < BVAllocatable.size()) { 2265 CS.setDstReg(VolatileVFReg); 2266 BVAllocatable.reset(VolatileVFReg); 2267 } else { 2268 AllSpilledToReg = false; 2269 } 2270 } 2271 return AllSpilledToReg; 2272 } 2273 2274 bool PPCFrameLowering::spillCalleeSavedRegisters( 2275 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 2276 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { 2277 2278 MachineFunction *MF = MBB.getParent(); 2279 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 2280 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); 2281 bool MustSaveTOC = FI->mustSaveTOC(); 2282 DebugLoc DL; 2283 bool CRSpilled = false; 2284 MachineInstrBuilder CRMIB; 2285 2286 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2287 unsigned Reg = CSI[i].getReg(); 2288 2289 // CR2 through CR4 are the nonvolatile CR fields. 2290 bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4; 2291 2292 // Add the callee-saved register as live-in; it's killed at the spill. 2293 // Do not do this for callee-saved registers that are live-in to the 2294 // function because they will already be marked live-in and this will be 2295 // adding it for a second time. It is an error to add the same register 2296 // to the set more than once. 2297 const MachineRegisterInfo &MRI = MF->getRegInfo(); 2298 bool IsLiveIn = MRI.isLiveIn(Reg); 2299 if (!IsLiveIn) 2300 MBB.addLiveIn(Reg); 2301 2302 if (CRSpilled && IsCRField) { 2303 CRMIB.addReg(Reg, RegState::ImplicitKill); 2304 continue; 2305 } 2306 2307 // The actual spill will happen in the prologue. 
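// (R2/X2 is the TOC pointer; when the function must save the TOC, the
// prologue emits that store itself, so no separate spill is generated here.)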
2308 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 2309 continue; 2310 2311 // Insert the spill to the stack frame. 2312 if (IsCRField) { 2313 PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>(); 2314 if (!Subtarget.is32BitELFABI()) { 2315 // The actual spill will happen at the start of the prologue. 2316 FuncInfo->addMustSaveCR(Reg); 2317 } else { 2318 CRSpilled = true; 2319 FuncInfo->setSpillsCR(); 2320 2321 // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have 2322 // the same frame index in PPCRegisterInfo::hasReservedSpillSlot. 2323 CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12) 2324 .addReg(Reg, RegState::ImplicitKill); 2325 2326 MBB.insert(MI, CRMIB); 2327 MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW)) 2328 .addReg(PPC::R12, 2329 getKillRegState(true)), 2330 CSI[i].getFrameIdx())); 2331 } 2332 } else { 2333 if (CSI[i].isSpilledToReg()) { 2334 NumPESpillVSR++; 2335 BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg()) 2336 .addReg(Reg, getKillRegState(true)); 2337 } else { 2338 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 2339 // Use !IsLiveIn for the kill flag. 2340 // We do not want to kill registers that are live in this function 2341 // before their use because they will become undefined registers. 2342 // Functions without NoUnwind need to preserve the order of elements in 2343 // saved vector registers. 2344 if (Subtarget.needsSwapsForVSXMemOps() && 2345 !MF->getFunction().hasFnAttribute(Attribute::NoUnwind)) 2346 TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn, 2347 CSI[i].getFrameIdx(), RC, TRI); 2348 else 2349 TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(), 2350 RC, TRI); 2351 } 2352 } 2353 } 2354 return true; 2355 } 2356 2357 static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled, 2358 bool CR4Spilled, MachineBasicBlock &MBB, 2359 MachineBasicBlock::iterator MI, 2360 ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) { 2361 2362 MachineFunction *MF = MBB.getParent(); 2363 const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo(); 2364 DebugLoc DL; 2365 unsigned MoveReg = PPC::R12; 2366 2367 // 32-bit: FP-relative 2368 MBB.insert(MI, 2369 addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg), 2370 CSI[CSIIndex].getFrameIdx())); 2371 2372 unsigned RestoreOp = PPC::MTOCRF; 2373 if (CR2Spilled) 2374 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2) 2375 .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled))); 2376 2377 if (CR3Spilled) 2378 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3) 2379 .addReg(MoveReg, getKillRegState(!CR4Spilled))); 2380 2381 if (CR4Spilled) 2382 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4) 2383 .addReg(MoveReg, getKillRegState(true))); 2384 } 2385 2386 MachineBasicBlock::iterator PPCFrameLowering:: 2387 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, 2388 MachineBasicBlock::iterator I) const { 2389 const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); 2390 if (MF.getTarget().Options.GuaranteedTailCallOpt && 2391 I->getOpcode() == PPC::ADJCALLSTACKUP) { 2392 // Add (actually subtract) back the amount the callee popped on return. 2393 if (int CalleeAmt = I->getOperand(1).getImm()) { 2394 bool is64Bit = Subtarget.isPPC64(); 2395 CalleeAmt *= -1; 2396 unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1; 2397 unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0; 2398 unsigned ADDIInstr = is64Bit ? 
PPC::ADDI8 : PPC::ADDI; 2399 unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4; 2400 unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS; 2401 unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI; 2402 const DebugLoc &dl = I->getDebugLoc(); 2403 2404 if (isInt<16>(CalleeAmt)) { 2405 BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg) 2406 .addReg(StackReg, RegState::Kill) 2407 .addImm(CalleeAmt); 2408 } else { 2409 MachineBasicBlock::iterator MBBI = I; 2410 BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) 2411 .addImm(CalleeAmt >> 16); 2412 BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) 2413 .addReg(TmpReg, RegState::Kill) 2414 .addImm(CalleeAmt & 0xFFFF); 2415 BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg) 2416 .addReg(StackReg, RegState::Kill) 2417 .addReg(TmpReg); 2418 } 2419 } 2420 } 2421 // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. 2422 return MBB.erase(I); 2423 } 2424 2425 static bool isCalleeSavedCR(unsigned Reg) { 2426 return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4; 2427 } 2428 2429 bool PPCFrameLowering::restoreCalleeSavedRegisters( 2430 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 2431 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { 2432 MachineFunction *MF = MBB.getParent(); 2433 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 2434 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); 2435 bool MustSaveTOC = FI->mustSaveTOC(); 2436 bool CR2Spilled = false; 2437 bool CR3Spilled = false; 2438 bool CR4Spilled = false; 2439 unsigned CSIIndex = 0; 2440 2441 // Initialize insertion-point logic; we will be restoring in reverse 2442 // order of spill. 2443 MachineBasicBlock::iterator I = MI, BeforeI = I; 2444 bool AtStart = I == MBB.begin(); 2445 2446 if (!AtStart) 2447 --BeforeI; 2448 2449 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2450 unsigned Reg = CSI[i].getReg(); 2451 2452 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 2453 continue; 2454 2455 // Restore of callee saved condition register field is handled during 2456 // epilogue insertion. 2457 if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI()) 2458 continue; 2459 2460 if (Reg == PPC::CR2) { 2461 CR2Spilled = true; 2462 // The spill slot is associated only with CR2, which is the 2463 // first nonvolatile spilled. Save it here. 2464 CSIIndex = i; 2465 continue; 2466 } else if (Reg == PPC::CR3) { 2467 CR3Spilled = true; 2468 continue; 2469 } else if (Reg == PPC::CR4) { 2470 CR4Spilled = true; 2471 continue; 2472 } else { 2473 // On 32-bit ELF when we first encounter a non-CR register after seeing at 2474 // least one CR register, restore all spilled CRs together. 2475 if (CR2Spilled || CR3Spilled || CR4Spilled) { 2476 bool is31 = needsFP(*MF); 2477 restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, 2478 CSIIndex); 2479 CR2Spilled = CR3Spilled = CR4Spilled = false; 2480 } 2481 2482 if (CSI[i].isSpilledToReg()) { 2483 DebugLoc DL; 2484 NumPEReloadVSR++; 2485 BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg) 2486 .addReg(CSI[i].getDstReg(), getKillRegState(true)); 2487 } else { 2488 // Default behavior for non-CR saves. 2489 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 2490 2491 // Functions without NoUnwind need to preserve the order of elements in 2492 // saved vector registers. 
2493 if (Subtarget.needsSwapsForVSXMemOps() && 2494 !MF->getFunction().hasFnAttribute(Attribute::NoUnwind)) 2495 TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC, 2496 TRI); 2497 else 2498 TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI); 2499 2500 assert(I != MBB.begin() && 2501 "loadRegFromStackSlot didn't insert any code!"); 2502 } 2503 } 2504 2505 // Insert in reverse order. 2506 if (AtStart) 2507 I = MBB.begin(); 2508 else { 2509 I = BeforeI; 2510 ++I; 2511 } 2512 } 2513 2514 // If we haven't yet restored the spilled CRs, do so now. 2515 if (CR2Spilled || CR3Spilled || CR4Spilled) { 2516 assert(Subtarget.is32BitELFABI() && 2517 "Only set CR[2|3|4]Spilled on 32-bit SVR4."); 2518 bool is31 = needsFP(*MF); 2519 restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex); 2520 } 2521 2522 return true; 2523 } 2524 2525 unsigned PPCFrameLowering::getTOCSaveOffset() const { 2526 return TOCSaveOffset; 2527 } 2528 2529 unsigned PPCFrameLowering::getFramePointerSaveOffset() const { 2530 return FramePointerSaveOffset; 2531 } 2532 2533 unsigned PPCFrameLowering::getBasePointerSaveOffset() const { 2534 return BasePointerSaveOffset; 2535 } 2536 2537 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { 2538 if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled()) 2539 return false; 2540 return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() && 2541 MF.getSubtarget<PPCSubtarget>().isPPC64()); 2542 } 2543