//===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the PPC implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/PPCPredicates.h"
#include "PPCFrameLowering.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Function.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define DEBUG_TYPE "framelowering"
STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
STATISTIC(NumPrologProbed, "Number of prologues probed");

static cl::opt<bool>
EnablePEVectorSpills("ppc-enable-pe-vector-spills",
                     cl::desc("Enable spills in prologue to vector registers."),
                     cl::init(false), cl::Hidden);

static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
  if (STI.isAIXABI())
    return STI.isPPC64() ? 16 : 8;
  // SVR4 ABI:
  return STI.isPPC64() ? 16 : 4;
}

static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
  if (STI.isAIXABI())
    return STI.isPPC64() ? 40 : 20;
  return STI.isELFv2ABI() ? 24 : 40;
}

static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
  // First slot in the general register save area.
  return STI.isPPC64() ? -8U : -4U;
}

static unsigned computeLinkageSize(const PPCSubtarget &STI) {
  if (STI.isAIXABI() || STI.isPPC64())
    return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);

  // 32-bit SVR4 ABI:
  return 8;
}

static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
  // Third slot in the general purpose register save area.
  if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent())
    return -12U;

  // Second slot in the general purpose register save area.
  return STI.isPPC64() ? -16U : -8U;
}

static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
  return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
}

PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
                          STI.getPlatformStackAlignment(), 0),
      Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
      TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
      FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
      LinkageSize(computeLinkageSize(Subtarget)),
      BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
      CRSaveOffset(computeCRSaveOffset(Subtarget)) {}

// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
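// The offsets in the tables below are in bytes, growing downward from the top
// of each register save area, so the highest-numbered CSR of each class sits
// nearest the top. The different save areas intentionally overlap here; their
// final placement is resolved in processFunctionBeforeFrameFinalized.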
const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
    unsigned &NumEntries) const {

  // Floating-point register save area offsets.
#define CALLEE_SAVED_FPRS \
      {PPC::F31, -8},     \
      {PPC::F30, -16},    \
      {PPC::F29, -24},    \
      {PPC::F28, -32},    \
      {PPC::F27, -40},    \
      {PPC::F26, -48},    \
      {PPC::F25, -56},    \
      {PPC::F24, -64},    \
      {PPC::F23, -72},    \
      {PPC::F22, -80},    \
      {PPC::F21, -88},    \
      {PPC::F20, -96},    \
      {PPC::F19, -104},   \
      {PPC::F18, -112},   \
      {PPC::F17, -120},   \
      {PPC::F16, -128},   \
      {PPC::F15, -136},   \
      {PPC::F14, -144}

  // 32-bit general purpose register save area offsets shared by ELF and
  // AIX. AIX has an extra CSR with r13.
#define CALLEE_SAVED_GPRS32 \
      {PPC::R31, -4},       \
      {PPC::R30, -8},       \
      {PPC::R29, -12},      \
      {PPC::R28, -16},      \
      {PPC::R27, -20},      \
      {PPC::R26, -24},      \
      {PPC::R25, -28},      \
      {PPC::R24, -32},      \
      {PPC::R23, -36},      \
      {PPC::R22, -40},      \
      {PPC::R21, -44},      \
      {PPC::R20, -48},      \
      {PPC::R19, -52},      \
      {PPC::R18, -56},      \
      {PPC::R17, -60},      \
      {PPC::R16, -64},      \
      {PPC::R15, -68},      \
      {PPC::R14, -72}

  // 64-bit general purpose register save area offsets.
#define CALLEE_SAVED_GPRS64 \
      {PPC::X31, -8},       \
      {PPC::X30, -16},      \
      {PPC::X29, -24},      \
      {PPC::X28, -32},      \
      {PPC::X27, -40},      \
      {PPC::X26, -48},      \
      {PPC::X25, -56},      \
      {PPC::X24, -64},      \
      {PPC::X23, -72},      \
      {PPC::X22, -80},      \
      {PPC::X21, -88},      \
      {PPC::X20, -96},      \
      {PPC::X19, -104},     \
      {PPC::X18, -112},     \
      {PPC::X17, -120},     \
      {PPC::X16, -128},     \
      {PPC::X15, -136},     \
      {PPC::X14, -144}

  // Vector register save area offsets.
#define CALLEE_SAVED_VRS \
      {PPC::V31, -16},   \
      {PPC::V30, -32},   \
      {PPC::V29, -48},   \
      {PPC::V28, -64},   \
      {PPC::V27, -80},   \
      {PPC::V26, -96},   \
      {PPC::V25, -112},  \
      {PPC::V24, -128},  \
      {PPC::V23, -144},  \
      {PPC::V22, -160},  \
      {PPC::V21, -176},  \
      {PPC::V20, -192}

  // Note that the offsets here overlap, but this is fixed up in
  // processFunctionBeforeFrameFinalized.

  static const SpillSlot ELFOffsets32[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS32,

      // CR save area offset. We map each of the nonvolatile CR fields
      // to the slot for CR2, which is the first of the nonvolatile CR
      // fields to be assigned, so that we only allocate one save slot.
      // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
      {PPC::CR2, -4},

      // VRSAVE save area offset.
      {PPC::VRSAVE, -4},

      CALLEE_SAVED_VRS,

      // SPE register save area (overlaps Vector save area).
      {PPC::S31, -8},
      {PPC::S30, -16},
      {PPC::S29, -24},
      {PPC::S28, -32},
      {PPC::S27, -40},
      {PPC::S26, -48},
      {PPC::S25, -56},
      {PPC::S24, -64},
      {PPC::S23, -72},
      {PPC::S22, -80},
      {PPC::S21, -88},
      {PPC::S20, -96},
      {PPC::S19, -104},
      {PPC::S18, -112},
      {PPC::S17, -120},
      {PPC::S16, -128},
      {PPC::S15, -136},
      {PPC::S14, -144}};

  static const SpillSlot ELFOffsets64[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS64,

      // VRSAVE save area offset.
      {PPC::VRSAVE, -4},
      CALLEE_SAVED_VRS};

  static const SpillSlot AIXOffsets32[] = {CALLEE_SAVED_FPRS,
                                           CALLEE_SAVED_GPRS32,
                                           // Add AIX's extra CSR.
                                           {PPC::R13, -76},
                                           CALLEE_SAVED_VRS};

  static const SpillSlot AIXOffsets64[] = {
      CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS64, CALLEE_SAVED_VRS};

  if (Subtarget.is64BitELFABI()) {
    NumEntries = array_lengthof(ELFOffsets64);
    return ELFOffsets64;
  }

  if (Subtarget.is32BitELFABI()) {
    NumEntries = array_lengthof(ELFOffsets32);
    return ELFOffsets32;
  }

  assert(Subtarget.isAIXABI() && "Unexpected ABI.");

  if (Subtarget.isPPC64()) {
    NumEntries = array_lengthof(AIXOffsets64);
    return AIXOffsets64;
  }

  NumEntries = array_lengthof(AIXOffsets32);
  return AIXOffsets32;
}

static bool spillsCR(const MachineFunction &MF) {
  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  return FuncInfo->isCRSpilled();
}

static bool hasSpills(const MachineFunction &MF) {
  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  return FuncInfo->hasSpills();
}

static bool hasNonRISpills(const MachineFunction &MF) {
  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  return FuncInfo->hasNonRISpills();
}

/// MustSaveLR - Return true if this function requires that we save the LR
/// register onto the stack in the prolog and restore it in the epilog of the
/// function.
static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
  const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();

  // We need a save/restore of LR if there is any def of LR (which is
  // defined by calls, including the PIC setup sequence), or if there is
  // some use of the LR stack slot (e.g. for builtin_return_address).
  // (LR comes in 32 and 64 bit versions.)
  MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
  return RI != MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
}

/// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
/// call frame size. Update the MachineFunction object with the stack size.
unsigned
PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
                                                bool UseEstimate) const {
  unsigned NewMaxCallFrameSize = 0;
  unsigned FrameSize = determineFrameLayout(MF, UseEstimate,
                                            &NewMaxCallFrameSize);
  MF.getFrameInfo().setStackSize(FrameSize);
  MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
  return FrameSize;
}

/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
unsigned
PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
                                       bool UseEstimate,
                                       unsigned *NewMaxCallFrameSize) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();

  // Get the number of bytes to allocate from the FrameInfo.
  unsigned FrameSize =
      UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();

  // Get stack alignments. The frame must be aligned to the greatest of these:
  Align TargetAlign = getStackAlign(); // alignment required per the ABI
  Align MaxAlign = MFI.getMaxAlign();  // alignment required by data in frame
  Align Alignment = std::max(TargetAlign, MaxAlign);

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  unsigned LR = RegInfo->getRARegister();
  bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
  bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
                       !MFI.adjustsStack() &&        // No calls.
                       !MustSaveLR(MF, LR) &&        // No need to save LR.
                       !FI->mustSaveTOC() &&         // No need to save TOC.
                       !RegInfo->hasBasePointer(MF); // No special alignment.

  // Note: for PPC32 SVR4ABI, we can still generate stackless
  // code if all local vars are reg-allocated.
  bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();

  // Check whether we can skip adjusting the stack pointer (by using red zone)
  if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
    // No need for frame
    return 0;
  }

  // Get the maximum call frame size of all the calls.
  unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();

  // Maximum call frame needs to be at least big enough for linkage area.
  unsigned minCallFrameSize = getLinkageSize();
  maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);

  // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
  // that allocations will be aligned.
  if (MFI.hasVarSizedObjects())
    maxCallFrameSize = alignTo(maxCallFrameSize, Alignment);

  // Update the new max call frame size if the caller passes in a valid pointer.
  if (NewMaxCallFrameSize)
    *NewMaxCallFrameSize = maxCallFrameSize;

  // Include call frame size in total.
  FrameSize += maxCallFrameSize;

  // Make sure the frame is aligned.
  FrameSize = alignTo(FrameSize, Alignment);

  return FrameSize;
}

// hasFP - Return true if the specified function actually has a dedicated frame
// pointer register.
bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  // FIXME: This is pretty much broken by design: hasFP() might be called really
  // early, before the stack layout was calculated and thus hasFP() might return
  // true or false here depending on the time of call.
  return (MFI.getStackSize()) && needsFP(MF);
}

// needsFP - Return true if the specified function should have a dedicated frame
// pointer register. This is true if the function has variable sized allocas or
// if frame pointer elimination is disabled.
bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Naked functions have no stack frame pushed, so we don't have a frame
  // pointer.
  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
    return false;

  return MF.getTarget().Options.DisableFramePointerElim(MF) ||
         MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
         (MF.getTarget().Options.GuaranteedTailCallOpt &&
          MF.getInfo<PPCFunctionInfo>()->hasFastCall());
}

void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
  bool is31 = needsFP(MF);
  unsigned FPReg  = is31 ? PPC::R31 : PPC::R1;
  unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  bool HasBP = RegInfo->hasBasePointer(MF);
  unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
  unsigned BP8Reg = HasBP ?
                     (unsigned) PPC::X30 : FP8Reg;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI)
    for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
      --MBBI;
      for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
        MachineOperand &MO = MBBI->getOperand(I);
        if (!MO.isReg())
          continue;

        switch (MO.getReg()) {
        case PPC::FP:
          MO.setReg(FPReg);
          break;
        case PPC::FP8:
          MO.setReg(FP8Reg);
          break;
        case PPC::BP:
          MO.setReg(BPReg);
          break;
        case PPC::BP8:
          MO.setReg(BP8Reg);
          break;
        }
      }
    }
}

/* This function will do the following:
   - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
     respectively (defaults recommended by the ABI) and return true.
   - If MBB is not an entry block, initialize the register scavenger and look
     for available registers.
   - If the defaults (R0/R12) are available, return true.
   - If TwoUniqueRegsRequired is set to true, look for two unique registers.
     Otherwise, look for a single available register.
   - If the required registers are found, set SR1 and SR2 and return true.
   - If the required registers are not found, set SR2 or both SR1 and SR2 to
     PPC::NoRegister and return false.

   Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
   is not set, this function will attempt to find two different registers, but
   still return true if only one register is available (and set SR1 == SR2).
*/
bool
PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
                                      bool UseAtEnd,
                                      bool TwoUniqueRegsRequired,
                                      Register *SR1,
                                      Register *SR2) const {
  RegScavenger RS;
  Register R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
  Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;

  // Set the defaults for the two scratch registers.
  if (SR1)
    *SR1 = R0;

  if (SR2) {
    assert(SR1 && "Asking for the second scratch register but not the first?");
    *SR2 = R12;
  }

  // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
  if ((UseAtEnd && MBB->isReturnBlock()) ||
      (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
    return true;

  RS.enterBasicBlock(*MBB);

  if (UseAtEnd && !MBB->empty()) {
    // The scratch register will be used at the end of the block, so we must
    // consider all registers used within the block.

    MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
    // If no terminator, back iterator up to previous instruction.
    if (MBBI == MBB->end())
      MBBI = std::prev(MBBI);

    if (MBBI != MBB->begin())
      RS.forward(MBBI);
  }

  // If the two registers are available, we're all good.
  // Note that we only return here if both R0 and R12 are available because
  // although the function may not require two unique registers, it may benefit
  // from having two so we should try to provide them.
  if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
    return true;

  // Get the list of callee-saved registers for the target.
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());

  // Get all the available registers in the block.
  BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ?
                                         &PPC::G8RCRegClass :
                                         &PPC::GPRCRegClass);

  // We shouldn't use callee-saved registers as scratch registers as they may be
  // available when looking for a candidate block for shrink wrapping but not
  // available when the actual prologue/epilogue is being emitted because they
  // were added as live-in to the prologue block by PrologueEpilogueInserter.
  for (int i = 0; CSRegs[i]; ++i)
    BV.reset(CSRegs[i]);

  // Set the first scratch register to the first available one.
  if (SR1) {
    int FirstScratchReg = BV.find_first();
    *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
  }

  // If there is another one available, set the second scratch register to that.
  // Otherwise, set it to either PPC::NoRegister if this function requires two
  // or to whatever SR1 is set to if this function doesn't require two.
  if (SR2) {
    int SecondScratchReg = BV.find_next(*SR1);
    if (SecondScratchReg != -1)
      *SR2 = SecondScratchReg;
    else
      *SR2 = TwoUniqueRegsRequired ? Register() : *SR1;
  }

  // Now that we've done our best to provide both registers, double check
  // whether we were unable to provide enough.
  if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
    return false;

  return true;
}

// We need a scratch register for spilling LR and for spilling CR. By default,
// we use two scratch registers to hide latency. However, if only one scratch
// register is available, we can adjust for that by not overlapping the spill
// code. However, if we need to realign the stack (i.e. have a base pointer)
// and the stack frame is large, we need two scratch registers.
bool
PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  MachineFunction &MF = *(MBB->getParent());
  bool HasBP = RegInfo->hasBasePointer(MF);
  unsigned FrameSize = determineFrameLayout(MF);
  int NegFrameSize = -FrameSize;
  bool IsLargeFrame = !isInt<16>(NegFrameSize);
  MachineFrameInfo &MFI = MF.getFrameInfo();
  Align MaxAlign = MFI.getMaxAlign();
  bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();

  return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1;
}

bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);

  return findScratchRegister(TmpMBB, false,
                             twoUniqueScratchRegsRequired(TmpMBB));
}

bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);

  return findScratchRegister(TmpMBB, true);
}

bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();

  // Abort if there is no register info or function info.
  if (!RegInfo || !FI)
    return false;

  // Only move the stack update on ELFv2 ABI and PPC64.
  if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
    return false;

  // Check the frame size first and return false if it does not fit the
  // requirements.
  // We need a non-zero frame size as well as a frame that will fit in the red
  // zone.
  // This is because by moving the stack pointer update we are now storing
  // to the red zone until the stack pointer is updated. If we get an interrupt
  // inside the prologue but before the stack update we now have a number of
  // stores to the red zone and those stores must all fit.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned FrameSize = MFI.getStackSize();
  if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
    return false;

  // Frame pointers and base pointers complicate matters so don't do anything
  // if we have them. For example having a frame pointer will sometimes require
  // a copy of r1 into r31 and that makes keeping track of updates to r1 more
  // difficult.
  if (hasFP(MF) || RegInfo->hasBasePointer(MF))
    return false;

  // Calls to fast_cc functions use different rules for passing parameters on
  // the stack from the ABI and using PIC base in the function imposes
  // similar restrictions to using the base pointer. It is not generally safe
  // to move the stack pointer update in these situations.
  if (FI->hasFastCall() || FI->usesPICBase())
    return false;

  // Finally we can move the stack update if we do not require register
  // scavenging. Register scavenging can introduce more spills and so
  // may make the frame size larger than we have computed.
  return !RegInfo->requiresFrameIndexScavenging(MF);
}

void PPCFrameLowering::emitPrologue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();

  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  DebugLoc dl;
  // AIX assembler does not support cfi directives.
  const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();

  // Get processor type.
  bool isPPC64 = Subtarget.isPPC64();
  // Get the ABI.
  bool isSVR4ABI = Subtarget.isSVR4ABI();
  bool isELFv2ABI = Subtarget.isELFv2ABI();
  assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI.");

  // Work out frame sizes.
  unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
  int NegFrameSize = -FrameSize;
  if (!isInt<32>(NegFrameSize))
    llvm_unreachable("Unhandled stack size!");

  if (MFI.isFrameAddressTaken())
    replaceFPWithRealFP(MF);

  // Check if the link register (LR) must be saved.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  bool MustSaveLR = FI->mustSaveLR();
  bool MustSaveTOC = FI->mustSaveTOC();
  const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
  bool MustSaveCR = !MustSaveCRs.empty();
  // Do we have a frame pointer and/or base pointer for this function?
  bool HasFP = hasFP(MF);
  bool HasBP = RegInfo->hasBasePointer(MF);
  bool HasRedZone = isPPC64 || !isSVR4ABI;

  Register SPReg  = isPPC64 ? PPC::X1  : PPC::R1;
  Register BPReg  = RegInfo->getBaseRegister(MF);
  Register FPReg  = isPPC64 ? PPC::X31 : PPC::R31;
  Register LRReg  = isPPC64 ? PPC::LR8 : PPC::LR;
  Register TOCReg = isPPC64 ? PPC::X2  : PPC::R2;
  Register ScratchReg;
  Register TempReg = isPPC64 ?
                     PPC::X12 : PPC::R12; // another scratch reg
  // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
  const MCInstrDesc &MFLRInst = TII.get(isPPC64 ? PPC::MFLR8 : PPC::MFLR);
  const MCInstrDesc &StoreInst = TII.get(isPPC64 ? PPC::STD : PPC::STW);
  const MCInstrDesc &StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU : PPC::STWU);
  const MCInstrDesc &StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
                                                        : PPC::STWUX);
  const MCInstrDesc &LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8
                                                          : PPC::LIS);
  const MCInstrDesc &OrImmInst = TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI);
  const MCInstrDesc &OrInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
  const MCInstrDesc &SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
                                                            : PPC::SUBFC);
  const MCInstrDesc &SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
                                                                : PPC::SUBFIC);
  const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8
                                                           : PPC::MFCR);
  const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW);

  // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
  // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
  // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
  // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
  assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
         "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");

  // Using the same bool variable as below to suppress compiler warnings.
  // Stack probe requires two scratch registers, one for old sp, one for large
  // frame and large probe size.
  bool SingleScratchReg = findScratchRegister(
      &MBB, false,
      twoUniqueScratchRegsRequired(&MBB) || TLI.hasInlineStackProbe(MF),
      &ScratchReg, &TempReg);
  assert(SingleScratchReg &&
         "Required number of registers not available in this block");

  SingleScratchReg = ScratchReg == TempReg;

  int LROffset = getReturnSaveOffset();

  int FPOffset = 0;
  if (HasFP) {
    MachineFrameInfo &MFI = MF.getFrameInfo();
    int FPIndex = FI->getFramePointerSaveIndex();
    assert(FPIndex && "No Frame Pointer Save Slot!");
    FPOffset = MFI.getObjectOffset(FPIndex);
  }

  int BPOffset = 0;
  if (HasBP) {
    MachineFrameInfo &MFI = MF.getFrameInfo();
    int BPIndex = FI->getBasePointerSaveIndex();
    assert(BPIndex && "No Base Pointer Save Slot!");
    BPOffset = MFI.getObjectOffset(BPIndex);
  }

  int PBPOffset = 0;
  if (FI->usesPICBase()) {
    MachineFrameInfo &MFI = MF.getFrameInfo();
    int PBPIndex = FI->getPICBasePointerSaveIndex();
    assert(PBPIndex && "No PIC Base Pointer Save Slot!");
    PBPOffset = MFI.getObjectOffset(PBPIndex);
  }

  // Get stack alignments.
  Align MaxAlign = MFI.getMaxAlign();
  if (HasBP && MaxAlign > 1)
    assert(Log2(MaxAlign) < 16 && "Invalid alignment!");

  // Frames of 32KB & larger require special handling because they cannot be
  // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
  bool isLargeFrame = !isInt<16>(NegFrameSize);

  // Check if we can move the stack update instruction (stdu) down the prologue
  // past the callee saves. Hopefully this will avoid the situation where the
  // saves are waiting for the update on the store with update to complete.
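  // In effect, when the update can be moved, the callee-saved stores already
  // present in the block execute first (landing in what is still the red
  // zone) and the stdu that actually allocates the frame is issued after
  // them, so those stores do not have to wait for the stack-update store.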
  MachineBasicBlock::iterator StackUpdateLoc = MBBI;
  bool MovingStackUpdateDown = false;

  // Check if we can move the stack update.
  if (stackUpdateCanBeMoved(MF)) {
    const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
    for (CalleeSavedInfo CSI : Info) {
      int FrIdx = CSI.getFrameIdx();
      // If the frame index is not negative the callee saved info belongs to a
      // stack object that is not a fixed stack object. We ignore non-fixed
      // stack objects because we won't move the stack update pointer past them.
      if (FrIdx >= 0)
        continue;

      if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
        StackUpdateLoc++;
        MovingStackUpdateDown = true;
      } else {
        // We need all of the Frame Indices to meet these conditions.
        // If they do not, abort the whole operation.
        StackUpdateLoc = MBBI;
        MovingStackUpdateDown = false;
        break;
      }
    }

    // If the operation was not aborted then update the object offset.
    if (MovingStackUpdateDown) {
      for (CalleeSavedInfo CSI : Info) {
        int FrIdx = CSI.getFrameIdx();
        if (FrIdx < 0)
          MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
      }
    }
  }

  // Where in the prologue we move the CR fields depends on how many scratch
  // registers we have, and if we need to save the link register or not. This
  // lambda is to avoid duplicating the logic in 2 places.
  auto BuildMoveFromCR = [&]() {
    if (isELFv2ABI && MustSaveCRs.size() == 1) {
      // In the ELFv2 ABI, we are not required to save all CR fields.
      // If only one CR field is clobbered, it is more efficient to use
      // mfocrf to selectively save just that field, because mfocrf has
      // shorter latency compared to mfcr.
      assert(isPPC64 && "V2 ABI is 64-bit only.");
      MachineInstrBuilder MIB =
          BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg);
      MIB.addReg(MustSaveCRs[0], RegState::Kill);
    } else {
      MachineInstrBuilder MIB =
          BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg);
      for (unsigned CRfield : MustSaveCRs)
        MIB.addReg(CRfield, RegState::ImplicitKill);
    }
  };

  // If we need to spill the CR and the LR but we don't have two separate
  // registers available, we must spill them one at a time.
  if (MustSaveCR && SingleScratchReg && MustSaveLR) {
    BuildMoveFromCR();
    BuildMI(MBB, MBBI, dl, StoreWordInst)
        .addReg(TempReg, getKillRegState(true))
        .addImm(CRSaveOffset)
        .addReg(SPReg);
  }

  if (MustSaveLR)
    BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);

  if (MustSaveCR && !(SingleScratchReg && MustSaveLR))
    BuildMoveFromCR();

  if (HasRedZone) {
    if (HasFP)
      BuildMI(MBB, MBBI, dl, StoreInst)
          .addReg(FPReg)
          .addImm(FPOffset)
          .addReg(SPReg);
    if (FI->usesPICBase())
      BuildMI(MBB, MBBI, dl, StoreInst)
          .addReg(PPC::R30)
          .addImm(PBPOffset)
          .addReg(SPReg);
    if (HasBP)
      BuildMI(MBB, MBBI, dl, StoreInst)
          .addReg(BPReg)
          .addImm(BPOffset)
          .addReg(SPReg);
  }

  if (MustSaveLR)
    BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
        .addReg(ScratchReg, getKillRegState(true))
        .addImm(LROffset)
        .addReg(SPReg);

  if (MustSaveCR &&
      !(SingleScratchReg && MustSaveLR)) {
    assert(HasRedZone && "A red zone is always available on PPC64");
    BuildMI(MBB, MBBI, dl, StoreWordInst)
        .addReg(TempReg, getKillRegState(true))
        .addImm(CRSaveOffset)
        .addReg(SPReg);
  }

  // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
  if (!FrameSize)
    return;

  // Adjust stack pointer: r1 += NegFrameSize.
  // If there is a preferred stack alignment, align R1 now.

  if (HasBP && HasRedZone) {
    // Save a copy of r1 as the base pointer.
    BuildMI(MBB, MBBI, dl, OrInst, BPReg)
        .addReg(SPReg)
        .addReg(SPReg);
  }

  // Have we generated a STUX instruction to claim stack frame? If so,
  // the negated frame size will be placed in ScratchReg.
  bool HasSTUX = false;

  // If FrameSize <= TLI.getStackProbeSize(MF), then, as the POWER ABI requires
  // the backchain pointer to always be stored at SP, we get a free probe from
  // the essential STU(X) instruction.
  if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) {
    // To be consistent with other targets, a pseudo instruction is emitted and
    // will be later expanded in `inlineStackProbe`.
    BuildMI(MBB, MBBI, dl,
            TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
                            : PPC::PROBED_STACKALLOC_32))
        .addDef(ScratchReg)
        .addDef(TempReg) // TempReg stores the old sp.
        .addImm(NegFrameSize);
    // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we
    // update the ScratchReg to meet the assumption that ScratchReg contains
    // the NegFrameSize. This solution is rather tricky.
    if (!HasRedZone) {
      BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
          .addReg(TempReg)
          .addReg(SPReg);
      HasSTUX = true;
    }
  } else {
    // This condition must be kept in sync with canUseAsPrologue.
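    // When realignment is needed (HasBP && MaxAlign > 1), the rotate-and-mask
    // below computes ScratchReg = SP % MaxAlign; the subtraction that follows
    // forms -(FrameSize + SP % MaxAlign) in ScratchReg, so the single
    // STWUX/STDUX both allocates the frame and leaves the new SP aligned
    // (FrameSize is already a multiple of MaxAlign).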
    if (HasBP && MaxAlign > 1) {
      if (isPPC64)
        BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
            .addReg(SPReg)
            .addImm(0)
            .addImm(64 - Log2(MaxAlign));
      else // PPC32...
        BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
            .addReg(SPReg)
            .addImm(0)
            .addImm(32 - Log2(MaxAlign))
            .addImm(31);
      if (!isLargeFrame) {
        BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
            .addReg(ScratchReg, RegState::Kill)
            .addImm(NegFrameSize);
      } else {
        assert(!SingleScratchReg && "Only a single scratch reg available");
        BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
            .addImm(NegFrameSize >> 16);
        BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
            .addReg(TempReg, RegState::Kill)
            .addImm(NegFrameSize & 0xFFFF);
        BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
            .addReg(ScratchReg, RegState::Kill)
            .addReg(TempReg, RegState::Kill);
      }

      BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
          .addReg(SPReg, RegState::Kill)
          .addReg(SPReg)
          .addReg(ScratchReg);
      HasSTUX = true;

    } else if (!isLargeFrame) {
      BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
          .addReg(SPReg)
          .addImm(NegFrameSize)
          .addReg(SPReg);

    } else {
      BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
          .addImm(NegFrameSize >> 16);
      BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
          .addReg(ScratchReg, RegState::Kill)
          .addImm(NegFrameSize & 0xFFFF);
      BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
          .addReg(SPReg, RegState::Kill)
          .addReg(SPReg)
          .addReg(ScratchReg);
      HasSTUX = true;
    }
  }

  // Save the TOC register after the stack pointer update if a prologue TOC
  // save is required for the function.
  if (MustSaveTOC) {
    assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
    BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
        .addReg(TOCReg, getKillRegState(true))
        .addImm(TOCSaveOffset)
        .addReg(SPReg);
  }

  if (!HasRedZone) {
    assert(!isPPC64 && "A red zone is always available on PPC64");
    if (HasSTUX) {
      // The negated frame size is in ScratchReg, and the SPReg has been
      // decremented by the frame size: SPReg = old SPReg + ScratchReg.
      // Since FPOffset, PBPOffset, etc. are relative to the beginning of
      // the stack frame (i.e. the old SP), ideally, we would put the old
      // SP into a register and use it as the base for the stores. The
      // problem is that the only available register may be ScratchReg,
      // which could be R0, and R0 cannot be used as a base address.

      // First, set ScratchReg to the old SP. This may need to be modified
      // later.
      BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
          .addReg(ScratchReg, RegState::Kill)
          .addReg(SPReg);

      if (ScratchReg == PPC::R0) {
        // R0 cannot be used as a base register, but it can be used as an
        // index in a store-indexed.
        int LastOffset = 0;
        if (HasFP) {
          // R0 += (FPOffset-LastOffset).
          // Need addic, since addi treats R0 as 0.
          BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
              .addReg(ScratchReg)
              .addImm(FPOffset - LastOffset);
          LastOffset = FPOffset;
          // Store FP into *R0.
          BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
              .addReg(FPReg, RegState::Kill) // Save FP.
              .addReg(PPC::ZERO)
              .addReg(ScratchReg); // This will be the index (R0 is ok here).
        }
        if (FI->usesPICBase()) {
          // R0 += (PBPOffset-LastOffset).
          BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
              .addReg(ScratchReg)
              .addImm(PBPOffset - LastOffset);
          LastOffset = PBPOffset;
          BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
              .addReg(PPC::R30, RegState::Kill) // Save PIC base pointer.
              .addReg(PPC::ZERO)
              .addReg(ScratchReg); // This will be the index (R0 is ok here).
        }
        if (HasBP) {
          // R0 += (BPOffset-LastOffset).
          BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
              .addReg(ScratchReg)
              .addImm(BPOffset - LastOffset);
          LastOffset = BPOffset;
          BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
              .addReg(BPReg, RegState::Kill) // Save BP.
              .addReg(PPC::ZERO)
              .addReg(ScratchReg); // This will be the index (R0 is ok here).
          // BP = R0 - LastOffset.
          BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
              .addReg(ScratchReg, RegState::Kill)
              .addImm(-LastOffset);
        }
      } else {
        // ScratchReg is not R0, so use it as the base register. It is
        // already set to the old SP, so we can use the offsets directly.

        // Now that the stack frame has been allocated, save all the necessary
        // registers using ScratchReg as the base address.
        if (HasFP)
          BuildMI(MBB, MBBI, dl, StoreInst)
              .addReg(FPReg)
              .addImm(FPOffset)
              .addReg(ScratchReg);
        if (FI->usesPICBase())
          BuildMI(MBB, MBBI, dl, StoreInst)
              .addReg(PPC::R30)
              .addImm(PBPOffset)
              .addReg(ScratchReg);
        if (HasBP) {
          BuildMI(MBB, MBBI, dl, StoreInst)
              .addReg(BPReg)
              .addImm(BPOffset)
              .addReg(ScratchReg);
          BuildMI(MBB, MBBI, dl, OrInst, BPReg)
              .addReg(ScratchReg, RegState::Kill)
              .addReg(ScratchReg);
        }
      }
    } else {
      // The frame size is a known 16-bit constant (fitting in the immediate
      // field of STWU). To be here we have to be compiling for PPC32.
      // Since the SPReg has been decreased by FrameSize, add it back to each
      // offset.
      if (HasFP)
        BuildMI(MBB, MBBI, dl, StoreInst)
            .addReg(FPReg)
            .addImm(FrameSize + FPOffset)
            .addReg(SPReg);
      if (FI->usesPICBase())
        BuildMI(MBB, MBBI, dl, StoreInst)
            .addReg(PPC::R30)
            .addImm(FrameSize + PBPOffset)
            .addReg(SPReg);
      if (HasBP) {
        BuildMI(MBB, MBBI, dl, StoreInst)
            .addReg(BPReg)
            .addImm(FrameSize + BPOffset)
            .addReg(SPReg);
        BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
            .addReg(SPReg)
            .addImm(FrameSize);
      }
    }
  }

  // Add Call Frame Information for the instructions we generated above.
  if (needsCFI) {
    unsigned CFIIndex;

    if (HasBP) {
      // Define CFA in terms of BP. Do this in preference to using FP/SP,
      // because if the stack needed aligning then CFA won't be at a fixed
      // offset from FP/SP.
      unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
      CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
    } else {
      // Adjust the definition of CFA to account for the change in SP.
      assert(NegFrameSize);
      CFIIndex = MF.addFrameInst(
          MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize));
    }
    BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);

    if (HasFP) {
      // Describe where FP was saved, at a fixed offset from CFA.
      unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
      CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    if (FI->usesPICBase()) {
      // Describe where the PIC base pointer (R30) was saved, at a fixed
      // offset from CFA.
      unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
      CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    if (HasBP) {
      // Describe where BP was saved, at a fixed offset from CFA.
      unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
      CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    if (MustSaveLR) {
      // Describe where LR was saved, at a fixed offset from CFA.
      unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
      CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }
  }

  // If there is a frame pointer, copy R1 into R31.
  if (HasFP) {
    BuildMI(MBB, MBBI, dl, OrInst, FPReg)
        .addReg(SPReg)
        .addReg(SPReg);

    if (!HasBP && needsCFI) {
      // Change the definition of CFA from SP+offset to FP+offset, because SP
      // will change at every alloca.
      unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
      unsigned CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createDefCfaRegister(nullptr, Reg));

      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }
  }

  if (needsCFI) {
    // Describe where callee saved registers were saved, at fixed offsets from
    // CFA.
    const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
    for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
      unsigned Reg = CSI[I].getReg();
      if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;

      // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
      // subregisters of CR2. We just need to emit a move of CR2.
      if (PPC::CRBITRCRegClass.contains(Reg))
        continue;

      if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
        continue;

      // For SVR4, don't emit a move for the CR spill slot if we haven't
      // spilled CRs.
      if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
          && !MustSaveCR)
        continue;

      // For 64-bit SVR4 when we have spilled CRs, the spill location
      // is SP+8, not a frame-relative slot.
      if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
        // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
        // the whole CR word. In the ELFv2 ABI, every CR that was
        // actually saved gets its own CFI record.
        unsigned CRReg = isELFv2ABI ?
                             Reg : (unsigned) PPC::CR2;
        unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
            nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset));
        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex);
        continue;
      }

      if (CSI[I].isSpilledToReg()) {
        unsigned SpilledReg = CSI[I].getDstReg();
        unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
            nullptr, MRI->getDwarfRegNum(Reg, true),
            MRI->getDwarfRegNum(SpilledReg, true)));
        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIRegister);
      } else {
        int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
        // We have changed the object offset above but we do not want to change
        // the actual offsets in the CFI instruction so we have to undo the
        // offset change here.
        if (MovingStackUpdateDown)
          Offset -= NegFrameSize;

        unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
            nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex);
      }
    }
  }
}

void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
                                        MachineBasicBlock &PrologMBB) const {
  // TODO: Generate CFI instructions.
  bool isPPC64 = Subtarget.isPPC64();
  const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  // AIX assembler does not support cfi directives.
  const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
  auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
    int Opc = MI.getOpcode();
    return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32;
  });
  if (StackAllocMIPos == PrologMBB.end())
    return;
  const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
  DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
  MachineInstr &MI = *StackAllocMIPos;
  int64_t NegFrameSize = MI.getOperand(2).getImm();
  int64_t NegProbeSize = -(int64_t)TLI.getStackProbeSize(MF);
  assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
  int64_t NumBlocks = NegFrameSize / NegProbeSize;
  int64_t NegResidualSize = NegFrameSize % NegProbeSize;
  Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
  Register ScratchReg = MI.getOperand(0).getReg();
  Register FPReg = MI.getOperand(1).getReg();
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  bool HasBP = RegInfo->hasBasePointer(MF);
  Align MaxAlign = MFI.getMaxAlign();
  // Initialize current frame pointer.
  const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
  BuildMI(PrologMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
  // Subroutines to generate .cfi_* directives.
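  // buildDefCFAReg only switches the register used to compute the CFA (the
  // offset is unchanged), while buildDefCFA redefines both the register and
  // the offset; they keep the CFA description valid while SP temporarily
  // diverges from its final value during probing.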
  auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI, Register Reg) {
    unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
    unsigned CFIIndex = MF.addFrameInst(
        MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  };
  auto buildDefCFA = [&](MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, Register Reg,
                         int Offset) {
    unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
    unsigned CFIIndex = MBB.getParent()->addFrameInst(
        MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset));
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  };
  // Subroutine to determine if we can use the Imm as part of d-form.
  auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; };
  // Subroutine to materialize the Imm into TempReg.
  auto MaterializeImm = [&](MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI, int64_t Imm,
                            Register &TempReg) {
    assert(isInt<32>(Imm) && "Unhandled imm");
    if (isInt<16>(Imm))
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg)
          .addImm(Imm);
    else {
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
          .addImm(Imm >> 16);
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg)
          .addReg(TempReg)
          .addImm(Imm & 0xFFFF);
    }
  };
  // Subroutine to store frame pointer and decrease stack pointer by probe size.
  auto allocateAndProbe = [&](MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MBBI, int64_t NegSize,
                              Register NegSizeReg, bool UseDForm) {
    if (UseDForm)
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
          .addReg(FPReg)
          .addImm(NegSize)
          .addReg(SPReg);
    else
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
          .addReg(FPReg)
          .addReg(SPReg)
          .addReg(NegSizeReg);
  };
  // Use FPReg to calculate CFA.
  if (needsCFI)
    buildDefCFA(PrologMBB, {MI}, FPReg, 0);
  // For case HasBP && MaxAlign > 1, we have to align the SP by performing
  // SP = SP - SP % MaxAlign.
  if (HasBP && MaxAlign > 1) {
    if (isPPC64)
      BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
          .addReg(FPReg)
          .addImm(0)
          .addImm(64 - Log2(MaxAlign));
    else
      BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
          .addReg(FPReg)
          .addImm(0)
          .addImm(32 - Log2(MaxAlign))
          .addImm(31);
    BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBFC8 : PPC::SUBFC),
            SPReg)
        .addReg(ScratchReg)
        .addReg(SPReg);
  }
  // Probe residual part.
  if (NegResidualSize) {
    bool ResidualUseDForm = CanUseDForm(NegResidualSize);
    if (!ResidualUseDForm)
      MaterializeImm(PrologMBB, {MI}, NegResidualSize, ScratchReg);
    allocateAndProbe(PrologMBB, {MI}, NegResidualSize, ScratchReg,
                     ResidualUseDForm);
  }
  bool UseDForm = CanUseDForm(NegProbeSize);
  // If the number of blocks is small, just probe them directly.
  if (NumBlocks < 3) {
    if (!UseDForm)
      MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg);
    for (int i = 0; i < NumBlocks; ++i)
      allocateAndProbe(PrologMBB, {MI}, NegProbeSize, ScratchReg, UseDForm);
    if (needsCFI) {
      // Restore using SPReg to calculate CFA.
      buildDefCFAReg(PrologMBB, {MI}, SPReg);
    }
  } else {
    // Since CTR is a volatile register and the current shrinkwrap
    // implementation won't choose an MBB in a loop as the PrologMBB, it's
    // safe to synthesize a CTR loop to probe.
    // Calculate the trip count and store it in CTR.
    MaterializeImm(PrologMBB, {MI}, NumBlocks, ScratchReg);
    BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
        .addReg(ScratchReg, RegState::Kill);
    if (!UseDForm)
      MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg);
    // Create MBBs of the loop.
    MachineFunction::iterator MBBInsertPoint =
        std::next(PrologMBB.getIterator());
    MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
    MF.insert(MBBInsertPoint, LoopMBB);
    MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
    MF.insert(MBBInsertPoint, ExitMBB);
    // Synthesize the loop body.
    allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
                     UseDForm);
    BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
        .addMBB(LoopMBB);
    LoopMBB->addSuccessor(ExitMBB);
    LoopMBB->addSuccessor(LoopMBB);
    // Synthesize the exit MBB.
    ExitMBB->splice(ExitMBB->end(), &PrologMBB,
                    std::next(MachineBasicBlock::iterator(MI)),
                    PrologMBB.end());
    ExitMBB->transferSuccessorsAndUpdatePHIs(&PrologMBB);
    PrologMBB.addSuccessor(LoopMBB);
    if (needsCFI) {
      // Restore using SPReg to calculate CFA.
      buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
    }
    // Update liveins.
    recomputeLiveIns(*LoopMBB);
    recomputeLiveIns(*ExitMBB);
  }
  ++NumPrologProbed;
  MI.eraseFromParent();
}

void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  DebugLoc dl;

  if (MBBI != MBB.end())
    dl = MBBI->getDebugLoc();

  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  // Get alignment info so we know how to restore the SP.
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Get the number of bytes allocated from the FrameInfo.
  int FrameSize = MFI.getStackSize();

  // Get processor type.
  bool isPPC64 = Subtarget.isPPC64();

  // Check if the link register (LR) has been saved.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  bool MustSaveLR = FI->mustSaveLR();
  const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
  bool MustSaveCR = !MustSaveCRs.empty();
  // Do we have a frame pointer and/or base pointer for this function?
  bool HasFP = hasFP(MF);
  bool HasBP = RegInfo->hasBasePointer(MF);
  bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();

  Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
  Register BPReg = RegInfo->getBaseRegister(MF);
  Register FPReg = isPPC64 ? PPC::X31 : PPC::R31;
  Register ScratchReg;
  Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
  const MCInstrDesc &MTLRInst = TII.get(isPPC64 ? PPC::MTLR8 : PPC::MTLR);
  const MCInstrDesc &LoadInst = TII.get(isPPC64 ? PPC::LD : PPC::LWZ);
  const MCInstrDesc &LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8
                                                          : PPC::LIS);
  const MCInstrDesc &OrInst = TII.get(isPPC64 ?
                                                PPC::OR8 : PPC::OR);
  const MCInstrDesc &OrImmInst = TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI);
  const MCInstrDesc &AddImmInst = TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI);
  const MCInstrDesc &AddInst = TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4);
  const MCInstrDesc &LoadWordInst = TII.get(isPPC64 ? PPC::LWZ8 : PPC::LWZ);
  const MCInstrDesc &MoveToCRInst = TII.get(isPPC64 ? PPC::MTOCRF8
                                                    : PPC::MTOCRF);
  int LROffset = getReturnSaveOffset();

  int FPOffset = 0;

  // Using the same bool variable as below to suppress compiler warnings.
  bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
                                              &TempReg);
  assert(SingleScratchReg &&
         "Could not find an available scratch register");

  SingleScratchReg = ScratchReg == TempReg;

  if (HasFP) {
    int FPIndex = FI->getFramePointerSaveIndex();
    assert(FPIndex && "No Frame Pointer Save Slot!");
    FPOffset = MFI.getObjectOffset(FPIndex);
  }

  int BPOffset = 0;
  if (HasBP) {
    int BPIndex = FI->getBasePointerSaveIndex();
    assert(BPIndex && "No Base Pointer Save Slot!");
    BPOffset = MFI.getObjectOffset(BPIndex);
  }

  int PBPOffset = 0;
  if (FI->usesPICBase()) {
    int PBPIndex = FI->getPICBasePointerSaveIndex();
    assert(PBPIndex && "No PIC Base Pointer Save Slot!");
    PBPOffset = MFI.getObjectOffset(PBPIndex);
  }

  bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());

  if (IsReturnBlock) {
    unsigned RetOpcode = MBBI->getOpcode();
    bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
                     RetOpcode == PPC::TCRETURNdi ||
                     RetOpcode == PPC::TCRETURNai ||
                     RetOpcode == PPC::TCRETURNri8 ||
                     RetOpcode == PPC::TCRETURNdi8 ||
                     RetOpcode == PPC::TCRETURNai8;

    if (UsesTCRet) {
      int MaxTCRetDelta = FI->getTailCallSPDelta();
      MachineOperand &StackAdjust = MBBI->getOperand(1);
      assert(StackAdjust.isImm() && "Expecting immediate value.");
      // Adjust stack pointer.
      int StackAdj = StackAdjust.getImm();
      int Delta = StackAdj - MaxTCRetDelta;
      assert((Delta >= 0) && "Delta must be non-negative");
      if (MaxTCRetDelta > 0)
        FrameSize += (StackAdj + Delta);
      else
        FrameSize += StackAdj;
    }
  }

  // Frames of 32KB & larger require special handling because they cannot be
  // indexed into with a simple LD/LWZ immediate offset operand.
  bool isLargeFrame = !isInt<16>(FrameSize);

  // On targets without red zone, the SP needs to be restored last, so that
  // all live contents of the stack frame are upwards of the SP. This means
  // that we cannot restore SP just now, since there may be more registers
  // to restore from the stack frame (e.g. R31). If the frame size is not
  // a simple immediate value, we will need a spare register to hold the
  // restored SP. If the frame size is known and small, we can simply adjust
  // the offsets of the registers to be restored, and still use SP to restore
  // them. In such case, the final update of SP will be to add the frame
  // size to it.
  // To simplify the code, set RBReg to the base register used to restore
  // values from the stack, and set SPAdd to the value that needs to be added
  // to the SP at the end. The default values are as if red zone was present.
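  // For example, with a small known FrameSize and no red zone we keep
  // RBReg == SPReg, fold FrameSize into FPOffset/BPOffset/PBPOffset, and emit
  // a single "addi r1, r1, FrameSize" once everything has been reloaded.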
  unsigned RBReg = SPReg;
  unsigned SPAdd = 0;

  // Check if we can move the stack update instruction up the epilogue
  // past the callee saves. This will allow the move to LR instruction
  // to be executed before the restores of the callee saves which means
  // that the callee saves can hide the latency from the MTLR instruction.
  MachineBasicBlock::iterator StackUpdateLoc = MBBI;
  if (stackUpdateCanBeMoved(MF)) {
    const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
    for (CalleeSavedInfo CSI : Info) {
      int FrIdx = CSI.getFrameIdx();
      // If the frame index is not negative the callee saved info belongs to a
      // stack object that is not a fixed stack object. We ignore non-fixed
      // stack objects because we won't move the update of the stack pointer
      // past them.
      if (FrIdx >= 0)
        continue;

      if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
        StackUpdateLoc--;
      else {
        // Abort the operation as we can't update all CSR restores.
        StackUpdateLoc = MBBI;
        break;
      }
    }
  }

  if (FrameSize) {
    // In the prologue, the loaded (or persistent) stack pointer value is
    // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
    // zone add this offset back now.

    // If this function contained a fastcc call and GuaranteedTailCallOpt is
    // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
    // call which invalidates the stack pointer value in SP(0). So we use the
    // value of R31 in this case.
    if (FI->hasFastCall()) {
      assert(HasFP && "Expecting a valid frame pointer.");
      if (!HasRedZone)
        RBReg = FPReg;
      if (!isLargeFrame) {
        BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
            .addReg(FPReg)
            .addImm(FrameSize);
      } else {
        BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
            .addImm(FrameSize >> 16);
        BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
            .addReg(ScratchReg, RegState::Kill)
            .addImm(FrameSize & 0xFFFF);
        BuildMI(MBB, MBBI, dl, AddInst)
            .addReg(RBReg)
            .addReg(FPReg)
            .addReg(ScratchReg);
      }
    } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
      if (HasRedZone) {
        BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
            .addReg(SPReg)
            .addImm(FrameSize);
      } else {
        // Make sure that adding FrameSize will not overflow the max offset
        // size.
        assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
               "Local offsets should be negative");
        SPAdd = FrameSize;
        FPOffset += FrameSize;
        BPOffset += FrameSize;
        PBPOffset += FrameSize;
      }
    } else {
      // We don't want to use ScratchReg as a base register, because it
      // could happen to be R0. Use FP instead, but make sure to preserve it.
      if (!HasRedZone) {
        // If FP is not saved, copy it to ScratchReg.
        if (!HasFP)
          BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
              .addReg(FPReg)
              .addReg(FPReg);
        RBReg = FPReg;
      }
      BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
          .addImm(0)
          .addReg(SPReg);
    }
  }
  assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
  // If there is no red zone, ScratchReg may be needed for holding a useful
  // value (although not the base register). Make sure it is not overwritten
  // too early.

  // If we need to restore both the LR and the CR and we only have one
  // available scratch register, we must do them one at a time.
  if (MustSaveCR && SingleScratchReg && MustSaveLR) {
    // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
    // is live here.
    assert(HasRedZone && "Expecting red zone");
    BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
      .addImm(CRSaveOffset)
      .addReg(SPReg);
    for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
      BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
        .addReg(TempReg, getKillRegState(i == e-1));
  }

  // Delay restoring of the LR if ScratchReg is needed. This is ok, since
  // LR is stored in the caller's stack frame. ScratchReg will be needed
  // if RBReg is anything other than SP. We shouldn't use ScratchReg as
  // a base register anyway, because it may happen to be R0.
  bool LoadedLR = false;
  if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
    BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
      .addImm(LROffset+SPAdd)
      .addReg(RBReg);
    LoadedLR = true;
  }

  if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
    assert(RBReg == SPReg && "Should be using SP as a base register");
    BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
      .addImm(CRSaveOffset)
      .addReg(RBReg);
  }

  if (HasFP) {
    // If there is red zone, restore FP directly, since SP has already been
    // restored. Otherwise, restore the value of FP into ScratchReg.
    if (HasRedZone || RBReg == SPReg)
      BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
        .addImm(FPOffset)
        .addReg(SPReg);
    else
      BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
        .addImm(FPOffset)
        .addReg(RBReg);
  }

  if (FI->usesPICBase())
    BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
      .addImm(PBPOffset)
      .addReg(RBReg);

  if (HasBP)
    BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
      .addImm(BPOffset)
      .addReg(RBReg);

  // There is nothing more to be loaded from the stack, so now we can
  // restore SP: SP = RBReg + SPAdd.
  if (RBReg != SPReg || SPAdd != 0) {
    assert(!HasRedZone && "This should not happen with red zone");
    // If SPAdd is 0, generate a copy.
    if (SPAdd == 0)
      BuildMI(MBB, MBBI, dl, OrInst, SPReg)
        .addReg(RBReg)
        .addReg(RBReg);
    else
      BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
        .addReg(RBReg)
        .addImm(SPAdd);

    assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
    if (RBReg == FPReg)
      BuildMI(MBB, MBBI, dl, OrInst, FPReg)
        .addReg(ScratchReg)
        .addReg(ScratchReg);

    // Now load the LR from the caller's stack frame.
    if (MustSaveLR && !LoadedLR)
      BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
        .addImm(LROffset)
        .addReg(SPReg);
  }

  if (MustSaveCR &&
      !(SingleScratchReg && MustSaveLR))
    for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
      BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
        .addReg(TempReg, getKillRegState(i == e-1));

  if (MustSaveLR)
    BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);

  // Callee pop calling convention. Pop parameter/linkage area. Used for tail
  // call optimization.
  if (IsReturnBlock) {
    unsigned RetOpcode = MBBI->getOpcode();
    if (MF.getTarget().Options.GuaranteedTailCallOpt &&
        (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
        MF.getFunction().getCallingConv() == CallingConv::Fast) {
      PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
      unsigned CallerAllocatedAmt = FI->getMinReservedArea();

      if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
        BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
          .addReg(SPReg).addImm(CallerAllocatedAmt);
      } else {
        BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
          .addImm(CallerAllocatedAmt >> 16);
        BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
          .addReg(ScratchReg, RegState::Kill)
          .addImm(CallerAllocatedAmt & 0xFFFF);
        BuildMI(MBB, MBBI, dl, AddInst)
          .addReg(SPReg)
          .addReg(FPReg)
          .addReg(ScratchReg);
      }
    } else {
      createTailCallBranchInstr(MBB);
    }
  }
}

void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();

  // If we got this far, a first terminator should exist.
  assert(MBBI != MBB.end() && "Failed to find the first terminator.");

  DebugLoc dl = MBBI->getDebugLoc();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();

  // Create branch instruction for pseudo tail call return instruction.
  // The TCRETURNdi variants are direct calls. Valid targets for those are
  // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel
  // since we can tail call external functions with PC-Rel (i.e. we don't need
  // to worry about different TOC pointers). Some of the external functions
  // will be MO_GlobalAddress while others, like memcpy for example, are going
  // to be MO_ExternalSymbol.
  unsigned RetOpcode = MBBI->getOpcode();
  if (RetOpcode == PPC::TCRETURNdi) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    if (JumpTarget.isGlobal())
      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
        addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
    else if (JumpTarget.isSymbol())
      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
        addExternalSymbol(JumpTarget.getSymbolName());
    else
      llvm_unreachable("Expecting Global or External Symbol");
  } else if (RetOpcode == PPC::TCRETURNri) {
    MBBI = MBB.getLastNonDebugInstr();
    assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
  } else if (RetOpcode == PPC::TCRETURNai) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
  } else if (RetOpcode == PPC::TCRETURNdi8) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    if (JumpTarget.isGlobal())
      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
        addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
    else if (JumpTarget.isSymbol())
      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
        addExternalSymbol(JumpTarget.getSymbolName());
    else
      llvm_unreachable("Expecting Global or External Symbol");
  } else if (RetOpcode == PPC::TCRETURNri8) {
    MBBI = MBB.getLastNonDebugInstr();
    assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
  } else if (RetOpcode == PPC::TCRETURNai8) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
  }
}

void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                            BitVector &SavedRegs,
                                            RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  // Save and clear the LR state.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  unsigned LR = RegInfo->getRARegister();
  FI->setMustSaveLR(MustSaveLR(MF, LR));
  SavedRegs.reset(LR);

  // Save R31 if necessary.
  int FPSI = FI->getFramePointerSaveIndex();
  const bool isPPC64 = Subtarget.isPPC64();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // If the frame pointer save index hasn't been defined yet.
  if (!FPSI && needsFP(MF)) {
    // Find out the fixed offset of the frame pointer save area.
    int FPOffset = getFramePointerSaveOffset();
    // Allocate the frame index for the frame pointer save area.
    FPSI = MFI.CreateFixedObject(isPPC64 ? 8 : 4, FPOffset, true);
    // Save the result.
    FI->setFramePointerSaveIndex(FPSI);
  }

  int BPSI = FI->getBasePointerSaveIndex();
  if (!BPSI && RegInfo->hasBasePointer(MF)) {
    int BPOffset = getBasePointerSaveOffset();
    // Allocate the frame index for the base pointer save area.
    BPSI = MFI.CreateFixedObject(isPPC64 ? 8 : 4, BPOffset, true);
    // Save the result.
    FI->setBasePointerSaveIndex(BPSI);
  }

  // Reserve stack space for the PIC Base register (R30).
  // Only used in SVR4 32-bit.
  if (FI->usesPICBase()) {
    int PBPSI = MFI.CreateFixedObject(4, -8, true);
    FI->setPICBasePointerSaveIndex(PBPSI);
  }

  // Make sure we don't explicitly spill r31, because, for example, we have
  // some inline asm which explicitly clobbers it, when we otherwise have a
  // frame pointer and are using r31's spill slot for the prologue/epilogue
  // code. Same goes for the base pointer and the PIC base register.
  if (needsFP(MF))
    SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
  if (RegInfo->hasBasePointer(MF))
    SavedRegs.reset(RegInfo->getBaseRegister(MF));
  if (FI->usesPICBase())
    SavedRegs.reset(PPC::R30);

  // Reserve stack space to move the linkage area to in case of a tail call.
  int TCSPDelta = 0;
  if (MF.getTarget().Options.GuaranteedTailCallOpt &&
      (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
    MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
  }

  // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
  // For 64-bit SVR4, and all flavors of AIX we create a FixedStack
  // object at the offset of the CR-save slot in the linkage area. The actual
  // save and restore of the condition register will be created as part of the
  // prologue and epilogue insertion, but the FixedStack object is needed to
  // keep the CalleeSavedInfo valid.
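  // For example, the fixed object created below sits at SP+8 on 64-bit
  // targets and at SP+4 on 32-bit AIX (the CR save word in the caller's
  // linkage area), while for 32-bit ELF it is a 4-byte slot at offset -4 in
  // the callee's general register save area.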
  if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
       SavedRegs.test(PPC::CR4))) {
    const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
    const int64_t SpillOffset =
        Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
    int FrameIdx =
        MFI.CreateFixedObject(SpillSize, SpillOffset,
                              /* IsImmutable */ true, /* IsAliased */ false);
    FI->setCRSpillFrameIndex(FrameIdx);
  }
}

void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
                                                           RegScavenger *RS) const {
  // Get callee saved register information.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();

  // If the function is shrink-wrapped and has a tail call, the tail call might
  // not be in the RestoreBlock chosen by shrink-wrapping, so the real branch
  // instruction won't be generated by emitEpilogue(). Handle that case here.
  if (MFI.getSavePoint() && MFI.hasTailCall()) {
    MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
    for (MachineBasicBlock &MBB : MF) {
      if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
        createTailCallBranchInstr(MBB);
    }
  }

  // Early exit if no callee saved registers are modified!
  if (CSI.empty() && !needsFP(MF)) {
    addScavengingSpillSlot(MF, RS);
    return;
  }

  unsigned MinGPR = PPC::R31;
  unsigned MinG8R = PPC::X31;
  unsigned MinFPR = PPC::F31;
  unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;

  bool HasGPSaveArea = false;
  bool HasG8SaveArea = false;
  bool HasFPSaveArea = false;
  bool HasVRSaveArea = false;

  SmallVector<CalleeSavedInfo, 18> GPRegs;
  SmallVector<CalleeSavedInfo, 18> G8Regs;
  SmallVector<CalleeSavedInfo, 18> FPRegs;
  SmallVector<CalleeSavedInfo, 18> VRegs;

  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
            (Reg != PPC::X2 && Reg != PPC::R2)) &&
           "Not expecting to try to spill R2 in a function that must save TOC");
    if (PPC::GPRCRegClass.contains(Reg)) {
      HasGPSaveArea = true;

      GPRegs.push_back(CSI[i]);

      if (Reg < MinGPR) {
        MinGPR = Reg;
      }
    } else if (PPC::G8RCRegClass.contains(Reg)) {
      HasG8SaveArea = true;

      G8Regs.push_back(CSI[i]);

      if (Reg < MinG8R) {
        MinG8R = Reg;
      }
    } else if (PPC::F8RCRegClass.contains(Reg)) {
      HasFPSaveArea = true;

      FPRegs.push_back(CSI[i]);

      if (Reg < MinFPR) {
        MinFPR = Reg;
      }
    } else if (PPC::CRBITRCRegClass.contains(Reg) ||
               PPC::CRRCRegClass.contains(Reg)) {
      ; // do nothing, as we already know whether CRs are spilled
    } else if (PPC::VRRCRegClass.contains(Reg) ||
               PPC::SPERCRegClass.contains(Reg)) {
      // Altivec and SPE are mutually exclusive, but have the same stack
      // alignment requirements, so overload the save area for both cases.
      HasVRSaveArea = true;

      VRegs.push_back(CSI[i]);

      if (Reg < MinVR) {
        MinVR = Reg;
      }
    } else {
      llvm_unreachable("Unknown RegisterClass!");
    }
  }

  PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();

  int64_t LowerBound = 0;

  // Take into account stack space reserved for tail calls.
  int TCSPDelta = 0;
  if (MF.getTarget().Options.GuaranteedTailCallOpt &&
      (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
    LowerBound = TCSPDelta;
  }

  // The Floating-point register save area is right below the back chain word
  // of the previous stack frame.
  if (HasFPSaveArea) {
    for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
      int FI = FPRegs[i].getFrameIdx();

      MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
    }

    LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
  }

  // Check whether the frame pointer register is allocated. If so, make sure it
  // is spilled to the correct offset.
  if (needsFP(MF)) {
    int FI = PFI->getFramePointerSaveIndex();
    assert(FI && "No Frame Pointer Save Slot!");
    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
    // FP is R31/X31, so no need to update MinGPR/MinG8R.
    HasGPSaveArea = true;
  }

  if (PFI->usesPICBase()) {
    int FI = PFI->getPICBasePointerSaveIndex();
    assert(FI && "No PIC Base Pointer Save Slot!");
    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));

    MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
    HasGPSaveArea = true;
  }

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  if (RegInfo->hasBasePointer(MF)) {
    int FI = PFI->getBasePointerSaveIndex();
    assert(FI && "No Base Pointer Save Slot!");
    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));

    Register BP = RegInfo->getBaseRegister(MF);
    if (PPC::G8RCRegClass.contains(BP)) {
      MinG8R = std::min<unsigned>(MinG8R, BP);
      HasG8SaveArea = true;
    } else if (PPC::GPRCRegClass.contains(BP)) {
      MinGPR = std::min<unsigned>(MinGPR, BP);
      HasGPSaveArea = true;
    }
  }

  // General register save area starts right below the Floating-point
  // register save area.
  if (HasGPSaveArea || HasG8SaveArea) {
    // Move general register save area spill slots down, taking into account
    // the size of the Floating-point register save area.
    for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
      if (!GPRegs[i].isSpilledToReg()) {
        int FI = GPRegs[i].getFrameIdx();
        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
      }
    }

    // Move general register save area spill slots down, taking into account
    // the size of the Floating-point register save area.
    for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
      if (!G8Regs[i].isSpilledToReg()) {
        int FI = G8Regs[i].getFrameIdx();
        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
      }
    }

    unsigned MinReg =
      std::min<unsigned>(TRI->getEncodingValue(MinGPR),
                         TRI->getEncodingValue(MinG8R));

    const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4;
    LowerBound -= (31 - MinReg + 1) * GPRegSize;
  }

  // For 32-bit only, the CR save area is below the general register
  // save area.
  // For 64-bit SVR4, the CR save area is addressed relative
  // to the stack pointer and hence does not need an adjustment here.
  // Only CR2 (the first nonvolatile spilled) has an associated frame
  // index so that we have a single uniform save area.
  if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
    // Adjust the frame index of the CR spill slot.
    for (const auto &CSInfo : CSI) {
      if (CSInfo.getReg() == PPC::CR2) {
        int FI = CSInfo.getFrameIdx();
        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
        break;
      }
    }

    LowerBound -= 4; // The CR save area is always 4 bytes long.
  }

  // Both Altivec and SPE have the same alignment and padding requirements
  // within the stack frame.
  if (HasVRSaveArea) {
    // Insert alignment padding; we need 16-byte alignment. Note: for a
    // positive number the alignment formula is y = (x + (n-1)) & ~(n-1).
    // But since we are using a negative number here (the stack grows
    // downward), we should use the formula y = x & ~(n-1), where x is the
    // size before aligning, n is the alignment size (n = 16 here) and y is
    // the size after aligning.
    assert(LowerBound <= 0 &&
           "Expect LowerBound to have a non-positive value!");
    LowerBound &= ~(15);

    for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
      int FI = VRegs[i].getFrameIdx();

      MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
    }
  }

  addScavengingSpillSlot(MF, RS);
}

void
PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
                                         RegScavenger *RS) const {
  // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
  // a large stack, which will require scavenging a register to materialize a
  // large offset.

  // We need to have a scavenger spill slot for spills if the frame size is
  // large. In case there is no free register for large-offset addressing,
  // this slot is used for the necessary emergency spill. Also, we need the
  // slot for dynamic stack allocations.

  // The scavenger might be invoked if the frame offset does not fit into
  // the 16-bit immediate. We don't know the complete frame size here
  // because we've not yet computed callee-saved register spills or the
  // needed alignment padding.
  unsigned StackSize = determineFrameLayout(MF, true);
  MachineFrameInfo &MFI = MF.getFrameInfo();
  if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) ||
      (hasSpills(MF) && !isInt<16>(StackSize))) {
    const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
    const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
    const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
    const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
    unsigned Size = TRI.getSpillSize(RC);
    Align Alignment = TRI.getSpillAlign(RC);
    RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false));

    // Might we have over-aligned allocas?
    bool HasAlVars =
        MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign();

    // These kinds of spills might need two registers.
    if (spillsCR(MF) || HasAlVars)
      RS->addScavengingFrameIndex(
          MFI.CreateStackObject(Size, Alignment, false));
  }
}

// This function checks if a callee saved GPR can be spilled to a volatile
// vector register.
// This occurs for leaf functions when the option
// ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
// which were not spilled to vectors, return false so the target-independent
// code can handle them by assigning a FrameIdx to a stack slot.
bool PPCFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {

  if (CSI.empty())
    return true; // Early exit if no callee saved registers are modified!

  // Early exit if we cannot spill GPRs to volatile vector registers.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
    return false;

  // Build a BitVector of VSRs that can be used for spilling GPRs.
  BitVector BVAllocatable = TRI->getAllocatableSet(MF);
  BitVector BVCalleeSaved(TRI->getNumRegs());
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
  for (unsigned i = 0; CSRegs[i]; ++i)
    BVCalleeSaved.set(CSRegs[i]);

  for (unsigned Reg : BVAllocatable.set_bits()) {
    // Set to 0 if the register is not a volatile VF/F8 register, or if it is
    // used in the function.
    if (BVCalleeSaved[Reg] ||
        (!PPC::F8RCRegClass.contains(Reg) &&
         !PPC::VFRCRegClass.contains(Reg)) ||
        (MF.getRegInfo().isPhysRegUsed(Reg)))
      BVAllocatable.reset(Reg);
  }

  bool AllSpilledToReg = true;
  for (auto &CS : CSI) {
    if (BVAllocatable.none())
      return false;

    unsigned Reg = CS.getReg();
    if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
      AllSpilledToReg = false;
      continue;
    }

    unsigned VolatileVFReg = BVAllocatable.find_first();
    if (VolatileVFReg < BVAllocatable.size()) {
      CS.setDstReg(VolatileVFReg);
      BVAllocatable.reset(VolatileVFReg);
    } else {
      AllSpilledToReg = false;
    }
  }
  return AllSpilledToReg;
}

bool PPCFrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {

  MachineFunction *MF = MBB.getParent();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
  bool MustSaveTOC = FI->mustSaveTOC();
  DebugLoc DL;
  bool CRSpilled = false;
  MachineInstrBuilder CRMIB;

  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();

    // CR2 through CR4 are the nonvolatile CR fields.
    bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;

    // Add the callee-saved register as live-in; it's killed at the spill.
    // Do not do this for callee-saved registers that are live-in to the
    // function because they will already be marked live-in and this will be
    // adding it for a second time. It is an error to add the same register
    // to the set more than once.
    const MachineRegisterInfo &MRI = MF->getRegInfo();
    bool IsLiveIn = MRI.isLiveIn(Reg);
    if (!IsLiveIn)
      MBB.addLiveIn(Reg);

    if (CRSpilled && IsCRField) {
      CRMIB.addReg(Reg, RegState::ImplicitKill);
      continue;
    }

    // The actual spill will happen in the prologue.
    if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
      continue;

    // Insert the spill to the stack frame.
    if (IsCRField) {
      PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
      if (!Subtarget.is32BitELFABI()) {
        // The actual spill will happen at the start of the prologue.
        FuncInfo->addMustSaveCR(Reg);
      } else {
        CRSpilled = true;
        FuncInfo->setSpillsCR();

        // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have
        // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
        CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
                  .addReg(Reg, RegState::ImplicitKill);

        MBB.insert(MI, CRMIB);
        MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
                                           .addReg(PPC::R12,
                                                   getKillRegState(true)),
                                         CSI[i].getFrameIdx()));
      }
    } else {
      if (CSI[i].isSpilledToReg()) {
        NumPESpillVSR++;
        BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
          .addReg(Reg, getKillRegState(true));
      } else {
        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
        // Use !IsLiveIn for the kill flag.
        // We do not want to kill registers that are live in this function
        // before their use because they will become undefined registers.
        // Functions without NoUnwind need to preserve the order of elements in
        // saved vector registers.
        if (Subtarget.needsSwapsForVSXMemOps() &&
            !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
          TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
                                       CSI[i].getFrameIdx(), RC, TRI);
        else
          TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(),
                                  RC, TRI);
      }
    }
  }
  return true;
}

static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled,
                       bool CR4Spilled, MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator MI,
                       ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) {

  MachineFunction *MF = MBB.getParent();
  const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
  DebugLoc DL;
  unsigned MoveReg = PPC::R12;

  // 32-bit: FP-relative
  MBB.insert(MI,
             addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg),
                               CSI[CSIIndex].getFrameIdx()));

  unsigned RestoreOp = PPC::MTOCRF;
  if (CR2Spilled)
    MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
               .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));

  if (CR3Spilled)
    MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
               .addReg(MoveReg, getKillRegState(!CR4Spilled)));

  if (CR4Spilled)
    MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
               .addReg(MoveReg, getKillRegState(true)));
}

MachineBasicBlock::iterator PPCFrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I) const {
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  if (MF.getTarget().Options.GuaranteedTailCallOpt &&
      I->getOpcode() == PPC::ADJCALLSTACKUP) {
    // Add (actually subtract) back the amount the callee popped on return.
    if (int CalleeAmt = I->getOperand(1).getImm()) {
      bool is64Bit = Subtarget.isPPC64();
      CalleeAmt *= -1;
      unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
      unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
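      // If CalleeAmt does not fit in a signed 16-bit immediate, it is built
      // in TmpReg below with a LIS/ORI pair before being added to the stack
      // pointer; e.g. 0x12340 becomes lis 0, 1 followed by ori 0, 0, 0x2340
      // (with r0 as the temporary on 32-bit targets).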
      unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
      unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
      unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
      unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
      const DebugLoc &dl = I->getDebugLoc();

      if (isInt<16>(CalleeAmt)) {
        BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
          .addReg(StackReg, RegState::Kill)
          .addImm(CalleeAmt);
      } else {
        MachineBasicBlock::iterator MBBI = I;
        BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
          .addImm(CalleeAmt >> 16);
        BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
          .addReg(TmpReg, RegState::Kill)
          .addImm(CalleeAmt & 0xFFFF);
        BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
          .addReg(StackReg, RegState::Kill)
          .addReg(TmpReg);
      }
    }
  }
  // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
  return MBB.erase(I);
}

static bool isCalleeSavedCR(unsigned Reg) {
  return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4;
}

bool PPCFrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
  bool MustSaveTOC = FI->mustSaveTOC();
  bool CR2Spilled = false;
  bool CR3Spilled = false;
  bool CR4Spilled = false;
  unsigned CSIIndex = 0;

  // Initialize insertion-point logic; we will be restoring in reverse
  // order of spill.
  MachineBasicBlock::iterator I = MI, BeforeI = I;
  bool AtStart = I == MBB.begin();

  if (!AtStart)
    --BeforeI;

  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();

    if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
      continue;

    // Restore of callee saved condition register field is handled during
    // epilogue insertion.
    if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
      continue;

    if (Reg == PPC::CR2) {
      CR2Spilled = true;
      // The spill slot is associated only with CR2, which is the
      // first nonvolatile spilled. Save it here.
      CSIIndex = i;
      continue;
    } else if (Reg == PPC::CR3) {
      CR3Spilled = true;
      continue;
    } else if (Reg == PPC::CR4) {
      CR4Spilled = true;
      continue;
    } else {
      // On 32-bit ELF when we first encounter a non-CR register after seeing
      // at least one CR register, restore all spilled CRs together.
      if (CR2Spilled || CR3Spilled || CR4Spilled) {
        bool is31 = needsFP(*MF);
        restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI,
                   CSIIndex);
        CR2Spilled = CR3Spilled = CR4Spilled = false;
      }

      if (CSI[i].isSpilledToReg()) {
        DebugLoc DL;
        NumPEReloadVSR++;
        BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
            .addReg(CSI[i].getDstReg(), getKillRegState(true));
      } else {
        // Default behavior for non-CR saves.
        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);

        // Functions without NoUnwind need to preserve the order of elements in
        // saved vector registers.
        if (Subtarget.needsSwapsForVSXMemOps() &&
            !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
          TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
                                        TRI);
        else
          TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);

        assert(I != MBB.begin() &&
               "loadRegFromStackSlot didn't insert any code!");
      }
    }

    // Insert in reverse order.
    if (AtStart)
      I = MBB.begin();
    else {
      I = BeforeI;
      ++I;
    }
  }

  // If we have seen spilled CRs that have not been restored yet, restore
  // them now.
  if (CR2Spilled || CR3Spilled || CR4Spilled) {
    assert(Subtarget.is32BitELFABI() &&
           "Only set CR[2|3|4]Spilled on 32-bit SVR4.");
    bool is31 = needsFP(*MF);
    restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex);
  }

  return true;
}

unsigned PPCFrameLowering::getTOCSaveOffset() const {
  return TOCSaveOffset;
}

unsigned PPCFrameLowering::getFramePointerSaveOffset() const {
  return FramePointerSaveOffset;
}

unsigned PPCFrameLowering::getBasePointerSaveOffset() const {
  return BasePointerSaveOffset;
}

bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
  if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
    return false;
  return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() &&
          MF.getSubtarget<PPCSubtarget>().isPPC64());
}