//===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the PPC implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/PPCPredicates.h"
#include "PPCFrameLowering.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Function.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define DEBUG_TYPE "framelowering"
STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
STATISTIC(NumPrologProbed, "Number of prologues probed");

// Hidden command-line switch, off by default.
static cl::opt<bool>
EnablePEVectorSpills("ppc-enable-pe-vector-spills",
                     cl::desc("Enable spills in prologue to vector registers."),
                     cl::init(false), cl::Hidden);

/// Value used to initialise ReturnSaveOffset: 16 on 64-bit targets, 8 on
/// 32-bit AIX and 4 on 32-bit SVR4.
static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
  if (STI.isAIXABI())
    return STI.isPPC64() ? 16 : 8;
  // SVR4 ABI:
  return STI.isPPC64() ? 16 : 4;
}

/// Value used to initialise TOCSaveOffset: 40 (64-bit) or 20 (32-bit) on AIX;
/// otherwise 24 for ELFv2 and 40 for every other non-AIX ABI.
static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
  if (STI.isAIXABI())
    return STI.isPPC64() ? 40 : 20;
  return STI.isELFv2ABI() ? 24 : 40;
}

/// Value used to initialise FramePointerSaveOffset. The offset is negative
/// (-8 or -4) but is returned through an unsigned type, hence the -8U / -4U
/// spelling.
static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
  // First slot in the general register save area.
  return STI.isPPC64() ? -8U : -4U;
}

/// Value used to initialise LinkageSize. For AIX and for every 64-bit target
/// this is (4 for ELFv2, otherwise 6) entries times 8 bytes (64-bit) or
/// 4 bytes (32-bit); for 32-bit SVR4 it is a fixed 8 bytes.
static unsigned computeLinkageSize(const PPCSubtarget &STI) {
  if (STI.isAIXABI() || STI.isPPC64())
    return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);

  // 32-bit SVR4 ABI:
  return 8;
}

/// Value used to initialise BasePointerSaveOffset. Like the frame pointer
/// offset, the result is a negative offset carried in an unsigned value:
/// -12 for position-independent 32-bit ELF, otherwise -16 (64-bit) or
/// -8 (32-bit).
static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
  // Third slot in the general purpose register save area.
  if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent())
    return -12U;

  // Second slot in the general purpose register save area.
  return STI.isPPC64() ? -16U : -8U;
}

/// Value used to initialise CRSaveOffset: 4 on 32-bit AIX, 8 everywhere else.
static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
  return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
}

/// Construct the frame lowering for \p STI: the stack grows down, uses the
/// subtarget's platform stack alignment, and every ABI-dependent offset/size
/// is computed once here by the helpers above.
// NOTE(review): the offset initialisers read the Subtarget member rather than
// the STI parameter, so they rely on Subtarget being declared before them in
// the class — confirm against PPCFrameLowering.h.
PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
                          STI.getPlatformStackAlignment(), 0),
      Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
      TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
      FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
      LinkageSize(computeLinkageSize(Subtarget)),
      BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
      CRSaveOffset(computeCRSaveOffset(Subtarget)) {}

// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
//
// Returns a pointer to a function-local static table of {register, offset}
// pairs and stores the number of entries in NumEntries. The table is selected
// by ABI: 64-bit ELF, 32-bit ELF, otherwise AIX (asserted) in its 64-bit or
// 32-bit flavour.
const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
    unsigned &NumEntries) const {

// The CALLEE_SAVED_* macros below expand to comma-separated initializer
// entries so the same register lists can be shared by several tables.

// Floating-point register save area offsets.
#define CALLEE_SAVED_FPRS \
      {PPC::F31, -8}, \
      {PPC::F30, -16}, \
      {PPC::F29, -24}, \
      {PPC::F28, -32}, \
      {PPC::F27, -40}, \
      {PPC::F26, -48}, \
      {PPC::F25, -56}, \
      {PPC::F24, -64}, \
      {PPC::F23, -72}, \
      {PPC::F22, -80}, \
      {PPC::F21, -88}, \
      {PPC::F20, -96}, \
      {PPC::F19, -104}, \
      {PPC::F18, -112}, \
      {PPC::F17, -120}, \
      {PPC::F16, -128}, \
      {PPC::F15, -136}, \
      {PPC::F14, -144}

// 32-bit general purpose register save area offsets shared by ELF and
// AIX. AIX has an extra CSR with r13.
#define CALLEE_SAVED_GPRS32 \
      {PPC::R31, -4}, \
      {PPC::R30, -8}, \
      {PPC::R29, -12}, \
      {PPC::R28, -16}, \
      {PPC::R27, -20}, \
      {PPC::R26, -24}, \
      {PPC::R25, -28}, \
      {PPC::R24, -32}, \
      {PPC::R23, -36}, \
      {PPC::R22, -40}, \
      {PPC::R21, -44}, \
      {PPC::R20, -48}, \
      {PPC::R19, -52}, \
      {PPC::R18, -56}, \
      {PPC::R17, -60}, \
      {PPC::R16, -64}, \
      {PPC::R15, -68}, \
      {PPC::R14, -72}

// 64-bit general purpose register save area offsets.
#define CALLEE_SAVED_GPRS64 \
      {PPC::X31, -8}, \
      {PPC::X30, -16}, \
      {PPC::X29, -24}, \
      {PPC::X28, -32}, \
      {PPC::X27, -40}, \
      {PPC::X26, -48}, \
      {PPC::X25, -56}, \
      {PPC::X24, -64}, \
      {PPC::X23, -72}, \
      {PPC::X22, -80}, \
      {PPC::X21, -88}, \
      {PPC::X20, -96}, \
      {PPC::X19, -104}, \
      {PPC::X18, -112}, \
      {PPC::X17, -120}, \
      {PPC::X16, -128}, \
      {PPC::X15, -136}, \
      {PPC::X14, -144}

// Vector register save area offsets.
#define CALLEE_SAVED_VRS \
      {PPC::V31, -16}, \
      {PPC::V30, -32}, \
      {PPC::V29, -48}, \
      {PPC::V28, -64}, \
      {PPC::V27, -80}, \
      {PPC::V26, -96}, \
      {PPC::V25, -112}, \
      {PPC::V24, -128}, \
      {PPC::V23, -144}, \
      {PPC::V22, -160}, \
      {PPC::V21, -176}, \
      {PPC::V20, -192}

  // Note that the offsets here overlap, but this is fixed up in
  // processFunctionBeforeFrameFinalized.

  // 32-bit ELF: FPRs, GPRs, CR, VRSAVE, vector registers and SPE registers.
  static const SpillSlot ELFOffsets32[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS32,

      // CR save area offset.  We map each of the nonvolatile CR fields
      // to the slot for CR2, which is the first of the nonvolatile CR
      // fields to be assigned, so that we only allocate one save slot.
      // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
      {PPC::CR2, -4},

      // VRSAVE save area offset.
      {PPC::VRSAVE, -4},

      CALLEE_SAVED_VRS,

      // SPE register save area (overlaps Vector save area).
      {PPC::S31, -8},
      {PPC::S30, -16},
      {PPC::S29, -24},
      {PPC::S28, -32},
      {PPC::S27, -40},
      {PPC::S26, -48},
      {PPC::S25, -56},
      {PPC::S24, -64},
      {PPC::S23, -72},
      {PPC::S22, -80},
      {PPC::S21, -88},
      {PPC::S20, -96},
      {PPC::S19, -104},
      {PPC::S18, -112},
      {PPC::S17, -120},
      {PPC::S16, -128},
      {PPC::S15, -136},
      {PPC::S14, -144}};

  // 64-bit ELF: FPRs, GPRs, VRSAVE and vector registers (no CR entry here).
  static const SpillSlot ELFOffsets64[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS64,

      // VRSAVE save area offset.
      {PPC::VRSAVE, -4},
      CALLEE_SAVED_VRS
  };

  // 32-bit AIX: FPRs and GPRs, plus R13.
  static const SpillSlot AIXOffsets32[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS32,
      // Add AIX's extra CSR.
      {PPC::R13, -76},
      // TODO: Update when we add vector support for AIX.
  };

  // 64-bit AIX: FPRs and GPRs only.
  static const SpillSlot AIXOffsets64[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS64,
      // TODO: Update when we add vector support for AIX.
  };

  if (Subtarget.is64BitELFABI()) {
    NumEntries = array_lengthof(ELFOffsets64);
    return ELFOffsets64;
  }

  if (Subtarget.is32BitELFABI()) {
    NumEntries = array_lengthof(ELFOffsets32);
    return ELFOffsets32;
  }

  // Anything that is not ELF must be AIX at this point.
  assert(Subtarget.isAIXABI() && "Unexpected ABI.");

  if (Subtarget.isPPC64()) {
    NumEntries = array_lengthof(AIXOffsets64);
    return AIXOffsets64;
  }

  NumEntries = array_lengthof(AIXOffsets32);
  return AIXOffsets32;
}

/// Return PPCFunctionInfo::isCRSpilled() for \p MF.
static bool spillsCR(const MachineFunction &MF) {
  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  return FuncInfo->isCRSpilled();
}

/// Return PPCFunctionInfo::hasSpills() for \p MF.
static bool hasSpills(const MachineFunction &MF) {
  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  return FuncInfo->hasSpills();
}

/// Return PPCFunctionInfo::hasNonRISpills() for \p MF.
static bool hasNonRISpills(const MachineFunction &MF) {
  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  return FuncInfo->hasNonRISpills();
}

/// MustSaveLR - Return true if this function requires that we save the LR
/// register onto the stack in the prolog and restore it in the epilog of the
/// function.
///
/// \param LR  the link register to look up definitions for; the caller passes
///            the 32- or 64-bit variant as appropriate.
static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
  const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();

  // We need a save/restore of LR if there is any def of LR (which is
  // defined by calls, including the PIC setup sequence), or if there is
  // some use of the LR stack slot (e.g. for builtin_return_address).
  // (LR comes in 32 and 64 bit versions.)
  MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
  return RI != MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
}

/// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
/// call frame size. Update the MachineFunction object with the stack size.
287 unsigned 288 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF, 289 bool UseEstimate) const { 290 unsigned NewMaxCallFrameSize = 0; 291 unsigned FrameSize = determineFrameLayout(MF, UseEstimate, 292 &NewMaxCallFrameSize); 293 MF.getFrameInfo().setStackSize(FrameSize); 294 MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize); 295 return FrameSize; 296 } 297 298 /// determineFrameLayout - Determine the size of the frame and maximum call 299 /// frame size. 300 unsigned 301 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF, 302 bool UseEstimate, 303 unsigned *NewMaxCallFrameSize) const { 304 const MachineFrameInfo &MFI = MF.getFrameInfo(); 305 const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 306 307 // Get the number of bytes to allocate from the FrameInfo 308 unsigned FrameSize = 309 UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize(); 310 311 // Get stack alignments. The frame must be aligned to the greatest of these: 312 Align TargetAlign = getStackAlign(); // alignment required per the ABI 313 Align MaxAlign = MFI.getMaxAlign(); // algmt required by data in frame 314 Align Alignment = std::max(TargetAlign, MaxAlign); 315 316 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 317 318 unsigned LR = RegInfo->getRARegister(); 319 bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone); 320 bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca. 321 !MFI.adjustsStack() && // No calls. 322 !MustSaveLR(MF, LR) && // No need to save LR. 323 !FI->mustSaveTOC() && // No need to save TOC. 324 !RegInfo->hasBasePointer(MF); // No special alignment. 325 326 // Note: for PPC32 SVR4ABI, we can still generate stackless 327 // code if all local vars are reg-allocated. 
328 bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize(); 329 330 // Check whether we can skip adjusting the stack pointer (by using red zone) 331 if (!DisableRedZone && CanUseRedZone && FitsInRedZone) { 332 // No need for frame 333 return 0; 334 } 335 336 // Get the maximum call frame size of all the calls. 337 unsigned maxCallFrameSize = MFI.getMaxCallFrameSize(); 338 339 // Maximum call frame needs to be at least big enough for linkage area. 340 unsigned minCallFrameSize = getLinkageSize(); 341 maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize); 342 343 // If we have dynamic alloca then maxCallFrameSize needs to be aligned so 344 // that allocations will be aligned. 345 if (MFI.hasVarSizedObjects()) 346 maxCallFrameSize = alignTo(maxCallFrameSize, Alignment); 347 348 // Update the new max call frame size if the caller passes in a valid pointer. 349 if (NewMaxCallFrameSize) 350 *NewMaxCallFrameSize = maxCallFrameSize; 351 352 // Include call frame size in total. 353 FrameSize += maxCallFrameSize; 354 355 // Make sure the frame is aligned. 356 FrameSize = alignTo(FrameSize, Alignment); 357 358 return FrameSize; 359 } 360 361 // hasFP - Return true if the specified function actually has a dedicated frame 362 // pointer register. 363 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const { 364 const MachineFrameInfo &MFI = MF.getFrameInfo(); 365 // FIXME: This is pretty much broken by design: hasFP() might be called really 366 // early, before the stack layout was calculated and thus hasFP() might return 367 // true or false here depending on the time of call. 368 return (MFI.getStackSize()) && needsFP(MF); 369 } 370 371 // needsFP - Return true if the specified function should have a dedicated frame 372 // pointer register. This is true if the function has variable sized allocas or 373 // if frame pointer elimination is disabled. 
374 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { 375 const MachineFrameInfo &MFI = MF.getFrameInfo(); 376 377 // Naked functions have no stack frame pushed, so we don't have a frame 378 // pointer. 379 if (MF.getFunction().hasFnAttribute(Attribute::Naked)) 380 return false; 381 382 return MF.getTarget().Options.DisableFramePointerElim(MF) || 383 MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() || 384 (MF.getTarget().Options.GuaranteedTailCallOpt && 385 MF.getInfo<PPCFunctionInfo>()->hasFastCall()); 386 } 387 388 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { 389 bool is31 = needsFP(MF); 390 unsigned FPReg = is31 ? PPC::R31 : PPC::R1; 391 unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1; 392 393 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 394 bool HasBP = RegInfo->hasBasePointer(MF); 395 unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg; 396 unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg; 397 398 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); 399 BI != BE; ++BI) 400 for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) { 401 --MBBI; 402 for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { 403 MachineOperand &MO = MBBI->getOperand(I); 404 if (!MO.isReg()) 405 continue; 406 407 switch (MO.getReg()) { 408 case PPC::FP: 409 MO.setReg(FPReg); 410 break; 411 case PPC::FP8: 412 MO.setReg(FP8Reg); 413 break; 414 case PPC::BP: 415 MO.setReg(BPReg); 416 break; 417 case PPC::BP8: 418 MO.setReg(BP8Reg); 419 break; 420 421 } 422 } 423 } 424 } 425 426 /* This function will do the following: 427 - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12 428 respectively (defaults recommended by the ABI) and return true 429 - If MBB is not an entry block, initialize the register scavenger and look 430 for available registers. 
431 - If the defaults (R0/R12) are available, return true 432 - If TwoUniqueRegsRequired is set to true, it looks for two unique 433 registers. Otherwise, look for a single available register. 434 - If the required registers are found, set SR1 and SR2 and return true. 435 - If the required registers are not found, set SR2 or both SR1 and SR2 to 436 PPC::NoRegister and return false. 437 438 Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired 439 is not set, this function will attempt to find two different registers, but 440 still return true if only one register is available (and set SR1 == SR2). 441 */ 442 bool 443 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB, 444 bool UseAtEnd, 445 bool TwoUniqueRegsRequired, 446 Register *SR1, 447 Register *SR2) const { 448 RegScavenger RS; 449 Register R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0; 450 Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12; 451 452 // Set the defaults for the two scratch registers. 453 if (SR1) 454 *SR1 = R0; 455 456 if (SR2) { 457 assert (SR1 && "Asking for the second scratch register but not the first?"); 458 *SR2 = R12; 459 } 460 461 // If MBB is an entry or exit block, use R0 and R12 as the scratch registers. 462 if ((UseAtEnd && MBB->isReturnBlock()) || 463 (!UseAtEnd && (&MBB->getParent()->front() == MBB))) 464 return true; 465 466 RS.enterBasicBlock(*MBB); 467 468 if (UseAtEnd && !MBB->empty()) { 469 // The scratch register will be used at the end of the block, so must 470 // consider all registers used within the block 471 472 MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator(); 473 // If no terminator, back iterator up to previous instruction. 474 if (MBBI == MBB->end()) 475 MBBI = std::prev(MBBI); 476 477 if (MBBI != MBB->begin()) 478 RS.forward(MBBI); 479 } 480 481 // If the two registers are available, we're all good. 
482 // Note that we only return here if both R0 and R12 are available because 483 // although the function may not require two unique registers, it may benefit 484 // from having two so we should try to provide them. 485 if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12)) 486 return true; 487 488 // Get the list of callee-saved registers for the target. 489 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 490 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent()); 491 492 // Get all the available registers in the block. 493 BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass : 494 &PPC::GPRCRegClass); 495 496 // We shouldn't use callee-saved registers as scratch registers as they may be 497 // available when looking for a candidate block for shrink wrapping but not 498 // available when the actual prologue/epilogue is being emitted because they 499 // were added as live-in to the prologue block by PrologueEpilogueInserter. 500 for (int i = 0; CSRegs[i]; ++i) 501 BV.reset(CSRegs[i]); 502 503 // Set the first scratch register to the first available one. 504 if (SR1) { 505 int FirstScratchReg = BV.find_first(); 506 *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg; 507 } 508 509 // If there is another one available, set the second scratch register to that. 510 // Otherwise, set it to either PPC::NoRegister if this function requires two 511 // or to whatever SR1 is set to if this function doesn't require two. 512 if (SR2) { 513 int SecondScratchReg = BV.find_next(*SR1); 514 if (SecondScratchReg != -1) 515 *SR2 = SecondScratchReg; 516 else 517 *SR2 = TwoUniqueRegsRequired ? Register() : *SR1; 518 } 519 520 // Now that we've done our best to provide both registers, double check 521 // whether we were unable to provide enough. 522 if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U)) 523 return false; 524 525 return true; 526 } 527 528 // We need a scratch register for spilling LR and for spilling CR. 
// By default, we use two scratch registers to hide latency. However, if only
// one scratch register is available, we can adjust for that by not overlapping
// the spill code. However, if we need to realign the stack (i.e. have a base
// pointer) and the stack frame is large, we need two scratch registers.
//
// As implemented, two unique registers are required when the function has a
// base pointer, its maximum alignment is greater than 1, and either the
// negated frame size does not fit in a signed 16-bit immediate or there is no
// red zone (32-bit SVR4).
bool
PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  MachineFunction &MF = *(MBB->getParent());
  bool HasBP = RegInfo->hasBasePointer(MF);
  unsigned FrameSize = determineFrameLayout(MF);
  int NegFrameSize = -FrameSize;
  // "Large" means the negated size cannot be a signed 16-bit immediate.
  bool IsLargeFrame = !isInt<16>(NegFrameSize);
  MachineFrameInfo &MFI = MF.getFrameInfo();
  Align MaxAlign = MFI.getMaxAlign();
  bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();

  return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1;
}

// canUseAsPrologue - Return true if findScratchRegister() can supply the
// scratch registers needed at the start of MBB (two unique ones when
// twoUniqueScratchRegsRequired() says so).
bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
  // findScratchRegister takes a non-const block pointer.
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);

  return findScratchRegister(TmpMBB, false,
                             twoUniqueScratchRegsRequired(TmpMBB));
}

// canUseAsEpilogue - Return true if findScratchRegister() can supply a scratch
// register at the end of MBB.
bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
  // findScratchRegister takes a non-const block pointer.
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);

  return findScratchRegister(TmpMBB, true);
}

// stackUpdateCanBeMoved - Return true if the stack pointer update may be moved
// within the prologue of MF. Every check below must pass; the first failing
// one returns false.
bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();

  // Abort if there is no register info or function info.
  if (!RegInfo || !FI)
    return false;

  // Only move the stack update on ELFv2 ABI and PPC64.
  if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
    return false;

  // Check the frame size first and return false if it does not fit the
  // requirements.
  // We need a non-zero frame size as well as a frame that will fit in the red
  // zone. This is because by moving the stack pointer update we are now storing
  // to the red zone until the stack pointer is updated. If we get an interrupt
  // inside the prologue but before the stack update we now have a number of
  // stores to the red zone and those stores must all fit.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned FrameSize = MFI.getStackSize();
  if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
    return false;

  // Frame pointers and base pointers complicate matters so don't do anything
  // if we have them. For example having a frame pointer will sometimes require
  // a copy of r1 into r31 and that makes keeping track of updates to r1 more
  // difficult.
  if (hasFP(MF) || RegInfo->hasBasePointer(MF))
    return false;

  // Calls to fast_cc functions use different rules for passing parameters on
  // the stack from the ABI and using PIC base in the function imposes
  // similar restrictions to using the base pointer. It is not generally safe
  // to move the stack pointer update in these situations.
  if (FI->hasFastCall() || FI->usesPICBase())
    return false;

  // Finally we can move the stack update if we do not require register
  // scavenging. Register scavenging can introduce more spills and so
  // may make the frame size larger than we have computed.
  return !RegInfo->requiresFrameIndexScavenging(MF);
}

void PPCFrameLowering::emitPrologue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();

  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  DebugLoc dl;
  // AIX assembler does not support cfi directives.
  const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();

  // Get processor type.
  bool isPPC64 = Subtarget.isPPC64();
  // Get the ABI.
  bool isSVR4ABI = Subtarget.isSVR4ABI();
  bool isELFv2ABI = Subtarget.isELFv2ABI();
  assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI.");

  // Work out frame sizes.
  unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
  int NegFrameSize = -FrameSize;
  if (!isInt<32>(NegFrameSize))
    llvm_unreachable("Unhandled stack size!");

  if (MFI.isFrameAddressTaken())
    replaceFPWithRealFP(MF);

  // Check if the link register (LR) must be saved.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  bool MustSaveLR = FI->mustSaveLR();
  bool MustSaveTOC = FI->mustSaveTOC();
  const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
  bool MustSaveCR = !MustSaveCRs.empty();
  // Do we have a frame pointer and/or base pointer for this function?
  bool HasFP = hasFP(MF);
  bool HasBP = RegInfo->hasBasePointer(MF);
  bool HasRedZone = isPPC64 || !isSVR4ABI;

  Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
  Register BPReg = RegInfo->getBaseRegister(MF);
  Register FPReg = isPPC64 ? PPC::X31 : PPC::R31;
  Register LRReg = isPPC64 ? PPC::LR8 : PPC::LR;
  Register TOCReg = isPPC64 ?
PPC::X2 : PPC::R2; 651 Register ScratchReg; 652 Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 653 // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.) 654 const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8 655 : PPC::MFLR ); 656 const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD 657 : PPC::STW ); 658 const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU 659 : PPC::STWU ); 660 const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX 661 : PPC::STWUX); 662 const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8 663 : PPC::LIS ); 664 const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8 665 : PPC::ORI ); 666 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 667 : PPC::OR ); 668 const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8 669 : PPC::SUBFC); 670 const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8 671 : PPC::SUBFIC); 672 const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8 673 : PPC::MFCR); 674 const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW); 675 676 // Regarding this assert: Even though LR is saved in the caller's frame (i.e., 677 // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no 678 // Red Zone, an asynchronous event (a form of "callee") could claim a frame & 679 // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR. 680 assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) && 681 "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4."); 682 683 // Using the same bool variable as below to suppress compiler warnings. 684 // Stack probe requires two scratch registers, one for old sp, one for large 685 // frame and large probe size. 
686 bool SingleScratchReg = findScratchRegister( 687 &MBB, false, 688 twoUniqueScratchRegsRequired(&MBB) || TLI.hasInlineStackProbe(MF), 689 &ScratchReg, &TempReg); 690 assert(SingleScratchReg && 691 "Required number of registers not available in this block"); 692 693 SingleScratchReg = ScratchReg == TempReg; 694 695 int LROffset = getReturnSaveOffset(); 696 697 int FPOffset = 0; 698 if (HasFP) { 699 MachineFrameInfo &MFI = MF.getFrameInfo(); 700 int FPIndex = FI->getFramePointerSaveIndex(); 701 assert(FPIndex && "No Frame Pointer Save Slot!"); 702 FPOffset = MFI.getObjectOffset(FPIndex); 703 } 704 705 int BPOffset = 0; 706 if (HasBP) { 707 MachineFrameInfo &MFI = MF.getFrameInfo(); 708 int BPIndex = FI->getBasePointerSaveIndex(); 709 assert(BPIndex && "No Base Pointer Save Slot!"); 710 BPOffset = MFI.getObjectOffset(BPIndex); 711 } 712 713 int PBPOffset = 0; 714 if (FI->usesPICBase()) { 715 MachineFrameInfo &MFI = MF.getFrameInfo(); 716 int PBPIndex = FI->getPICBasePointerSaveIndex(); 717 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 718 PBPOffset = MFI.getObjectOffset(PBPIndex); 719 } 720 721 // Get stack alignments. 722 Align MaxAlign = MFI.getMaxAlign(); 723 if (HasBP && MaxAlign > 1) 724 assert(Log2(MaxAlign) < 16 && "Invalid alignment!"); 725 726 // Frames of 32KB & larger require special handling because they cannot be 727 // indexed into with a simple STDU/STWU/STD/STW immediate offset operand. 728 bool isLargeFrame = !isInt<16>(NegFrameSize); 729 730 // Check if we can move the stack update instruction (stdu) down the prologue 731 // past the callee saves. Hopefully this will avoid the situation where the 732 // saves are waiting for the update on the store with update to complete. 733 MachineBasicBlock::iterator StackUpdateLoc = MBBI; 734 bool MovingStackUpdateDown = false; 735 736 // Check if we can move the stack update. 
737 if (stackUpdateCanBeMoved(MF)) { 738 const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo(); 739 for (CalleeSavedInfo CSI : Info) { 740 int FrIdx = CSI.getFrameIdx(); 741 // If the frame index is not negative the callee saved info belongs to a 742 // stack object that is not a fixed stack object. We ignore non-fixed 743 // stack objects because we won't move the stack update pointer past them. 744 if (FrIdx >= 0) 745 continue; 746 747 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) { 748 StackUpdateLoc++; 749 MovingStackUpdateDown = true; 750 } else { 751 // We need all of the Frame Indices to meet these conditions. 752 // If they do not, abort the whole operation. 753 StackUpdateLoc = MBBI; 754 MovingStackUpdateDown = false; 755 break; 756 } 757 } 758 759 // If the operation was not aborted then update the object offset. 760 if (MovingStackUpdateDown) { 761 for (CalleeSavedInfo CSI : Info) { 762 int FrIdx = CSI.getFrameIdx(); 763 if (FrIdx < 0) 764 MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize); 765 } 766 } 767 } 768 769 // Where in the prologue we move the CR fields depends on how many scratch 770 // registers we have, and if we need to save the link register or not. This 771 // lambda is to avoid duplicating the logic in 2 places. 772 auto BuildMoveFromCR = [&]() { 773 if (isELFv2ABI && MustSaveCRs.size() == 1) { 774 // In the ELFv2 ABI, we are not required to save all CR fields. 775 // If only one CR field is clobbered, it is more efficient to use 776 // mfocrf to selectively save just that field, because mfocrf has short 777 // latency compares to mfcr. 
778 assert(isPPC64 && "V2 ABI is 64-bit only."); 779 MachineInstrBuilder MIB = 780 BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg); 781 MIB.addReg(MustSaveCRs[0], RegState::Kill); 782 } else { 783 MachineInstrBuilder MIB = 784 BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg); 785 for (unsigned CRfield : MustSaveCRs) 786 MIB.addReg(CRfield, RegState::ImplicitKill); 787 } 788 }; 789 790 // If we need to spill the CR and the LR but we don't have two separate 791 // registers available, we must spill them one at a time 792 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 793 BuildMoveFromCR(); 794 BuildMI(MBB, MBBI, dl, StoreWordInst) 795 .addReg(TempReg, getKillRegState(true)) 796 .addImm(CRSaveOffset) 797 .addReg(SPReg); 798 } 799 800 if (MustSaveLR) 801 BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg); 802 803 if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) 804 BuildMoveFromCR(); 805 806 if (HasRedZone) { 807 if (HasFP) 808 BuildMI(MBB, MBBI, dl, StoreInst) 809 .addReg(FPReg) 810 .addImm(FPOffset) 811 .addReg(SPReg); 812 if (FI->usesPICBase()) 813 BuildMI(MBB, MBBI, dl, StoreInst) 814 .addReg(PPC::R30) 815 .addImm(PBPOffset) 816 .addReg(SPReg); 817 if (HasBP) 818 BuildMI(MBB, MBBI, dl, StoreInst) 819 .addReg(BPReg) 820 .addImm(BPOffset) 821 .addReg(SPReg); 822 } 823 824 if (MustSaveLR) 825 BuildMI(MBB, StackUpdateLoc, dl, StoreInst) 826 .addReg(ScratchReg, getKillRegState(true)) 827 .addImm(LROffset) 828 .addReg(SPReg); 829 830 if (MustSaveCR && 831 !(SingleScratchReg && MustSaveLR)) { 832 assert(HasRedZone && "A red zone is always available on PPC64"); 833 BuildMI(MBB, MBBI, dl, StoreWordInst) 834 .addReg(TempReg, getKillRegState(true)) 835 .addImm(CRSaveOffset) 836 .addReg(SPReg); 837 } 838 839 // Skip the rest if this is a leaf function & all spills fit in the Red Zone. 840 if (!FrameSize) 841 return; 842 843 // Adjust stack pointer: r1 += NegFrameSize. 
844 // If there is a preferred stack alignment, align R1 now 845 846 if (HasBP && HasRedZone) { 847 // Save a copy of r1 as the base pointer. 848 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 849 .addReg(SPReg) 850 .addReg(SPReg); 851 } 852 853 // Have we generated a STUX instruction to claim stack frame? If so, 854 // the negated frame size will be placed in ScratchReg. 855 bool HasSTUX = false; 856 857 // If FrameSize <= TLI.getStackProbeSize(MF), as POWER ABI requires backchain 858 // pointer is always stored at SP, we will get a free probe due to an essential 859 // STU(X) instruction. 860 if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) { 861 // To be consistent with other targets, a pseudo instruction is emitted and 862 // will be later expanded in `inlineStackProbe`. 863 BuildMI(MBB, MBBI, dl, 864 TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64 865 : PPC::PROBED_STACKALLOC_32)) 866 .addDef(ScratchReg) 867 .addDef(TempReg) // TempReg stores the old sp. 868 .addImm(NegFrameSize); 869 // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we 870 // update the ScratchReg to meet the assumption that ScratchReg contains 871 // the NegFrameSize. This solution is rather tricky. 872 if (!HasRedZone) { 873 BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) 874 .addReg(TempReg) 875 .addReg(SPReg); 876 HasSTUX = true; 877 } 878 } else { 879 // This condition must be kept in sync with canUseAsPrologue. 880 if (HasBP && MaxAlign > 1) { 881 if (isPPC64) 882 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg) 883 .addReg(SPReg) 884 .addImm(0) 885 .addImm(64 - Log2(MaxAlign)); 886 else // PPC32... 
887 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg) 888 .addReg(SPReg) 889 .addImm(0) 890 .addImm(32 - Log2(MaxAlign)) 891 .addImm(31); 892 if (!isLargeFrame) { 893 BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg) 894 .addReg(ScratchReg, RegState::Kill) 895 .addImm(NegFrameSize); 896 } else { 897 assert(!SingleScratchReg && "Only a single scratch reg available"); 898 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg) 899 .addImm(NegFrameSize >> 16); 900 BuildMI(MBB, MBBI, dl, OrImmInst, TempReg) 901 .addReg(TempReg, RegState::Kill) 902 .addImm(NegFrameSize & 0xFFFF); 903 BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg) 904 .addReg(ScratchReg, RegState::Kill) 905 .addReg(TempReg, RegState::Kill); 906 } 907 908 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 909 .addReg(SPReg, RegState::Kill) 910 .addReg(SPReg) 911 .addReg(ScratchReg); 912 HasSTUX = true; 913 914 } else if (!isLargeFrame) { 915 BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg) 916 .addReg(SPReg) 917 .addImm(NegFrameSize) 918 .addReg(SPReg); 919 920 } else { 921 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 922 .addImm(NegFrameSize >> 16); 923 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 924 .addReg(ScratchReg, RegState::Kill) 925 .addImm(NegFrameSize & 0xFFFF); 926 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 927 .addReg(SPReg, RegState::Kill) 928 .addReg(SPReg) 929 .addReg(ScratchReg); 930 HasSTUX = true; 931 } 932 } 933 934 // Save the TOC register after the stack pointer update if a prologue TOC 935 // save is required for the function. 
936 if (MustSaveTOC) { 937 assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2"); 938 BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD)) 939 .addReg(TOCReg, getKillRegState(true)) 940 .addImm(TOCSaveOffset) 941 .addReg(SPReg); 942 } 943 944 if (!HasRedZone) { 945 assert(!isPPC64 && "A red zone is always available on PPC64"); 946 if (HasSTUX) { 947 // The negated frame size is in ScratchReg, and the SPReg has been 948 // decremented by the frame size: SPReg = old SPReg + ScratchReg. 949 // Since FPOffset, PBPOffset, etc. are relative to the beginning of 950 // the stack frame (i.e. the old SP), ideally, we would put the old 951 // SP into a register and use it as the base for the stores. The 952 // problem is that the only available register may be ScratchReg, 953 // which could be R0, and R0 cannot be used as a base address. 954 955 // First, set ScratchReg to the old SP. This may need to be modified 956 // later. 957 BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) 958 .addReg(ScratchReg, RegState::Kill) 959 .addReg(SPReg); 960 961 if (ScratchReg == PPC::R0) { 962 // R0 cannot be used as a base register, but it can be used as an 963 // index in a store-indexed. 964 int LastOffset = 0; 965 if (HasFP) { 966 // R0 += (FPOffset-LastOffset). 967 // Need addic, since addi treats R0 as 0. 968 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 969 .addReg(ScratchReg) 970 .addImm(FPOffset-LastOffset); 971 LastOffset = FPOffset; 972 // Store FP into *R0. 973 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 974 .addReg(FPReg, RegState::Kill) // Save FP. 975 .addReg(PPC::ZERO) 976 .addReg(ScratchReg); // This will be the index (R0 is ok here). 977 } 978 if (FI->usesPICBase()) { 979 // R0 += (PBPOffset-LastOffset). 
980 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 981 .addReg(ScratchReg) 982 .addImm(PBPOffset-LastOffset); 983 LastOffset = PBPOffset; 984 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 985 .addReg(PPC::R30, RegState::Kill) // Save PIC base pointer. 986 .addReg(PPC::ZERO) 987 .addReg(ScratchReg); // This will be the index (R0 is ok here). 988 } 989 if (HasBP) { 990 // R0 += (BPOffset-LastOffset). 991 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 992 .addReg(ScratchReg) 993 .addImm(BPOffset-LastOffset); 994 LastOffset = BPOffset; 995 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 996 .addReg(BPReg, RegState::Kill) // Save BP. 997 .addReg(PPC::ZERO) 998 .addReg(ScratchReg); // This will be the index (R0 is ok here). 999 // BP = R0-LastOffset 1000 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg) 1001 .addReg(ScratchReg, RegState::Kill) 1002 .addImm(-LastOffset); 1003 } 1004 } else { 1005 // ScratchReg is not R0, so use it as the base register. It is 1006 // already set to the old SP, so we can use the offsets directly. 1007 1008 // Now that the stack frame has been allocated, save all the necessary 1009 // registers using ScratchReg as the base address. 1010 if (HasFP) 1011 BuildMI(MBB, MBBI, dl, StoreInst) 1012 .addReg(FPReg) 1013 .addImm(FPOffset) 1014 .addReg(ScratchReg); 1015 if (FI->usesPICBase()) 1016 BuildMI(MBB, MBBI, dl, StoreInst) 1017 .addReg(PPC::R30) 1018 .addImm(PBPOffset) 1019 .addReg(ScratchReg); 1020 if (HasBP) { 1021 BuildMI(MBB, MBBI, dl, StoreInst) 1022 .addReg(BPReg) 1023 .addImm(BPOffset) 1024 .addReg(ScratchReg); 1025 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 1026 .addReg(ScratchReg, RegState::Kill) 1027 .addReg(ScratchReg); 1028 } 1029 } 1030 } else { 1031 // The frame size is a known 16-bit constant (fitting in the immediate 1032 // field of STWU). To be here we have to be compiling for PPC32. 1033 // Since the SPReg has been decreased by FrameSize, add it back to each 1034 // offset. 
1035 if (HasFP) 1036 BuildMI(MBB, MBBI, dl, StoreInst) 1037 .addReg(FPReg) 1038 .addImm(FrameSize + FPOffset) 1039 .addReg(SPReg); 1040 if (FI->usesPICBase()) 1041 BuildMI(MBB, MBBI, dl, StoreInst) 1042 .addReg(PPC::R30) 1043 .addImm(FrameSize + PBPOffset) 1044 .addReg(SPReg); 1045 if (HasBP) { 1046 BuildMI(MBB, MBBI, dl, StoreInst) 1047 .addReg(BPReg) 1048 .addImm(FrameSize + BPOffset) 1049 .addReg(SPReg); 1050 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg) 1051 .addReg(SPReg) 1052 .addImm(FrameSize); 1053 } 1054 } 1055 } 1056 1057 // Add Call Frame Information for the instructions we generated above. 1058 if (needsCFI) { 1059 unsigned CFIIndex; 1060 1061 if (HasBP) { 1062 // Define CFA in terms of BP. Do this in preference to using FP/SP, 1063 // because if the stack needed aligning then CFA won't be at a fixed 1064 // offset from FP/SP. 1065 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1066 CFIIndex = MF.addFrameInst( 1067 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1068 } else { 1069 // Adjust the definition of CFA to account for the change in SP. 1070 assert(NegFrameSize); 1071 CFIIndex = MF.addFrameInst( 1072 MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize)); 1073 } 1074 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1075 .addCFIIndex(CFIIndex); 1076 1077 if (HasFP) { 1078 // Describe where FP was saved, at a fixed offset from CFA. 1079 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1080 CFIIndex = MF.addFrameInst( 1081 MCCFIInstruction::createOffset(nullptr, Reg, FPOffset)); 1082 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1083 .addCFIIndex(CFIIndex); 1084 } 1085 1086 if (FI->usesPICBase()) { 1087 // Describe where FP was saved, at a fixed offset from CFA. 
1088 unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true); 1089 CFIIndex = MF.addFrameInst( 1090 MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset)); 1091 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1092 .addCFIIndex(CFIIndex); 1093 } 1094 1095 if (HasBP) { 1096 // Describe where BP was saved, at a fixed offset from CFA. 1097 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1098 CFIIndex = MF.addFrameInst( 1099 MCCFIInstruction::createOffset(nullptr, Reg, BPOffset)); 1100 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1101 .addCFIIndex(CFIIndex); 1102 } 1103 1104 if (MustSaveLR) { 1105 // Describe where LR was saved, at a fixed offset from CFA. 1106 unsigned Reg = MRI->getDwarfRegNum(LRReg, true); 1107 CFIIndex = MF.addFrameInst( 1108 MCCFIInstruction::createOffset(nullptr, Reg, LROffset)); 1109 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1110 .addCFIIndex(CFIIndex); 1111 } 1112 } 1113 1114 // If there is a frame pointer, copy R1 into R31 1115 if (HasFP) { 1116 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1117 .addReg(SPReg) 1118 .addReg(SPReg); 1119 1120 if (!HasBP && needsCFI) { 1121 // Change the definition of CFA from SP+offset to FP+offset, because SP 1122 // will change at every alloca. 1123 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1124 unsigned CFIIndex = MF.addFrameInst( 1125 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1126 1127 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1128 .addCFIIndex(CFIIndex); 1129 } 1130 } 1131 1132 if (needsCFI) { 1133 // Describe where callee saved registers were saved, at fixed offsets from 1134 // CFA. 1135 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 1136 for (unsigned I = 0, E = CSI.size(); I != E; ++I) { 1137 unsigned Reg = CSI[I].getReg(); 1138 if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue; 1139 1140 // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just 1141 // subregisters of CR2. 
We just need to emit a move of CR2. 1142 if (PPC::CRBITRCRegClass.contains(Reg)) 1143 continue; 1144 1145 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 1146 continue; 1147 1148 // For SVR4, don't emit a move for the CR spill slot if we haven't 1149 // spilled CRs. 1150 if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4) 1151 && !MustSaveCR) 1152 continue; 1153 1154 // For 64-bit SVR4 when we have spilled CRs, the spill location 1155 // is SP+8, not a frame-relative slot. 1156 if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { 1157 // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for 1158 // the whole CR word. In the ELFv2 ABI, every CR that was 1159 // actually saved gets its own CFI record. 1160 unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2; 1161 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 1162 nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset)); 1163 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1164 .addCFIIndex(CFIIndex); 1165 continue; 1166 } 1167 1168 if (CSI[I].isSpilledToReg()) { 1169 unsigned SpilledReg = CSI[I].getDstReg(); 1170 unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister( 1171 nullptr, MRI->getDwarfRegNum(Reg, true), 1172 MRI->getDwarfRegNum(SpilledReg, true))); 1173 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1174 .addCFIIndex(CFIRegister); 1175 } else { 1176 int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx()); 1177 // We have changed the object offset above but we do not want to change 1178 // the actual offsets in the CFI instruction so we have to undo the 1179 // offset change here. 
if (MovingStackUpdateDown)
          Offset -= NegFrameSize;

        unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
            nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex);
      }
    }
  }
}

/// Expand the PROBED_STACKALLOC_32 / PROBED_STACKALLOC_64 pseudo found in
/// \p PrologMBB into the real stack-allocation sequence. Returns without
/// changing anything if the block contains no such pseudo.
///
/// Operand 2 of the pseudo is the negated frame size; operands 0 and 1 are the
/// registers used below as ScratchReg and FPReg. FPReg is first set to a copy
/// of SP. The frame is then allocated as an optional residual piece
/// (NegFrameSize % NegProbeSize) followed by NumBlocks pieces of the probe
/// size, each one allocated by a store-with-update of FPReg. Fewer than three
/// blocks are emitted inline; otherwise a CTR loop is synthesized in two new
/// basic blocks and everything after the pseudo is moved into the exit block.
/// The pseudo itself is erased at the end.
void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
                                        MachineBasicBlock &PrologMBB) const {
  // TODO: Generate CFI instructions.
  // NOTE(review): CFI is emitted below whenever needsCFI is set, so the TODO
  // above may be stale or may refer to something narrower -- confirm.
  bool isPPC64 = Subtarget.isPPC64();
  const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  // AIX assembler does not support cfi directives.
  const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
  // Locate the first probed-stack-allocation pseudo in the block.
  auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
    int Opc = MI.getOpcode();
    return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32;
  });
  if (StackAllocMIPos == PrologMBB.end())
    return;
  const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
  DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
  MachineInstr &MI = *StackAllocMIPos;
  int64_t NegFrameSize = MI.getOperand(2).getImm();
  int64_t NegProbeSize = -(int64_t)TLI.getStackProbeSize(MF);
  assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
  // Split the frame into NumBlocks probe-sized pieces plus a residual piece.
  int64_t NumBlocks = NegFrameSize / NegProbeSize;
  int64_t NegResidualSize = NegFrameSize % NegProbeSize;
  Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
  Register ScratchReg = MI.getOperand(0).getReg();
  Register FPReg = MI.getOperand(1).getReg();
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  bool HasBP = RegInfo->hasBasePointer(MF);
  Align MaxAlign = MFI.getMaxAlign();
  // Initialize current frame pointer.
  const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
  BuildMI(PrologMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
  // Subroutines to generate .cfi_* directives.
  auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI, Register Reg) {
    unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
    unsigned CFIIndex = MF.addFrameInst(
        MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  };
  auto buildDefCFA = [&](MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, Register Reg,
                         int Offset) {
    unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
    unsigned CFIIndex = MBB.getParent()->addFrameInst(
        MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset));
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  };
  // Subroutine to determine if we can use the Imm as part of d-form.
  auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; };
  // Subroutine to materialize the Imm into TempReg.
  auto MaterializeImm = [&](MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI, int64_t Imm,
                            Register &TempReg) {
    assert(isInt<32>(Imm) && "Unhandled imm");
    if (isInt<16>(Imm))
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg)
          .addImm(Imm);
    else {
      // Build the 32-bit value in two halves: high 16 bits, then OR in the
      // low 16 bits.
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
          .addImm(Imm >> 16);
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg)
          .addReg(TempReg)
          .addImm(Imm & 0xFFFF);
    }
  };
  // Subroutine to store frame pointer and decrease stack pointer by probe size.
  // With UseDForm the size is the immediate NegSize; otherwise it is taken
  // from NegSizeReg.
  auto allocateAndProbe = [&](MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MBBI, int64_t NegSize,
                              Register NegSizeReg, bool UseDForm) {
    if (UseDForm)
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
          .addReg(FPReg)
          .addImm(NegSize)
          .addReg(SPReg);
    else
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
          .addReg(FPReg)
          .addReg(SPReg)
          .addReg(NegSizeReg);
  };
  // Use FPReg to calculate CFA.
  if (needsCFI)
    buildDefCFA(PrologMBB, {MI}, FPReg, 0);
  // For case HasBP && MaxAlign > 1, we have to align the SP by performing
  // SP = SP - SP % MaxAlign.
  if (HasBP && MaxAlign > 1) {
    if (isPPC64)
      BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
          .addReg(FPReg)
          .addImm(0)
          .addImm(64 - Log2(MaxAlign));
    else
      BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
          .addReg(FPReg)
          .addImm(0)
          .addImm(32 - Log2(MaxAlign))
          .addImm(31);
    BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBFC8 : PPC::SUBFC),
            SPReg)
        .addReg(ScratchReg)
        .addReg(SPReg);
  }
  // Probe residual part.
  if (NegResidualSize) {
    bool ResidualUseDForm = CanUseDForm(NegResidualSize);
    if (!ResidualUseDForm)
      MaterializeImm(PrologMBB, {MI}, NegResidualSize, ScratchReg);
    allocateAndProbe(PrologMBB, {MI}, NegResidualSize, ScratchReg,
                     ResidualUseDForm);
  }
  bool UseDForm = CanUseDForm(NegProbeSize);
  // If number of blocks is small, just probe them directly.
  if (NumBlocks < 3) {
    if (!UseDForm)
      MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg);
    for (int i = 0; i < NumBlocks; ++i)
      allocateAndProbe(PrologMBB, {MI}, NegProbeSize, ScratchReg, UseDForm);
    if (needsCFI) {
      // Restore using SPReg to calculate CFA.
      buildDefCFAReg(PrologMBB, {MI}, SPReg);
    }
  } else {
    // Since CTR is a volatile register and current shrinkwrap implementation
    // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a
    // CTR loop to probe.
    // Calculate the trip count in ScratchReg and move it into CTR.
    MaterializeImm(PrologMBB, {MI}, NumBlocks, ScratchReg);
    BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
        .addReg(ScratchReg, RegState::Kill);
    if (!UseDForm)
      MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg);
    // Create MBBs of the loop.
    MachineFunction::iterator MBBInsertPoint =
        std::next(PrologMBB.getIterator());
    MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
    MF.insert(MBBInsertPoint, LoopMBB);
    MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
    MF.insert(MBBInsertPoint, ExitMBB);
    // Synthesize the loop body.
    allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
                     UseDForm);
    BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
        .addMBB(LoopMBB);
    LoopMBB->addSuccessor(ExitMBB);
    LoopMBB->addSuccessor(LoopMBB);
    // Synthesize the exit MBB.
    // Everything that followed the pseudo in PrologMBB moves to ExitMBB, and
    // ExitMBB takes over PrologMBB's successors.
    ExitMBB->splice(ExitMBB->end(), &PrologMBB,
                    std::next(MachineBasicBlock::iterator(MI)),
                    PrologMBB.end());
    ExitMBB->transferSuccessorsAndUpdatePHIs(&PrologMBB);
    PrologMBB.addSuccessor(LoopMBB);
    if (needsCFI) {
      // Restore using SPReg to calculate CFA.
      buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
    }
    // Update liveins.
    recomputeLiveIns(*LoopMBB);
    recomputeLiveIns(*ExitMBB);
  }
  ++NumPrologProbed;
  MI.eraseFromParent();
}

/// Emit the function epilogue into \p MBB.
///
/// Instructions are built at the block's first terminator (MBBI), except for
/// the SP update, the first LR reload and the MTLR, which are built at
/// StackUpdateLoc. StackUpdateLoc starts out equal to MBBI and is moved up
/// past the callee-save restores when stackUpdateCanBeMoved(MF) allows it.
///
/// In order, the code below:
///  - folds the stack adjustment of a TCRETURN* terminator into FrameSize;
///  - chooses the base register (RBReg) and the final SP addend (SPAdd) used
///    for the restores, depending on red zone, frame size, BP and var-sized
///    objects;
///  - reloads CR (through TempReg), FP, the PIC base (R30) and BP as needed;
///  - restores SP when that has not already happened, then moves the saved LR
///    back through ScratchReg;
///  - for return blocks, either pops the caller-allocated area (fastcc with
///    GuaranteedTailCallOpt and a BLR/BLR8 return) or calls
///    createTailCallBranchInstr.
void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  DebugLoc dl;

  if (MBBI != MBB.end())
    dl = MBBI->getDebugLoc();

  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  // Get alignment info so we know how to restore the SP.
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Get the number of bytes allocated from the FrameInfo.
  int FrameSize = MFI.getStackSize();

  // Get processor type.
  bool isPPC64 = Subtarget.isPPC64();

  // Check if the link register (LR) has been saved.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  bool MustSaveLR = FI->mustSaveLR();
  const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
  bool MustSaveCR = !MustSaveCRs.empty();
  // Do we have a frame pointer and/or base pointer for this function?
  bool HasFP = hasFP(MF);
  bool HasBP = RegInfo->hasBasePointer(MF);
  bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();

  Register SPReg      = isPPC64 ? PPC::X1  : PPC::R1;
  Register BPReg = RegInfo->getBaseRegister(MF);
  Register FPReg      = isPPC64 ? PPC::X31 : PPC::R31;
  Register ScratchReg;
  Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
  // Pick the 64-bit or 32-bit form of every opcode used below.
  const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
                                                 : PPC::MTLR );
  const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
                                                 : PPC::LWZ );
  const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
                                                           : PPC::LIS );
  const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
                                              : PPC::OR );
  const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
                                                  : PPC::ORI );
  const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
                                                   : PPC::ADDI );
  const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
                                                : PPC::ADD4 );
  const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8
                                                     : PPC::LWZ);
  const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8
                                                     : PPC::MTOCRF);
  int LROffset = getReturnSaveOffset();

  int FPOffset = 0;

  // Using the same bool variable as below to suppress compiler warnings.
  bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
                                              &TempReg);
  assert(SingleScratchReg &&
         "Could not find an available scratch register");

  // From here on SingleScratchReg means "only one distinct scratch register".
  SingleScratchReg = ScratchReg == TempReg;

  if (HasFP) {
    int FPIndex = FI->getFramePointerSaveIndex();
    assert(FPIndex && "No Frame Pointer Save Slot!");
    FPOffset = MFI.getObjectOffset(FPIndex);
  }

  int BPOffset = 0;
  if (HasBP) {
    int BPIndex = FI->getBasePointerSaveIndex();
    assert(BPIndex && "No Base Pointer Save Slot!");
    BPOffset = MFI.getObjectOffset(BPIndex);
  }

  int PBPOffset = 0;
  if (FI->usesPICBase()) {
    int PBPIndex = FI->getPICBasePointerSaveIndex();
    assert(PBPIndex && "No PIC Base Pointer Save Slot!");
    PBPOffset = MFI.getObjectOffset(PBPIndex);
  }

  bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());

  if (IsReturnBlock) {
    unsigned RetOpcode = MBBI->getOpcode();
    bool UsesTCRet =  RetOpcode == PPC::TCRETURNri ||
                      RetOpcode == PPC::TCRETURNdi ||
                      RetOpcode == PPC::TCRETURNai ||
                      RetOpcode == PPC::TCRETURNri8 ||
                      RetOpcode == PPC::TCRETURNdi8 ||
                      RetOpcode == PPC::TCRETURNai8;

    if (UsesTCRet) {
      // Fold the stack adjustment carried by the tail-call pseudo (operand 1)
      // into FrameSize.
      int MaxTCRetDelta = FI->getTailCallSPDelta();
      MachineOperand &StackAdjust = MBBI->getOperand(1);
      assert(StackAdjust.isImm() && "Expecting immediate value.");
      // Adjust stack pointer.
      int StackAdj = StackAdjust.getImm();
      int Delta = StackAdj - MaxTCRetDelta;
      assert((Delta >= 0) && "Delta must be positive");
      if (MaxTCRetDelta>0)
        FrameSize += (StackAdj +Delta);
      else
        FrameSize += StackAdj;
    }
  }

  // Frames of 32KB & larger require special handling because they cannot be
  // indexed into with a simple LD/LWZ immediate offset operand.
  bool isLargeFrame = !isInt<16>(FrameSize);

  // On targets without red zone, the SP needs to be restored last, so that
  // all live contents of the stack frame are upwards of the SP. This means
  // that we cannot restore SP just now, since there may be more registers
  // to restore from the stack frame (e.g. R31). If the frame size is not
  // a simple immediate value, we will need a spare register to hold the
  // restored SP. If the frame size is known and small, we can simply adjust
  // the offsets of the registers to be restored, and still use SP to restore
  // them. In such case, the final update of SP will be to add the frame
  // size to it.
  // To simplify the code, set RBReg to the base register used to restore
  // values from the stack, and set SPAdd to the value that needs to be added
  // to the SP at the end. The default values are as if red zone was present.
  unsigned RBReg = SPReg;
  unsigned SPAdd = 0;

  // Check if we can move the stack update instruction up the epilogue
  // past the callee saves. This will allow the move to LR instruction
  // to be executed before the restores of the callee saves which means
  // that the callee saves can hide the latency from the MTLR instruction.
  MachineBasicBlock::iterator StackUpdateLoc = MBBI;
  if (stackUpdateCanBeMoved(MF)) {
    const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
    // NOTE(review): CSI is taken by value here, so each entry is copied.
    for (CalleeSavedInfo CSI : Info) {
      int FrIdx = CSI.getFrameIdx();
      // If the frame index is not negative the callee saved info belongs to a
      // stack object that is not a fixed stack object. We ignore non-fixed
      // stack objects because we won't move the update of the stack pointer
      // past them.
      if (FrIdx >= 0)
        continue;

      if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
        StackUpdateLoc--;
      else {
        // Abort the operation as we can't update all CSR restores.
        StackUpdateLoc = MBBI;
        break;
      }
    }
  }

  if (FrameSize) {
    // In the prologue, the loaded (or persistent) stack pointer value is
    // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
    // zone add this offset back now.

    // If this function contained a fastcc call and GuaranteedTailCallOpt is
    // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
    // call which invalidates the stack pointer value in SP(0). So we use the
    // value of R31 in this case.
    if (FI->hasFastCall()) {
      assert(HasFP && "Expecting a valid frame pointer.");
      if (!HasRedZone)
        RBReg = FPReg;
      if (!isLargeFrame) {
        BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
          .addReg(FPReg).addImm(FrameSize);
      } else {
        // FrameSize does not fit a 16-bit immediate: build it in ScratchReg
        // (high half, then low half) and add it to FP.
        BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
          .addImm(FrameSize >> 16);
        BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
          .addReg(ScratchReg, RegState::Kill)
          .addImm(FrameSize & 0xFFFF);
        BuildMI(MBB, MBBI, dl, AddInst)
          .addReg(RBReg)
          .addReg(FPReg)
          .addReg(ScratchReg);
      }
    } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
      if (HasRedZone) {
        BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
          .addReg(SPReg)
          .addImm(FrameSize);
      } else {
        // Make sure that adding FrameSize will not overflow the max offset
        // size.
        assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
               "Local offsets should be negative");
        // Keep SP as the base for now: rebase the save-slot offsets onto the
        // not-yet-restored SP and add FrameSize to SP at the very end.
        SPAdd = FrameSize;
        FPOffset += FrameSize;
        BPOffset += FrameSize;
        PBPOffset += FrameSize;
      }
    } else {
      // We don't want to use ScratchReg as a base register, because it
      // could happen to be R0. Use FP instead, but make sure to preserve it.
      if (!HasRedZone) {
        // If FP is not saved, copy it to ScratchReg.
        if (!HasFP)
          BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
            .addReg(FPReg)
            .addReg(FPReg);
        RBReg = FPReg;
      }
      // Load the word at offset 0 from SP into RBReg.
      BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
        .addImm(0)
        .addReg(SPReg);
    }
  }
  assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
  // If there is no red zone, ScratchReg may be needed for holding a useful
  // value (although not the base register). Make sure it is not overwritten
  // too early.

  // If we need to restore both the LR and the CR and we only have one
  // available scratch register, we must do them one at a time.
  if (MustSaveCR && SingleScratchReg && MustSaveLR) {
    // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
    // is live here.
    assert(HasRedZone && "Expecting red zone");
    BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
      .addImm(CRSaveOffset)
      .addReg(SPReg);
    // TempReg is marked killed on the last move only.
    for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
      BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
        .addReg(TempReg, getKillRegState(i == e-1));
  }

  // Delay restoring of the LR if ScratchReg is needed. This is ok, since
  // LR is stored in the caller's stack frame. ScratchReg will be needed
  // if RBReg is anything other than SP. We shouldn't use ScratchReg as
  // a base register anyway, because it may happen to be R0.
  bool LoadedLR = false;
  if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
    BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
      .addImm(LROffset+SPAdd)
      .addReg(RBReg);
    LoadedLR = true;
  }

  if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
    assert(RBReg == SPReg && "Should be using SP as a base register");
    BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
      .addImm(CRSaveOffset)
      .addReg(RBReg);
  }

  if (HasFP) {
    // If there is red zone, restore FP directly, since SP has already been
    // restored. Otherwise, restore the value of FP into ScratchReg.
    if (HasRedZone || RBReg == SPReg)
      BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
        .addImm(FPOffset)
        .addReg(SPReg);
    else
      BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
        .addImm(FPOffset)
        .addReg(RBReg);
  }

  if (FI->usesPICBase())
    BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
      .addImm(PBPOffset)
      .addReg(RBReg);

  if (HasBP)
    BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
      .addImm(BPOffset)
      .addReg(RBReg);

  // There is nothing more to be loaded from the stack, so now we can
  // restore SP: SP = RBReg + SPAdd.
  if (RBReg != SPReg || SPAdd != 0) {
    assert(!HasRedZone && "This should not happen with red zone");
    // If SPAdd is 0, generate a copy.
    if (SPAdd == 0)
      BuildMI(MBB, MBBI, dl, OrInst, SPReg)
        .addReg(RBReg)
        .addReg(RBReg);
    else
      BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
        .addReg(RBReg)
        .addImm(SPAdd);

    assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
    // FP was used as the base register; its own value was parked in
    // ScratchReg above, so copy it back now.
    if (RBReg == FPReg)
      BuildMI(MBB, MBBI, dl, OrInst, FPReg)
        .addReg(ScratchReg)
        .addReg(ScratchReg);

    // Now load the LR from the caller's stack frame.
    if (MustSaveLR && !LoadedLR)
      BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
        .addImm(LROffset)
        .addReg(SPReg);
  }

  if (MustSaveCR &&
      !(SingleScratchReg && MustSaveLR))
    for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
      BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
        .addReg(TempReg, getKillRegState(i == e-1));

  if (MustSaveLR)
    BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);

  // Callee pop calling convention. Pop parameter/linkage area. Used for tail
  // call optimization
  if (IsReturnBlock) {
    unsigned RetOpcode = MBBI->getOpcode();
    if (MF.getTarget().Options.GuaranteedTailCallOpt &&
        (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
        MF.getFunction().getCallingConv() == CallingConv::Fast) {
      // NOTE(review): this FI shadows the outer FI, which was obtained from
      // the same MF.getInfo<PPCFunctionInfo>() call.
      PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
      unsigned CallerAllocatedAmt = FI->getMinReservedArea();

      // NOTE(review): the else branch is also taken when CallerAllocatedAmt
      // is 0, and it adds FPReg (not SPReg) to ScratchReg -- confirm that
      // both are intended.
      if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
        BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
          .addReg(SPReg).addImm(CallerAllocatedAmt);
      } else {
        BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
          .addImm(CallerAllocatedAmt >> 16);
        BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
          .addReg(ScratchReg, RegState::Kill)
          .addImm(CallerAllocatedAmt & 0xFFFF);
        BuildMI(MBB, MBBI, dl, AddInst)
          .addReg(SPReg)
          .addReg(FPReg)
          .addReg(ScratchReg);
      }
    } else {
      createTailCallBranchInstr(MBB);
    }
  }
}

/// If the first terminator of \p MBB is one of the TCRETURN* pseudos, build
/// the matching tail-call branch at the block's last non-debug instruction:
/// TAILB/TAILB8 for the direct (di) forms, TAILBCTR/TAILBCTR8 for the register
/// (ri) forms and TAILBA/TAILBA8 for the absolute (ai) forms. Any other
/// terminator opcode leaves the block unchanged. The pseudo itself is not
/// removed here.
void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();

  // If we got this far a first terminator should exist.
  assert(MBBI != MBB.end() && "Failed to find the first terminator.");

  DebugLoc dl = MBBI->getDebugLoc();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();

  // Create branch instruction for pseudo tail call return instruction.
  // The TCRETURNdi variants are direct calls. Valid targets for those are
  // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel
  // since we can tail call external functions with PC-Rel (i.e. we don't need
  // to worry about different TOC pointers). Some of the external functions will
  // be MO_GlobalAddress while others like memcpy for example, are going to
  // be MO_ExternalSymbol.
  unsigned RetOpcode = MBBI->getOpcode();
  if (RetOpcode == PPC::TCRETURNdi) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    if (JumpTarget.isGlobal())
      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
        addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
    else if (JumpTarget.isSymbol())
      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
        addExternalSymbol(JumpTarget.getSymbolName());
    else
      llvm_unreachable("Expecting Global or External Symbol");
  } else if (RetOpcode == PPC::TCRETURNri) {
    MBBI = MBB.getLastNonDebugInstr();
    assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
  } else if (RetOpcode == PPC::TCRETURNai) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
  } else if (RetOpcode == PPC::TCRETURNdi8) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    if (JumpTarget.isGlobal())
      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
        addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
    else if (JumpTarget.isSymbol())
      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
        addExternalSymbol(JumpTarget.getSymbolName());
    else
      llvm_unreachable("Expecting Global or External Symbol");
  } else if (RetOpcode == PPC::TCRETURNri8) {
    MBBI = MBB.getLastNonDebugInstr();
    assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
  } else if (RetOpcode == PPC::TCRETURNai8) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
  }
}

void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                            BitVector &SavedRegs,
                                            RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  //  Save and clear the LR state.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  unsigned LR = RegInfo->getRARegister();
  FI->setMustSaveLR(MustSaveLR(MF, LR));
  SavedRegs.reset(LR);

  //  Save R31 if necessary
  int FPSI = FI->getFramePointerSaveIndex();
  const bool isPPC64 = Subtarget.isPPC64();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // If the frame pointer save index hasn't been defined yet.
  if (!FPSI && needsFP(MF)) {
    // Find out what the fix offset of the frame pointer save area.
    int FPOffset = getFramePointerSaveOffset();
    // Allocate the frame index for frame pointer save area.
    FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
    // Save the result.
    FI->setFramePointerSaveIndex(FPSI);
  }

  int BPSI = FI->getBasePointerSaveIndex();
  if (!BPSI && RegInfo->hasBasePointer(MF)) {
    int BPOffset = getBasePointerSaveOffset();
    // Allocate the frame index for the base pointer save area.
    BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
    // Save the result.
1793 FI->setBasePointerSaveIndex(BPSI); 1794 } 1795 1796 // Reserve stack space for the PIC Base register (R30). 1797 // Only used in SVR4 32-bit. 1798 if (FI->usesPICBase()) { 1799 int PBPSI = MFI.CreateFixedObject(4, -8, true); 1800 FI->setPICBasePointerSaveIndex(PBPSI); 1801 } 1802 1803 // Make sure we don't explicitly spill r31, because, for example, we have 1804 // some inline asm which explicitly clobbers it, when we otherwise have a 1805 // frame pointer and are using r31's spill slot for the prologue/epilogue 1806 // code. Same goes for the base pointer and the PIC base register. 1807 if (needsFP(MF)) 1808 SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31); 1809 if (RegInfo->hasBasePointer(MF)) 1810 SavedRegs.reset(RegInfo->getBaseRegister(MF)); 1811 if (FI->usesPICBase()) 1812 SavedRegs.reset(PPC::R30); 1813 1814 // Reserve stack space to move the linkage area to in case of a tail call. 1815 int TCSPDelta = 0; 1816 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1817 (TCSPDelta = FI->getTailCallSPDelta()) < 0) { 1818 MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true); 1819 } 1820 1821 // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4. 1822 // For 64-bit SVR4, and all flavors of AIX we create a FixedStack 1823 // object at the offset of the CR-save slot in the linkage area. The actual 1824 // save and restore of the condition register will be created as part of the 1825 // prologue and epilogue insertion, but the FixedStack object is needed to 1826 // keep the CalleSavedInfo valid. 1827 if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) || 1828 SavedRegs.test(PPC::CR4))) { 1829 const uint64_t SpillSize = 4; // Condition register is always 4 bytes. 1830 const int64_t SpillOffset = 1831 Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 
4 : -4; 1832 int FrameIdx = 1833 MFI.CreateFixedObject(SpillSize, SpillOffset, 1834 /* IsImmutable */ true, /* IsAliased */ false); 1835 FI->setCRSpillFrameIndex(FrameIdx); 1836 } 1837 } 1838 1839 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, 1840 RegScavenger *RS) const { 1841 // Get callee saved register information. 1842 MachineFrameInfo &MFI = MF.getFrameInfo(); 1843 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 1844 1845 // If the function is shrink-wrapped, and if the function has a tail call, the 1846 // tail call might not be in the new RestoreBlock, so real branch instruction 1847 // won't be generated by emitEpilogue(), because shrink-wrap has chosen new 1848 // RestoreBlock. So we handle this case here. 1849 if (MFI.getSavePoint() && MFI.hasTailCall()) { 1850 MachineBasicBlock *RestoreBlock = MFI.getRestorePoint(); 1851 for (MachineBasicBlock &MBB : MF) { 1852 if (MBB.isReturnBlock() && (&MBB) != RestoreBlock) 1853 createTailCallBranchInstr(MBB); 1854 } 1855 } 1856 1857 // Early exit if no callee saved registers are modified! 1858 if (CSI.empty() && !needsFP(MF)) { 1859 addScavengingSpillSlot(MF, RS); 1860 return; 1861 } 1862 1863 unsigned MinGPR = PPC::R31; 1864 unsigned MinG8R = PPC::X31; 1865 unsigned MinFPR = PPC::F31; 1866 unsigned MinVR = Subtarget.hasSPE() ? 
PPC::S31 : PPC::V31; 1867 1868 bool HasGPSaveArea = false; 1869 bool HasG8SaveArea = false; 1870 bool HasFPSaveArea = false; 1871 bool HasVRSaveArea = false; 1872 1873 SmallVector<CalleeSavedInfo, 18> GPRegs; 1874 SmallVector<CalleeSavedInfo, 18> G8Regs; 1875 SmallVector<CalleeSavedInfo, 18> FPRegs; 1876 SmallVector<CalleeSavedInfo, 18> VRegs; 1877 1878 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 1879 unsigned Reg = CSI[i].getReg(); 1880 assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() || 1881 (Reg != PPC::X2 && Reg != PPC::R2)) && 1882 "Not expecting to try to spill R2 in a function that must save TOC"); 1883 if (PPC::GPRCRegClass.contains(Reg)) { 1884 HasGPSaveArea = true; 1885 1886 GPRegs.push_back(CSI[i]); 1887 1888 if (Reg < MinGPR) { 1889 MinGPR = Reg; 1890 } 1891 } else if (PPC::G8RCRegClass.contains(Reg)) { 1892 HasG8SaveArea = true; 1893 1894 G8Regs.push_back(CSI[i]); 1895 1896 if (Reg < MinG8R) { 1897 MinG8R = Reg; 1898 } 1899 } else if (PPC::F8RCRegClass.contains(Reg)) { 1900 HasFPSaveArea = true; 1901 1902 FPRegs.push_back(CSI[i]); 1903 1904 if (Reg < MinFPR) { 1905 MinFPR = Reg; 1906 } 1907 } else if (PPC::CRBITRCRegClass.contains(Reg) || 1908 PPC::CRRCRegClass.contains(Reg)) { 1909 ; // do nothing, as we already know whether CRs are spilled 1910 } else if (PPC::VRRCRegClass.contains(Reg) || 1911 PPC::SPERCRegClass.contains(Reg)) { 1912 // Altivec and SPE are mutually exclusive, but have the same stack 1913 // alignment requirements, so overload the save area for both cases. 1914 HasVRSaveArea = true; 1915 1916 VRegs.push_back(CSI[i]); 1917 1918 if (Reg < MinVR) { 1919 MinVR = Reg; 1920 } 1921 } else { 1922 llvm_unreachable("Unknown RegisterClass!"); 1923 } 1924 } 1925 1926 PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>(); 1927 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 1928 1929 int64_t LowerBound = 0; 1930 1931 // Take into account stack space reserved for tail calls. 
1932 int TCSPDelta = 0; 1933 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1934 (TCSPDelta = PFI->getTailCallSPDelta()) < 0) { 1935 LowerBound = TCSPDelta; 1936 } 1937 1938 // The Floating-point register save area is right below the back chain word 1939 // of the previous stack frame. 1940 if (HasFPSaveArea) { 1941 for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) { 1942 int FI = FPRegs[i].getFrameIdx(); 1943 1944 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1945 } 1946 1947 LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8; 1948 } 1949 1950 // Check whether the frame pointer register is allocated. If so, make sure it 1951 // is spilled to the correct offset. 1952 if (needsFP(MF)) { 1953 int FI = PFI->getFramePointerSaveIndex(); 1954 assert(FI && "No Frame Pointer Save Slot!"); 1955 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1956 // FP is R31/X31, so no need to update MinGPR/MinG8R. 1957 HasGPSaveArea = true; 1958 } 1959 1960 if (PFI->usesPICBase()) { 1961 int FI = PFI->getPICBasePointerSaveIndex(); 1962 assert(FI && "No PIC Base Pointer Save Slot!"); 1963 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1964 1965 MinGPR = std::min<unsigned>(MinGPR, PPC::R30); 1966 HasGPSaveArea = true; 1967 } 1968 1969 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1970 if (RegInfo->hasBasePointer(MF)) { 1971 int FI = PFI->getBasePointerSaveIndex(); 1972 assert(FI && "No Base Pointer Save Slot!"); 1973 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1974 1975 Register BP = RegInfo->getBaseRegister(MF); 1976 if (PPC::G8RCRegClass.contains(BP)) { 1977 MinG8R = std::min<unsigned>(MinG8R, BP); 1978 HasG8SaveArea = true; 1979 } else if (PPC::GPRCRegClass.contains(BP)) { 1980 MinGPR = std::min<unsigned>(MinGPR, BP); 1981 HasGPSaveArea = true; 1982 } 1983 } 1984 1985 // General register save area starts right below the Floating-point 1986 // register save area. 
1987 if (HasGPSaveArea || HasG8SaveArea) { 1988 // Move general register save area spill slots down, taking into account 1989 // the size of the Floating-point register save area. 1990 for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) { 1991 if (!GPRegs[i].isSpilledToReg()) { 1992 int FI = GPRegs[i].getFrameIdx(); 1993 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1994 } 1995 } 1996 1997 // Move general register save area spill slots down, taking into account 1998 // the size of the Floating-point register save area. 1999 for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) { 2000 if (!G8Regs[i].isSpilledToReg()) { 2001 int FI = G8Regs[i].getFrameIdx(); 2002 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2003 } 2004 } 2005 2006 unsigned MinReg = 2007 std::min<unsigned>(TRI->getEncodingValue(MinGPR), 2008 TRI->getEncodingValue(MinG8R)); 2009 2010 const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4; 2011 LowerBound -= (31 - MinReg + 1) * GPRegSize; 2012 } 2013 2014 // For 32-bit only, the CR save area is below the general register 2015 // save area. For 64-bit SVR4, the CR save area is addressed relative 2016 // to the stack pointer and hence does not need an adjustment here. 2017 // Only CR2 (the first nonvolatile spilled) has an associated frame 2018 // index so that we have a single uniform save area. 2019 if (spillsCR(MF) && Subtarget.is32BitELFABI()) { 2020 // Adjust the frame index of the CR spill slot. 2021 for (const auto &CSInfo : CSI) { 2022 if (CSInfo.getReg() == PPC::CR2) { 2023 int FI = CSInfo.getFrameIdx(); 2024 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2025 break; 2026 } 2027 } 2028 2029 LowerBound -= 4; // The CR save area is always 4 bytes long. 2030 } 2031 2032 // Both Altivec and SPE have the same alignment and padding requirements 2033 // within the stack frame. 2034 if (HasVRSaveArea) { 2035 // Insert alignment padding, we need 16-byte alignment. 
Note: for positive 2036 // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since 2037 // we are using negative number here (the stack grows downward). We should 2038 // use formula : y = x & (~(n-1)). Where x is the size before aligning, n 2039 // is the alignment size ( n = 16 here) and y is the size after aligning. 2040 assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!"); 2041 LowerBound &= ~(15); 2042 2043 for (unsigned i = 0, e = VRegs.size(); i != e; ++i) { 2044 int FI = VRegs[i].getFrameIdx(); 2045 2046 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2047 } 2048 } 2049 2050 addScavengingSpillSlot(MF, RS); 2051 } 2052 2053 void 2054 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, 2055 RegScavenger *RS) const { 2056 // Reserve a slot closest to SP or frame pointer if we have a dynalloc or 2057 // a large stack, which will require scavenging a register to materialize a 2058 // large offset. 2059 2060 // We need to have a scavenger spill slot for spills if the frame size is 2061 // large. In case there is no free register for large-offset addressing, 2062 // this slot is used for the necessary emergency spill. Also, we need the 2063 // slot for dynamic stack allocations. 2064 2065 // The scavenger might be invoked if the frame offset does not fit into 2066 // the 16-bit immediate. We don't know the complete frame size here 2067 // because we've not yet computed callee-saved register spills or the 2068 // needed alignment padding. 2069 unsigned StackSize = determineFrameLayout(MF, true); 2070 MachineFrameInfo &MFI = MF.getFrameInfo(); 2071 if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) || 2072 (hasSpills(MF) && !isInt<16>(StackSize))) { 2073 const TargetRegisterClass &GPRC = PPC::GPRCRegClass; 2074 const TargetRegisterClass &G8RC = PPC::G8RCRegClass; 2075 const TargetRegisterClass &RC = Subtarget.isPPC64() ? 
G8RC : GPRC; 2076 const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo(); 2077 unsigned Size = TRI.getSpillSize(RC); 2078 Align Alignment = TRI.getSpillAlign(RC); 2079 RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false)); 2080 2081 // Might we have over-aligned allocas? 2082 bool HasAlVars = 2083 MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign(); 2084 2085 // These kinds of spills might need two registers. 2086 if (spillsCR(MF) || HasAlVars) 2087 RS->addScavengingFrameIndex( 2088 MFI.CreateStackObject(Size, Alignment, false)); 2089 } 2090 } 2091 2092 // This function checks if a callee saved gpr can be spilled to a volatile 2093 // vector register. This occurs for leaf functions when the option 2094 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers 2095 // which were not spilled to vectors, return false so the target independent 2096 // code can handle them by assigning a FrameIdx to a stack slot. 2097 bool PPCFrameLowering::assignCalleeSavedSpillSlots( 2098 MachineFunction &MF, const TargetRegisterInfo *TRI, 2099 std::vector<CalleeSavedInfo> &CSI) const { 2100 2101 if (CSI.empty()) 2102 return true; // Early exit if no callee saved registers are modified! 2103 2104 // Early exit if cannot spill gprs to volatile vector registers. 2105 MachineFrameInfo &MFI = MF.getFrameInfo(); 2106 if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector()) 2107 return false; 2108 2109 // Build a BitVector of VSRs that can be used for spilling GPRs. 
2110 BitVector BVAllocatable = TRI->getAllocatableSet(MF); 2111 BitVector BVCalleeSaved(TRI->getNumRegs()); 2112 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 2113 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); 2114 for (unsigned i = 0; CSRegs[i]; ++i) 2115 BVCalleeSaved.set(CSRegs[i]); 2116 2117 for (unsigned Reg : BVAllocatable.set_bits()) { 2118 // Set to 0 if the register is not a volatile VF/F8 register, or if it is 2119 // used in the function. 2120 if (BVCalleeSaved[Reg] || 2121 (!PPC::F8RCRegClass.contains(Reg) && 2122 !PPC::VFRCRegClass.contains(Reg)) || 2123 (MF.getRegInfo().isPhysRegUsed(Reg))) 2124 BVAllocatable.reset(Reg); 2125 } 2126 2127 bool AllSpilledToReg = true; 2128 for (auto &CS : CSI) { 2129 if (BVAllocatable.none()) 2130 return false; 2131 2132 unsigned Reg = CS.getReg(); 2133 if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) { 2134 AllSpilledToReg = false; 2135 continue; 2136 } 2137 2138 unsigned VolatileVFReg = BVAllocatable.find_first(); 2139 if (VolatileVFReg < BVAllocatable.size()) { 2140 CS.setDstReg(VolatileVFReg); 2141 BVAllocatable.reset(VolatileVFReg); 2142 } else { 2143 AllSpilledToReg = false; 2144 } 2145 } 2146 return AllSpilledToReg; 2147 } 2148 2149 bool PPCFrameLowering::spillCalleeSavedRegisters( 2150 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 2151 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { 2152 2153 MachineFunction *MF = MBB.getParent(); 2154 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 2155 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); 2156 bool MustSaveTOC = FI->mustSaveTOC(); 2157 DebugLoc DL; 2158 bool CRSpilled = false; 2159 MachineInstrBuilder CRMIB; 2160 2161 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2162 unsigned Reg = CSI[i].getReg(); 2163 2164 // CR2 through CR4 are the nonvolatile CR fields. 
2165 bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4; 2166 2167 // Add the callee-saved register as live-in; it's killed at the spill. 2168 // Do not do this for callee-saved registers that are live-in to the 2169 // function because they will already be marked live-in and this will be 2170 // adding it for a second time. It is an error to add the same register 2171 // to the set more than once. 2172 const MachineRegisterInfo &MRI = MF->getRegInfo(); 2173 bool IsLiveIn = MRI.isLiveIn(Reg); 2174 if (!IsLiveIn) 2175 MBB.addLiveIn(Reg); 2176 2177 if (CRSpilled && IsCRField) { 2178 CRMIB.addReg(Reg, RegState::ImplicitKill); 2179 continue; 2180 } 2181 2182 // The actual spill will happen in the prologue. 2183 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 2184 continue; 2185 2186 // Insert the spill to the stack frame. 2187 if (IsCRField) { 2188 PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>(); 2189 if (!Subtarget.is32BitELFABI()) { 2190 // The actual spill will happen at the start of the prologue. 2191 FuncInfo->addMustSaveCR(Reg); 2192 } else { 2193 CRSpilled = true; 2194 FuncInfo->setSpillsCR(); 2195 2196 // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have 2197 // the same frame index in PPCRegisterInfo::hasReservedSpillSlot. 2198 CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12) 2199 .addReg(Reg, RegState::ImplicitKill); 2200 2201 MBB.insert(MI, CRMIB); 2202 MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW)) 2203 .addReg(PPC::R12, 2204 getKillRegState(true)), 2205 CSI[i].getFrameIdx())); 2206 } 2207 } else { 2208 if (CSI[i].isSpilledToReg()) { 2209 NumPESpillVSR++; 2210 BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg()) 2211 .addReg(Reg, getKillRegState(true)); 2212 } else { 2213 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 2214 // Use !IsLiveIn for the kill flag. 
2215 // We do not want to kill registers that are live in this function 2216 // before their use because they will become undefined registers. 2217 // Functions without NoUnwind need to preserve the order of elements in 2218 // saved vector registers. 2219 if (Subtarget.needsSwapsForVSXMemOps() && 2220 !MF->getFunction().hasFnAttribute(Attribute::NoUnwind)) 2221 TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn, 2222 CSI[i].getFrameIdx(), RC, TRI); 2223 else 2224 TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(), 2225 RC, TRI); 2226 } 2227 } 2228 } 2229 return true; 2230 } 2231 2232 static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled, 2233 bool CR4Spilled, MachineBasicBlock &MBB, 2234 MachineBasicBlock::iterator MI, 2235 ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) { 2236 2237 MachineFunction *MF = MBB.getParent(); 2238 const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo(); 2239 DebugLoc DL; 2240 unsigned MoveReg = PPC::R12; 2241 2242 // 32-bit: FP-relative 2243 MBB.insert(MI, 2244 addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg), 2245 CSI[CSIIndex].getFrameIdx())); 2246 2247 unsigned RestoreOp = PPC::MTOCRF; 2248 if (CR2Spilled) 2249 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2) 2250 .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled))); 2251 2252 if (CR3Spilled) 2253 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3) 2254 .addReg(MoveReg, getKillRegState(!CR4Spilled))); 2255 2256 if (CR4Spilled) 2257 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4) 2258 .addReg(MoveReg, getKillRegState(true))); 2259 } 2260 2261 MachineBasicBlock::iterator PPCFrameLowering:: 2262 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, 2263 MachineBasicBlock::iterator I) const { 2264 const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); 2265 if (MF.getTarget().Options.GuaranteedTailCallOpt && 2266 I->getOpcode() == PPC::ADJCALLSTACKUP) 
{ 2267 // Add (actually subtract) back the amount the callee popped on return. 2268 if (int CalleeAmt = I->getOperand(1).getImm()) { 2269 bool is64Bit = Subtarget.isPPC64(); 2270 CalleeAmt *= -1; 2271 unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1; 2272 unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0; 2273 unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI; 2274 unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4; 2275 unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS; 2276 unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI; 2277 const DebugLoc &dl = I->getDebugLoc(); 2278 2279 if (isInt<16>(CalleeAmt)) { 2280 BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg) 2281 .addReg(StackReg, RegState::Kill) 2282 .addImm(CalleeAmt); 2283 } else { 2284 MachineBasicBlock::iterator MBBI = I; 2285 BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) 2286 .addImm(CalleeAmt >> 16); 2287 BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) 2288 .addReg(TmpReg, RegState::Kill) 2289 .addImm(CalleeAmt & 0xFFFF); 2290 BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg) 2291 .addReg(StackReg, RegState::Kill) 2292 .addReg(TmpReg); 2293 } 2294 } 2295 } 2296 // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. 
2297 return MBB.erase(I); 2298 } 2299 2300 static bool isCalleeSavedCR(unsigned Reg) { 2301 return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4; 2302 } 2303 2304 bool PPCFrameLowering::restoreCalleeSavedRegisters( 2305 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 2306 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { 2307 MachineFunction *MF = MBB.getParent(); 2308 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 2309 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); 2310 bool MustSaveTOC = FI->mustSaveTOC(); 2311 bool CR2Spilled = false; 2312 bool CR3Spilled = false; 2313 bool CR4Spilled = false; 2314 unsigned CSIIndex = 0; 2315 2316 // Initialize insertion-point logic; we will be restoring in reverse 2317 // order of spill. 2318 MachineBasicBlock::iterator I = MI, BeforeI = I; 2319 bool AtStart = I == MBB.begin(); 2320 2321 if (!AtStart) 2322 --BeforeI; 2323 2324 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2325 unsigned Reg = CSI[i].getReg(); 2326 2327 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 2328 continue; 2329 2330 // Restore of callee saved condition register field is handled during 2331 // epilogue insertion. 2332 if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI()) 2333 continue; 2334 2335 if (Reg == PPC::CR2) { 2336 CR2Spilled = true; 2337 // The spill slot is associated only with CR2, which is the 2338 // first nonvolatile spilled. Save it here. 2339 CSIIndex = i; 2340 continue; 2341 } else if (Reg == PPC::CR3) { 2342 CR3Spilled = true; 2343 continue; 2344 } else if (Reg == PPC::CR4) { 2345 CR4Spilled = true; 2346 continue; 2347 } else { 2348 // On 32-bit ELF when we first encounter a non-CR register after seeing at 2349 // least one CR register, restore all spilled CRs together. 
2350 if (CR2Spilled || CR3Spilled || CR4Spilled) { 2351 bool is31 = needsFP(*MF); 2352 restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, 2353 CSIIndex); 2354 CR2Spilled = CR3Spilled = CR4Spilled = false; 2355 } 2356 2357 if (CSI[i].isSpilledToReg()) { 2358 DebugLoc DL; 2359 NumPEReloadVSR++; 2360 BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg) 2361 .addReg(CSI[i].getDstReg(), getKillRegState(true)); 2362 } else { 2363 // Default behavior for non-CR saves. 2364 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 2365 2366 // Functions without NoUnwind need to preserve the order of elements in 2367 // saved vector registers. 2368 if (Subtarget.needsSwapsForVSXMemOps() && 2369 !MF->getFunction().hasFnAttribute(Attribute::NoUnwind)) 2370 TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC, 2371 TRI); 2372 else 2373 TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI); 2374 2375 assert(I != MBB.begin() && 2376 "loadRegFromStackSlot didn't insert any code!"); 2377 } 2378 } 2379 2380 // Insert in reverse order. 2381 if (AtStart) 2382 I = MBB.begin(); 2383 else { 2384 I = BeforeI; 2385 ++I; 2386 } 2387 } 2388 2389 // If we haven't yet spilled the CRs, do so now. 
2390 if (CR2Spilled || CR3Spilled || CR4Spilled) { 2391 assert(Subtarget.is32BitELFABI() && 2392 "Only set CR[2|3|4]Spilled on 32-bit SVR4."); 2393 bool is31 = needsFP(*MF); 2394 restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex); 2395 } 2396 2397 return true; 2398 } 2399 2400 unsigned PPCFrameLowering::getTOCSaveOffset() const { 2401 return TOCSaveOffset; 2402 } 2403 2404 unsigned PPCFrameLowering::getFramePointerSaveOffset() const { 2405 return FramePointerSaveOffset; 2406 } 2407 2408 unsigned PPCFrameLowering::getBasePointerSaveOffset() const { 2409 return BasePointerSaveOffset; 2410 } 2411 2412 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { 2413 if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled()) 2414 return false; 2415 return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() && 2416 MF.getSubtarget<PPCSubtarget>().isPPC64()); 2417 } 2418