//===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the PPC implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/PPCPredicates.h"
#include "PPCFrameLowering.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Function.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define DEBUG_TYPE "framelowering"
STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
STATISTIC(NumPrologProbed, "Number of prologues probed");

static cl::opt<bool>
    EnablePEVectorSpills("ppc-enable-pe-vector-spills",
                         cl::desc("Enable spills in prologue to vector registers."),
                         cl::init(false), cl::Hidden);

static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
  if (STI.isAIXABI())
    return STI.isPPC64() ? 16 : 8;
  // SVR4 ABI:
  return STI.isPPC64() ? 16 : 4;
}

static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
  if (STI.isAIXABI())
    return STI.isPPC64() ? 40 : 20;
  return STI.isELFv2ABI() ? 24 : 40;
}

static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
  // First slot in the general register save area.
  return STI.isPPC64() ? -8U : -4U;
}

static unsigned computeLinkageSize(const PPCSubtarget &STI) {
  if (STI.isAIXABI() || STI.isPPC64())
    return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);

  // 32-bit SVR4 ABI:
  return 8;
}

static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
  // Third slot in the general purpose register save area.
  if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent())
    return -12U;

  // Second slot in the general purpose register save area.
  return STI.isPPC64() ? -16U : -8U;
}

static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
  return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
}

PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
                          STI.getPlatformStackAlignment(), 0),
      Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
      TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
      FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
      LinkageSize(computeLinkageSize(Subtarget)),
      BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
      CRSaveOffset(computeCRSaveOffset(Subtarget)) {}
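
// For reference, a worked example of the helpers above: on a 64-bit ELFv2
// subtarget they compute ReturnSaveOffset = 16, TOCSaveOffset = 24,
// FramePointerSaveOffset = -8U, LinkageSize = 4 * 8 = 32,
// BasePointerSaveOffset = -16U, and CRSaveOffset = 8.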

// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
    unsigned &NumEntries) const {

// Floating-point register save area offsets.
#define CALLEE_SAVED_FPRS \
  {PPC::F31, -8},   \
  {PPC::F30, -16},  \
  {PPC::F29, -24},  \
  {PPC::F28, -32},  \
  {PPC::F27, -40},  \
  {PPC::F26, -48},  \
  {PPC::F25, -56},  \
  {PPC::F24, -64},  \
  {PPC::F23, -72},  \
  {PPC::F22, -80},  \
  {PPC::F21, -88},  \
  {PPC::F20, -96},  \
  {PPC::F19, -104}, \
  {PPC::F18, -112}, \
  {PPC::F17, -120}, \
  {PPC::F16, -128}, \
  {PPC::F15, -136}, \
  {PPC::F14, -144}

// 32-bit general purpose register save area offsets shared by ELF and
// AIX. AIX has an extra CSR with r13.
#define CALLEE_SAVED_GPRS32 \
  {PPC::R31, -4},  \
  {PPC::R30, -8},  \
  {PPC::R29, -12}, \
  {PPC::R28, -16}, \
  {PPC::R27, -20}, \
  {PPC::R26, -24}, \
  {PPC::R25, -28}, \
  {PPC::R24, -32}, \
  {PPC::R23, -36}, \
  {PPC::R22, -40}, \
  {PPC::R21, -44}, \
  {PPC::R20, -48}, \
  {PPC::R19, -52}, \
  {PPC::R18, -56}, \
  {PPC::R17, -60}, \
  {PPC::R16, -64}, \
  {PPC::R15, -68}, \
  {PPC::R14, -72}

// 64-bit general purpose register save area offsets.
#define CALLEE_SAVED_GPRS64 \
  {PPC::X31, -8},   \
  {PPC::X30, -16},  \
  {PPC::X29, -24},  \
  {PPC::X28, -32},  \
  {PPC::X27, -40},  \
  {PPC::X26, -48},  \
  {PPC::X25, -56},  \
  {PPC::X24, -64},  \
  {PPC::X23, -72},  \
  {PPC::X22, -80},  \
  {PPC::X21, -88},  \
  {PPC::X20, -96},  \
  {PPC::X19, -104}, \
  {PPC::X18, -112}, \
  {PPC::X17, -120}, \
  {PPC::X16, -128}, \
  {PPC::X15, -136}, \
  {PPC::X14, -144}

// Vector register save area offsets.
#define CALLEE_SAVED_VRS \
  {PPC::V31, -16},  \
  {PPC::V30, -32},  \
  {PPC::V29, -48},  \
  {PPC::V28, -64},  \
  {PPC::V27, -80},  \
  {PPC::V26, -96},  \
  {PPC::V25, -112}, \
  {PPC::V24, -128}, \
  {PPC::V23, -144}, \
  {PPC::V22, -160}, \
  {PPC::V21, -176}, \
  {PPC::V20, -192}

  // Note that the offsets here overlap, but this is fixed up in
  // processFunctionBeforeFrameFinalized.

  static const SpillSlot ELFOffsets32[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS32,

      // CR save area offset. We map each of the nonvolatile CR fields
      // to the slot for CR2, which is the first of the nonvolatile CR
      // fields to be assigned, so that we only allocate one save slot.
      // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
      {PPC::CR2, -4},

      // VRSAVE save area offset.
      {PPC::VRSAVE, -4},

      CALLEE_SAVED_VRS,

      // SPE register save area (overlaps Vector save area).
      {PPC::S31, -8},
      {PPC::S30, -16},
      {PPC::S29, -24},
      {PPC::S28, -32},
      {PPC::S27, -40},
      {PPC::S26, -48},
      {PPC::S25, -56},
      {PPC::S24, -64},
      {PPC::S23, -72},
      {PPC::S22, -80},
      {PPC::S21, -88},
      {PPC::S20, -96},
      {PPC::S19, -104},
      {PPC::S18, -112},
      {PPC::S17, -120},
      {PPC::S16, -128},
      {PPC::S15, -136},
      {PPC::S14, -144}};

  static const SpillSlot ELFOffsets64[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS64,

      // VRSAVE save area offset.
      {PPC::VRSAVE, -4},
      CALLEE_SAVED_VRS
  };

  static const SpillSlot AIXOffsets32[] = {CALLEE_SAVED_FPRS,
                                           CALLEE_SAVED_GPRS32,
                                           // Add AIX's extra CSR.
                                           {PPC::R13, -76},
                                           CALLEE_SAVED_VRS};

  static const SpillSlot AIXOffsets64[] = {
      CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS64, CALLEE_SAVED_VRS};

  if (Subtarget.is64BitELFABI()) {
    NumEntries = array_lengthof(ELFOffsets64);
    return ELFOffsets64;
  }

  if (Subtarget.is32BitELFABI()) {
    NumEntries = array_lengthof(ELFOffsets32);
    return ELFOffsets32;
  }

  assert(Subtarget.isAIXABI() && "Unexpected ABI.");

  if (Subtarget.isPPC64()) {
    NumEntries = array_lengthof(AIXOffsets64);
    return AIXOffsets64;
  }

  NumEntries = array_lengthof(AIXOffsets32);
  return AIXOffsets32;
}

static bool spillsCR(const MachineFunction &MF) {
  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  return FuncInfo->isCRSpilled();
}

static bool hasSpills(const MachineFunction &MF) {
  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  return FuncInfo->hasSpills();
}

static bool hasNonRISpills(const MachineFunction &MF) {
  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  return FuncInfo->hasNonRISpills();
}

/// MustSaveLR - Return true if this function requires that we save the LR
/// register onto the stack in the prolog and restore it in the epilog of the
/// function.
static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
  const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();

  // We need a save/restore of LR if there is any def of LR (which is
  // defined by calls, including the PIC setup sequence), or if there is
  // some use of the LR stack slot (e.g. for builtin_return_address).
  // (LR comes in 32 and 64 bit versions.)
  MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
  return RI != MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
}

/// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
/// call frame size. Update the MachineFunction object with the stack size.
unsigned
PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
                                                bool UseEstimate) const {
  unsigned NewMaxCallFrameSize = 0;
  unsigned FrameSize = determineFrameLayout(MF, UseEstimate,
                                            &NewMaxCallFrameSize);
  MF.getFrameInfo().setStackSize(FrameSize);
  MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
  return FrameSize;
}

/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
unsigned
PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
                                       bool UseEstimate,
                                       unsigned *NewMaxCallFrameSize) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();

  // Get the number of bytes to allocate from the FrameInfo.
  unsigned FrameSize =
      UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();

  // Get stack alignments. The frame must be aligned to the greatest of these:
  Align TargetAlign = getStackAlign(); // alignment required per the ABI
  Align MaxAlign = MFI.getMaxAlign();  // alignment required by data in frame
  Align Alignment = std::max(TargetAlign, MaxAlign);

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  unsigned LR = RegInfo->getRARegister();
  bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
  bool CanUseRedZone = !MFI.hasVarSizedObjects() &&  // No dynamic alloca.
                       !MFI.adjustsStack() &&        // No calls.
                       !MustSaveLR(MF, LR) &&        // No need to save LR.
                       !FI->mustSaveTOC() &&         // No need to save TOC.
                       !RegInfo->hasBasePointer(MF); // No special alignment.

  // Note: for PPC32 SVR4ABI, we can still generate stackless
  // code if all local vars are reg-allocated.
  bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();

  // Check whether we can skip adjusting the stack pointer (by using red zone)
  if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
    // No need for frame
    return 0;
  }

  // Get the maximum call frame size of all the calls.
  unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();

  // Maximum call frame needs to be at least big enough for linkage area.
  unsigned minCallFrameSize = getLinkageSize();
  maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);

  // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
  // that allocations will be aligned.
  if (MFI.hasVarSizedObjects())
    maxCallFrameSize = alignTo(maxCallFrameSize, Alignment);

  // Update the new max call frame size if the caller passes in a valid pointer.
  if (NewMaxCallFrameSize)
    *NewMaxCallFrameSize = maxCallFrameSize;

  // Include call frame size in total.
  FrameSize += maxCallFrameSize;

  // Make sure the frame is aligned.
  FrameSize = alignTo(FrameSize, Alignment);

  return FrameSize;
}

// hasFP - Return true if the specified function actually has a dedicated frame
// pointer register.
bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  // FIXME: This is pretty much broken by design: hasFP() might be called really
  // early, before the stack layout was calculated and thus hasFP() might return
  // true or false here depending on the time of call.
  return (MFI.getStackSize()) && needsFP(MF);
}

// needsFP - Return true if the specified function should have a dedicated frame
// pointer register. This is true if the function has variable sized allocas or
// if frame pointer elimination is disabled.
bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Naked functions have no stack frame pushed, so we don't have a frame
  // pointer.
  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
    return false;

  return MF.getTarget().Options.DisableFramePointerElim(MF) ||
         MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
         MF.exposesReturnsTwice() ||
         (MF.getTarget().Options.GuaranteedTailCallOpt &&
          MF.getInfo<PPCFunctionInfo>()->hasFastCall());
}

void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
  bool is31 = needsFP(MF);
  unsigned FPReg  = is31 ? PPC::R31 : PPC::R1;
  unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  bool HasBP = RegInfo->hasBasePointer(MF);
  unsigned BPReg  = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
  unsigned BP8Reg = HasBP ?
                            (unsigned) PPC::X30 : FP8Reg;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI)
    for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
      --MBBI;
      for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
        MachineOperand &MO = MBBI->getOperand(I);
        if (!MO.isReg())
          continue;

        switch (MO.getReg()) {
        case PPC::FP:
          MO.setReg(FPReg);
          break;
        case PPC::FP8:
          MO.setReg(FP8Reg);
          break;
        case PPC::BP:
          MO.setReg(BPReg);
          break;
        case PPC::BP8:
          MO.setReg(BP8Reg);
          break;
        }
      }
    }
}

/* This function will do the following:
   - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
     respectively (defaults recommended by the ABI) and return true
   - If MBB is not an entry block, initialize the register scavenger and look
     for available registers.
   - If the defaults (R0/R12) are available, return true
   - If TwoUniqueRegsRequired is set to true, it looks for two unique
     registers. Otherwise, look for a single available register.
   - If the required registers are found, set SR1 and SR2 and return true.
   - If the required registers are not found, set SR2 or both SR1 and SR2 to
     PPC::NoRegister and return false.

   Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
   is not set, this function will attempt to find two different registers, but
   still return true if only one register is available (and set SR1 == SR2).
*/
bool
PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
                                      bool UseAtEnd,
                                      bool TwoUniqueRegsRequired,
                                      Register *SR1,
                                      Register *SR2) const {
  RegScavenger RS;
  Register R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
  Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;

  // Set the defaults for the two scratch registers.
  if (SR1)
    *SR1 = R0;

  if (SR2) {
    assert(SR1 && "Asking for the second scratch register but not the first?");
    *SR2 = R12;
  }

  // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
  if ((UseAtEnd && MBB->isReturnBlock()) ||
      (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
    return true;

  RS.enterBasicBlock(*MBB);

  if (UseAtEnd && !MBB->empty()) {
    // The scratch register will be used at the end of the block, so must
    // consider all registers used within the block.

    MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
    // If no terminator, back iterator up to previous instruction.
    if (MBBI == MBB->end())
      MBBI = std::prev(MBBI);

    if (MBBI != MBB->begin())
      RS.forward(MBBI);
  }

  // If the two registers are available, we're all good.
  // Note that we only return here if both R0 and R12 are available because
  // although the function may not require two unique registers, it may benefit
  // from having two so we should try to provide them.
  if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
    return true;

  // Get the list of callee-saved registers for the target.
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());

  // Get all the available registers in the block.
  BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ?
                                                           &PPC::G8RCRegClass :
                                                           &PPC::GPRCRegClass);

  // We shouldn't use callee-saved registers as scratch registers as they may be
  // available when looking for a candidate block for shrink wrapping but not
  // available when the actual prologue/epilogue is being emitted because they
  // were added as live-in to the prologue block by PrologueEpilogueInserter.
  for (int i = 0; CSRegs[i]; ++i)
    BV.reset(CSRegs[i]);

  // Set the first scratch register to the first available one.
  if (SR1) {
    int FirstScratchReg = BV.find_first();
    *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
  }

  // If there is another one available, set the second scratch register to that.
  // Otherwise, set it to either PPC::NoRegister if this function requires two
  // or to whatever SR1 is set to if this function doesn't require two.
  if (SR2) {
    int SecondScratchReg = BV.find_next(*SR1);
    if (SecondScratchReg != -1)
      *SR2 = SecondScratchReg;
    else
      *SR2 = TwoUniqueRegsRequired ? Register() : *SR1;
  }

  // Now that we've done our best to provide both registers, double check
  // whether we were unable to provide enough.
  if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
    return false;

  return true;
}

// We need a scratch register for spilling LR and for spilling CR. By default,
// we use two scratch registers to hide latency. However, if only one scratch
// register is available, we can adjust for that by not overlapping the spill
// code. However, if we need to realign the stack (i.e. have a base pointer)
// and the stack frame is large, we need two scratch registers.
// Also, stack probing requires two scratch registers: one for the old SP, and
// one for large frames and large probe sizes.
bool
PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  MachineFunction &MF = *(MBB->getParent());
  bool HasBP = RegInfo->hasBasePointer(MF);
  unsigned FrameSize = determineFrameLayout(MF);
  int NegFrameSize = -FrameSize;
  bool IsLargeFrame = !isInt<16>(NegFrameSize);
  MachineFrameInfo &MFI = MF.getFrameInfo();
  Align MaxAlign = MFI.getMaxAlign();
  bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
  const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();

  return ((IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1) ||
         TLI.hasInlineStackProbe(MF);
}

bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);

  return findScratchRegister(TmpMBB, false,
                             twoUniqueScratchRegsRequired(TmpMBB));
}

bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);

  return findScratchRegister(TmpMBB, true);
}
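
// Shrink wrapping queries the two hooks above when choosing where to place the
// prologue and epilogue. For example, a block in which every non-callee-saved
// GPR (including the preferred r0/r12 pair) is in use cannot host the prologue
// when two unique scratch registers are required, because findScratchRegister()
// cannot satisfy the request there.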

bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();

  // Abort if there is no register info or function info.
  if (!RegInfo || !FI)
    return false;

  // Only move the stack update on ELFv2 ABI and PPC64.
  if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
    return false;

  // Check the frame size first and return false if it does not fit the
  // requirements.
  // We need a non-zero frame size as well as a frame that will fit in the red
  // zone. This is because by moving the stack pointer update we are now storing
  // to the red zone until the stack pointer is updated. If we get an interrupt
  // inside the prologue but before the stack update we now have a number of
  // stores to the red zone and those stores must all fit.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned FrameSize = MFI.getStackSize();
  if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
    return false;

  // Frame pointers and base pointers complicate matters so don't do anything
  // if we have them. For example having a frame pointer will sometimes require
  // a copy of r1 into r31 and that makes keeping track of updates to r1 more
  // difficult. Similar situation exists with setjmp.
  if (hasFP(MF) || RegInfo->hasBasePointer(MF) || MF.exposesReturnsTwice())
    return false;

  // Calls to fast_cc functions use different rules for passing parameters on
  // the stack from the ABI and using PIC base in the function imposes
  // similar restrictions to using the base pointer. It is not generally safe
  // to move the stack pointer update in these situations.
  if (FI->hasFastCall() || FI->usesPICBase())
    return false;

  // Finally we can move the stack update if we do not require register
  // scavenging. Register scavenging can introduce more spills and so
  // may make the frame size larger than we have computed.
  return !RegInfo->requiresFrameIndexScavenging(MF);
}

void PPCFrameLowering::emitPrologue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();

  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  DebugLoc dl;
  // AIX assembler does not support cfi directives.
  const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();

  // Get processor type.
  bool isPPC64 = Subtarget.isPPC64();
  // Get the ABI.
  bool isSVR4ABI = Subtarget.isSVR4ABI();
  bool isELFv2ABI = Subtarget.isELFv2ABI();
  assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI.");

  // Work out frame sizes.
  unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
  int NegFrameSize = -FrameSize;
  if (!isInt<32>(NegFrameSize))
    llvm_unreachable("Unhandled stack size!");

  if (MFI.isFrameAddressTaken())
    replaceFPWithRealFP(MF);

  // Check if the link register (LR) must be saved.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  bool MustSaveLR = FI->mustSaveLR();
  bool MustSaveTOC = FI->mustSaveTOC();
  const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
  bool MustSaveCR = !MustSaveCRs.empty();
  // Do we have a frame pointer and/or base pointer for this function?
  bool HasFP = hasFP(MF);
  bool HasBP = RegInfo->hasBasePointer(MF);
  bool HasRedZone = isPPC64 || !isSVR4ABI;

  Register SPReg = isPPC64 ?
                             PPC::X1 : PPC::R1;
  Register BPReg = RegInfo->getBaseRegister(MF);
  Register FPReg = isPPC64 ? PPC::X31 : PPC::R31;
  Register LRReg = isPPC64 ? PPC::LR8 : PPC::LR;
  Register TOCReg = isPPC64 ? PPC::X2 : PPC::R2;
  Register ScratchReg;
  Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
  // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
  const MCInstrDesc &MFLRInst = TII.get(isPPC64 ? PPC::MFLR8 : PPC::MFLR);
  const MCInstrDesc &StoreInst = TII.get(isPPC64 ? PPC::STD : PPC::STW);
  const MCInstrDesc &StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU : PPC::STWU);
  const MCInstrDesc &StoreUpdtIdxInst =
      TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX);
  const MCInstrDesc &LoadImmShiftedInst =
      TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS);
  const MCInstrDesc &OrImmInst = TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI);
  const MCInstrDesc &OrInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
  const MCInstrDesc &SubtractCarryingInst =
      TII.get(isPPC64 ? PPC::SUBFC8 : PPC::SUBFC);
  const MCInstrDesc &SubtractImmCarryingInst =
      TII.get(isPPC64 ? PPC::SUBFIC8 : PPC::SUBFIC);
  const MCInstrDesc &MoveFromCondRegInst =
      TII.get(isPPC64 ? PPC::MFCR8 : PPC::MFCR);
  const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW);

  // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
  // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
  // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
  // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
  assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
         "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");

  // Using the same bool variable as below to suppress compiler warnings.
  bool SingleScratchReg = findScratchRegister(
      &MBB, false, twoUniqueScratchRegsRequired(&MBB), &ScratchReg, &TempReg);
  assert(SingleScratchReg &&
         "Required number of registers not available in this block");

  SingleScratchReg = ScratchReg == TempReg;

  int LROffset = getReturnSaveOffset();

  int FPOffset = 0;
  if (HasFP) {
    MachineFrameInfo &MFI = MF.getFrameInfo();
    int FPIndex = FI->getFramePointerSaveIndex();
    assert(FPIndex && "No Frame Pointer Save Slot!");
    FPOffset = MFI.getObjectOffset(FPIndex);
  }

  int BPOffset = 0;
  if (HasBP) {
    MachineFrameInfo &MFI = MF.getFrameInfo();
    int BPIndex = FI->getBasePointerSaveIndex();
    assert(BPIndex && "No Base Pointer Save Slot!");
    BPOffset = MFI.getObjectOffset(BPIndex);
  }

  int PBPOffset = 0;
  if (FI->usesPICBase()) {
    MachineFrameInfo &MFI = MF.getFrameInfo();
    int PBPIndex = FI->getPICBasePointerSaveIndex();
    assert(PBPIndex && "No PIC Base Pointer Save Slot!");
    PBPOffset = MFI.getObjectOffset(PBPIndex);
  }

  // Get stack alignments.
  Align MaxAlign = MFI.getMaxAlign();
  if (HasBP && MaxAlign > 1)
    assert(Log2(MaxAlign) < 16 && "Invalid alignment!");

  // Frames of 32KB & larger require special handling because they cannot be
  // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
  bool isLargeFrame = !isInt<16>(NegFrameSize);

  // Check if we can move the stack update instruction (stdu) down the prologue
  // past the callee saves.
  // Hopefully this will avoid the situation where the saves are waiting for
  // the update on the store with update to complete.
  MachineBasicBlock::iterator StackUpdateLoc = MBBI;
  bool MovingStackUpdateDown = false;

  // Check if we can move the stack update.
  if (stackUpdateCanBeMoved(MF)) {
    const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
    for (CalleeSavedInfo CSI : Info) {
      // If the callee saved register is spilled to a register instead of the
      // stack then the spill no longer uses the stack pointer.
      // This can lead to two consequences:
      // 1) We no longer need to update the stack because the function does not
      //    spill any callee saved registers to stack.
      // 2) We have a situation where we still have to update the stack pointer
      //    even though some registers are spilled to other registers. In
      //    this case the current code moves the stack update to an incorrect
      //    position.
      // In either case we should abort moving the stack update operation.
      if (CSI.isSpilledToReg()) {
        StackUpdateLoc = MBBI;
        MovingStackUpdateDown = false;
        break;
      }

      int FrIdx = CSI.getFrameIdx();
      // If the frame index is not negative the callee saved info belongs to a
      // stack object that is not a fixed stack object. We ignore non-fixed
      // stack objects because we won't move the stack update pointer past them.
      if (FrIdx >= 0)
        continue;

      if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
        StackUpdateLoc++;
        MovingStackUpdateDown = true;
      } else {
        // We need all of the Frame Indices to meet these conditions.
        // If they do not, abort the whole operation.
        StackUpdateLoc = MBBI;
        MovingStackUpdateDown = false;
        break;
      }
    }

    // If the operation was not aborted then update the object offset.
    if (MovingStackUpdateDown) {
      for (CalleeSavedInfo CSI : Info) {
        int FrIdx = CSI.getFrameIdx();
        if (FrIdx < 0)
          MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
      }
    }
  }

  // Where in the prologue we move the CR fields depends on how many scratch
  // registers we have, and if we need to save the link register or not. This
  // lambda is to avoid duplicating the logic in 2 places.
  auto BuildMoveFromCR = [&]() {
    if (isELFv2ABI && MustSaveCRs.size() == 1) {
      // In the ELFv2 ABI, we are not required to save all CR fields.
      // If only one CR field is clobbered, it is more efficient to use
      // mfocrf to selectively save just that field, because mfocrf has lower
      // latency compared to mfcr.
      assert(isPPC64 && "V2 ABI is 64-bit only.");
      MachineInstrBuilder MIB =
          BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg);
      MIB.addReg(MustSaveCRs[0], RegState::Kill);
    } else {
      MachineInstrBuilder MIB =
          BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg);
      for (unsigned CRfield : MustSaveCRs)
        MIB.addReg(CRfield, RegState::ImplicitKill);
    }
  };

  // If we need to spill the CR and the LR but we don't have two separate
  // registers available, we must spill them one at a time.
  if (MustSaveCR && SingleScratchReg && MustSaveLR) {
    BuildMoveFromCR();
    BuildMI(MBB, MBBI, dl, StoreWordInst)
        .addReg(TempReg, getKillRegState(true))
        .addImm(CRSaveOffset)
        .addReg(SPReg);
  }

  if (MustSaveLR)
    BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);

  if (MustSaveCR && !(SingleScratchReg && MustSaveLR))
    BuildMoveFromCR();

  if (HasRedZone) {
    if (HasFP)
      BuildMI(MBB, MBBI, dl, StoreInst)
          .addReg(FPReg)
          .addImm(FPOffset)
          .addReg(SPReg);
    if (FI->usesPICBase())
      BuildMI(MBB, MBBI, dl, StoreInst)
          .addReg(PPC::R30)
          .addImm(PBPOffset)
          .addReg(SPReg);
    if (HasBP)
      BuildMI(MBB, MBBI, dl, StoreInst)
          .addReg(BPReg)
          .addImm(BPOffset)
          .addReg(SPReg);
  }

  if (MustSaveLR)
    BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
        .addReg(ScratchReg, getKillRegState(true))
        .addImm(LROffset)
        .addReg(SPReg);

  if (MustSaveCR &&
      !(SingleScratchReg && MustSaveLR)) {
    assert(HasRedZone && "A red zone is always available on PPC64");
    BuildMI(MBB, MBBI, dl, StoreWordInst)
        .addReg(TempReg, getKillRegState(true))
        .addImm(CRSaveOffset)
        .addReg(SPReg);
  }

  // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
  if (!FrameSize)
    return;

  // Adjust stack pointer: r1 += NegFrameSize.
  // If there is a preferred stack alignment, align R1 now.

  if (HasBP && HasRedZone) {
    // Save a copy of r1 as the base pointer.
    BuildMI(MBB, MBBI, dl, OrInst, BPReg)
        .addReg(SPReg)
        .addReg(SPReg);
  }

  // Have we generated a STUX instruction to claim stack frame? If so,
  // the negated frame size will be placed in ScratchReg.
  bool HasSTUX = false;

  // If FrameSize <= TLI.getStackProbeSize(MF), then, because the POWER ABI
  // requires the backchain pointer to always be stored at SP, we get a free
  // probe from the essential STU(X) instruction.
  if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) {
    // To be consistent with other targets, a pseudo instruction is emitted and
    // will be later expanded in `inlineStackProbe`.
    BuildMI(MBB, MBBI, dl,
            TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
                            : PPC::PROBED_STACKALLOC_32))
        .addDef(ScratchReg)
        .addDef(TempReg) // TempReg stores the old sp.
        .addImm(NegFrameSize);
    // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we
    // update the ScratchReg to meet the assumption that ScratchReg contains
    // the NegFrameSize. This solution is rather tricky.
    if (!HasRedZone) {
      BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
          .addReg(TempReg)
          .addReg(SPReg);
      HasSTUX = true;
    }
  } else {
    // This condition must be kept in sync with canUseAsPrologue.
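    // For example (64-bit, small frame): with MaxAlign = 32 and FrameSize = 64
    // (determineFrameLayout has already rounded FrameSize up to a multiple of
    // MaxAlign), an incoming SP of 0x...ff28 gives ScratchReg = SP & 31 = 8,
    // then ScratchReg = -64 - 8 = -72, and the STDUX below sets
    // SP = 0x...fee0, which is 32-byte aligned and at least FrameSize bytes
    // below the old SP.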
    if (HasBP && MaxAlign > 1) {
      if (isPPC64)
        BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
            .addReg(SPReg)
            .addImm(0)
            .addImm(64 - Log2(MaxAlign));
      else // PPC32...
        BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
            .addReg(SPReg)
            .addImm(0)
            .addImm(32 - Log2(MaxAlign))
            .addImm(31);
      if (!isLargeFrame) {
        BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
            .addReg(ScratchReg, RegState::Kill)
            .addImm(NegFrameSize);
      } else {
        assert(!SingleScratchReg && "Only a single scratch reg available");
        BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
            .addImm(NegFrameSize >> 16);
        BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
            .addReg(TempReg, RegState::Kill)
            .addImm(NegFrameSize & 0xFFFF);
        BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
            .addReg(ScratchReg, RegState::Kill)
            .addReg(TempReg, RegState::Kill);
      }

      BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
          .addReg(SPReg, RegState::Kill)
          .addReg(SPReg)
          .addReg(ScratchReg);
      HasSTUX = true;

    } else if (!isLargeFrame) {
      BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
          .addReg(SPReg)
          .addImm(NegFrameSize)
          .addReg(SPReg);

    } else {
      BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
          .addImm(NegFrameSize >> 16);
      BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
          .addReg(ScratchReg, RegState::Kill)
          .addImm(NegFrameSize & 0xFFFF);
      BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
          .addReg(SPReg, RegState::Kill)
          .addReg(SPReg)
          .addReg(ScratchReg);
      HasSTUX = true;
    }
  }

  // Save the TOC register after the stack pointer update if a prologue TOC
  // save is required for the function.
  if (MustSaveTOC) {
    assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
    BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
        .addReg(TOCReg, getKillRegState(true))
        .addImm(TOCSaveOffset)
        .addReg(SPReg);
  }

  if (!HasRedZone) {
    assert(!isPPC64 && "A red zone is always available on PPC64");
    if (HasSTUX) {
      // The negated frame size is in ScratchReg, and the SPReg has been
      // decremented by the frame size: SPReg = old SPReg + ScratchReg.
      // Since FPOffset, PBPOffset, etc. are relative to the beginning of
      // the stack frame (i.e. the old SP), ideally, we would put the old
      // SP into a register and use it as the base for the stores. The
      // problem is that the only available register may be ScratchReg,
      // which could be R0, and R0 cannot be used as a base address.

      // First, set ScratchReg to the old SP. This may need to be modified
      // later.
      BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
          .addReg(ScratchReg, RegState::Kill)
          .addReg(SPReg);

      if (ScratchReg == PPC::R0) {
        // R0 cannot be used as a base register, but it can be used as an
        // index in a store-indexed.
        int LastOffset = 0;
        if (HasFP) {
          // R0 += (FPOffset-LastOffset).
          // Need addic, since addi treats R0 as 0.
          BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
              .addReg(ScratchReg)
              .addImm(FPOffset - LastOffset);
          LastOffset = FPOffset;
          // Store FP into *R0.
          BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
              .addReg(FPReg, RegState::Kill) // Save FP.
              .addReg(PPC::ZERO)
              .addReg(ScratchReg); // This will be the index (R0 is ok here).
        }
        if (FI->usesPICBase()) {
          // R0 += (PBPOffset-LastOffset).
          BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
              .addReg(ScratchReg)
              .addImm(PBPOffset - LastOffset);
          LastOffset = PBPOffset;
          BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
              .addReg(PPC::R30, RegState::Kill) // Save PIC base pointer.
              .addReg(PPC::ZERO)
              .addReg(ScratchReg); // This will be the index (R0 is ok here).
        }
        if (HasBP) {
          // R0 += (BPOffset-LastOffset).
          BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
              .addReg(ScratchReg)
              .addImm(BPOffset - LastOffset);
          LastOffset = BPOffset;
          BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
              .addReg(BPReg, RegState::Kill) // Save BP.
              .addReg(PPC::ZERO)
              .addReg(ScratchReg); // This will be the index (R0 is ok here).
          // BP = R0 - LastOffset.
          BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
              .addReg(ScratchReg, RegState::Kill)
              .addImm(-LastOffset);
        }
      } else {
        // ScratchReg is not R0, so use it as the base register. It is
        // already set to the old SP, so we can use the offsets directly.

        // Now that the stack frame has been allocated, save all the necessary
        // registers using ScratchReg as the base address.
        if (HasFP)
          BuildMI(MBB, MBBI, dl, StoreInst)
              .addReg(FPReg)
              .addImm(FPOffset)
              .addReg(ScratchReg);
        if (FI->usesPICBase())
          BuildMI(MBB, MBBI, dl, StoreInst)
              .addReg(PPC::R30)
              .addImm(PBPOffset)
              .addReg(ScratchReg);
        if (HasBP) {
          BuildMI(MBB, MBBI, dl, StoreInst)
              .addReg(BPReg)
              .addImm(BPOffset)
              .addReg(ScratchReg);
          BuildMI(MBB, MBBI, dl, OrInst, BPReg)
              .addReg(ScratchReg, RegState::Kill)
              .addReg(ScratchReg);
        }
      }
    } else {
      // The frame size is a known 16-bit constant (fitting in the immediate
      // field of STWU). To be here we have to be compiling for PPC32.
      // Since the SPReg has been decreased by FrameSize, add it back to each
      // offset.
      if (HasFP)
        BuildMI(MBB, MBBI, dl, StoreInst)
            .addReg(FPReg)
            .addImm(FrameSize + FPOffset)
            .addReg(SPReg);
      if (FI->usesPICBase())
        BuildMI(MBB, MBBI, dl, StoreInst)
            .addReg(PPC::R30)
            .addImm(FrameSize + PBPOffset)
            .addReg(SPReg);
      if (HasBP) {
        BuildMI(MBB, MBBI, dl, StoreInst)
            .addReg(BPReg)
            .addImm(FrameSize + BPOffset)
            .addReg(SPReg);
        BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
            .addReg(SPReg)
            .addImm(FrameSize);
      }
    }
  }

  // Add Call Frame Information for the instructions we generated above.
  if (needsCFI) {
    unsigned CFIIndex;

    if (HasBP) {
      // Define CFA in terms of BP. Do this in preference to using FP/SP,
      // because if the stack needed aligning then CFA won't be at a fixed
      // offset from FP/SP.
      unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
      CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
    } else {
      // Adjust the definition of CFA to account for the change in SP.
      assert(NegFrameSize);
      CFIIndex = MF.addFrameInst(
          MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize));
    }
    BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);

    if (HasFP) {
      // Describe where FP was saved, at a fixed offset from CFA.
      unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
      CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    if (FI->usesPICBase()) {
      // Describe where the PIC base pointer was saved, at a fixed offset from
      // CFA.
      unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
      CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    if (HasBP) {
      // Describe where BP was saved, at a fixed offset from CFA.
      unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
      CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    if (MustSaveLR) {
      // Describe where LR was saved, at a fixed offset from CFA.
      unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
      CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }
  }

  // If there is a frame pointer, copy R1 into R31.
  if (HasFP) {
    BuildMI(MBB, MBBI, dl, OrInst, FPReg)
        .addReg(SPReg)
        .addReg(SPReg);

    if (!HasBP && needsCFI) {
      // Change the definition of CFA from SP+offset to FP+offset, because SP
      // will change at every alloca.
      unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
      unsigned CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createDefCfaRegister(nullptr, Reg));

      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }
  }

  if (needsCFI) {
    // Describe where callee saved registers were saved, at fixed offsets from
    // CFA.
    const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
    for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
      unsigned Reg = CSI[I].getReg();
      if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;

      // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
      // subregisters of CR2. We just need to emit a move of CR2.
      if (PPC::CRBITRCRegClass.contains(Reg))
        continue;

      if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
        continue;

      // For SVR4, don't emit a move for the CR spill slot if we haven't
      // spilled CRs.
      if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
          && !MustSaveCR)
        continue;

      // For 64-bit SVR4 when we have spilled CRs, the spill location
      // is SP+8, not a frame-relative slot.
      if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
        // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
        // the whole CR word. In the ELFv2 ABI, every CR that was
        // actually saved gets its own CFI record.
        unsigned CRReg = isELFv2ABI ?
                                      Reg : (unsigned) PPC::CR2;
        unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
            nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset));
        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex);
        continue;
      }

      if (CSI[I].isSpilledToReg()) {
        unsigned SpilledReg = CSI[I].getDstReg();
        unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
            nullptr, MRI->getDwarfRegNum(Reg, true),
            MRI->getDwarfRegNum(SpilledReg, true)));
        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIRegister);
      } else {
        int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
        // We have changed the object offset above but we do not want to change
        // the actual offsets in the CFI instruction so we have to undo the
        // offset change here.
        if (MovingStackUpdateDown)
          Offset -= NegFrameSize;

        unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
            nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex);
      }
    }
  }
}

void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
                                        MachineBasicBlock &PrologMBB) const {
  // TODO: Generate CFI instructions.
  bool isPPC64 = Subtarget.isPPC64();
  const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  // AIX assembler does not support cfi directives.
  const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
  auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
    int Opc = MI.getOpcode();
    return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32;
  });
  if (StackAllocMIPos == PrologMBB.end())
    return;
  const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
  MachineBasicBlock *CurrentMBB = &PrologMBB;
  DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
  MachineInstr &MI = *StackAllocMIPos;
  int64_t NegFrameSize = MI.getOperand(2).getImm();
  unsigned ProbeSize = TLI.getStackProbeSize(MF);
  int64_t NegProbeSize = -(int64_t)ProbeSize;
  assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
  int64_t NumBlocks = NegFrameSize / NegProbeSize;
  int64_t NegResidualSize = NegFrameSize % NegProbeSize;
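  // For example (illustrative numbers only): with a 66560-byte frame and a
  // 4096-byte probe size, NumBlocks is 16 and NegResidualSize is -1024; the
  // division and remainder on the negative values truncate toward zero.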
  Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
  Register ScratchReg = MI.getOperand(0).getReg();
  Register FPReg = MI.getOperand(1).getReg();
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  bool HasBP = RegInfo->hasBasePointer(MF);
  Register BPReg = RegInfo->getBaseRegister(MF);
  Align MaxAlign = MFI.getMaxAlign();
  const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
  // Subroutines to generate .cfi_* directives.
  auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI, Register Reg) {
    unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
    unsigned CFIIndex = MF.addFrameInst(
        MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  };
  auto buildDefCFA = [&](MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, Register Reg,
                         int Offset) {
    unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
    unsigned CFIIndex = MBB.getParent()->addFrameInst(
        MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset));
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  };
  // Subroutine to determine if we can use the Imm as part of d-form.
  auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; };
  // Subroutine to materialize the Imm into TempReg.
  auto MaterializeImm = [&](MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI, int64_t Imm,
                            Register &TempReg) {
    assert(isInt<32>(Imm) && "Unhandled imm");
    if (isInt<16>(Imm))
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg)
          .addImm(Imm);
    else {
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
          .addImm(Imm >> 16);
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg)
          .addReg(TempReg)
          .addImm(Imm & 0xFFFF);
    }
  };
  // Subroutine to store frame pointer and decrease stack pointer by probe size.
  auto allocateAndProbe = [&](MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MBBI, int64_t NegSize,
                              Register NegSizeReg, bool UseDForm,
                              Register StoreReg) {
    if (UseDForm)
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
          .addReg(StoreReg)
          .addImm(NegSize)
          .addReg(SPReg);
    else
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
          .addReg(StoreReg)
          .addReg(SPReg)
          .addReg(NegSizeReg);
  };
  // Used to probe the realignment gap [stackptr - (stackptr % align), stackptr)
  // when HasBP && isPPC64. In that scenario we normally have r0, r1, r12 and
  // r30 available, and r1 has already been copied to r30, which is BPReg, so
  // BPReg holds the value of stackptr.
  // First we probe the tail interval, whose size is less than probesize, i.e.
  // [stackptr - ((stackptr % align) % probesize), stackptr). At this stage,
  // ScratchReg holds the value of ((stackptr % align) % probesize). Then we
  // probe one probesize-sized block at a time until stackptr reaches
  // (stackptr - (stackptr % align)). At this stage, ScratchReg is materialized
  // as negprobesize. At both stages, TempReg holds the value of
  // (stackptr - (stackptr % align)).
  auto dynamicProbe = [&](MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI, Register ScratchReg,
                          Register TempReg) {
    assert(HasBP && isPPC64 && "Probe alignment part not available");
    assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2");
    // ScratchReg = stackptr % align
    BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg)
        .addReg(BPReg)
        .addImm(0)
        .addImm(64 - Log2(MaxAlign));
    // TempReg = stackptr - (stackptr % align)
    BuildMI(MBB, MBBI, DL, TII.get(PPC::SUBFC8), TempReg)
        .addReg(ScratchReg)
        .addReg(BPReg);
    // ScratchReg = (stackptr % align) % probesize
    BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg)
        .addReg(ScratchReg)
        .addImm(0)
        .addImm(64 - Log2(ProbeSize));
    Register CRReg = PPC::CR0;
    // If (stackptr % align) % probesize == 0, we should not generate probe
    // code. The layout of the output assembly is roughly:
    // bb.0:
    //    ...
    //    cmpldi $scratchreg, 0
    //    beq bb.2
    // bb.1: # Probe tail interval
    //    neg $scratchreg, $scratchreg
    //    stdux $bpreg, r1, $scratchreg
    // bb.2:
    //    <materialize negprobesize into $scratchreg>
    //    cmpd r1, $tempreg
    //    beq bb.4
    // bb.3: # Loop to probe each block
    //    stdux $bpreg, r1, $scratchreg
    //    cmpd r1, $tempreg
    //    bne bb.3
    // bb.4:
    //    ...
    MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
    MachineBasicBlock *ProbeResidualMBB = MF.CreateMachineBasicBlock(ProbedBB);
    MF.insert(MBBInsertPoint, ProbeResidualMBB);
    MachineBasicBlock *ProbeLoopPreHeaderMBB =
        MF.CreateMachineBasicBlock(ProbedBB);
    MF.insert(MBBInsertPoint, ProbeLoopPreHeaderMBB);
    MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB);
    MF.insert(MBBInsertPoint, ProbeLoopBodyMBB);
    MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
    MF.insert(MBBInsertPoint, ProbeExitMBB);
    // bb.4
    ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
    ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
    // bb.0
    BuildMI(&MBB, DL, TII.get(PPC::CMPDI), CRReg).addReg(ScratchReg).addImm(0);
    BuildMI(&MBB, DL, TII.get(PPC::BCC))
        .addImm(PPC::PRED_EQ)
        .addReg(CRReg)
        .addMBB(ProbeLoopPreHeaderMBB);
    MBB.addSuccessor(ProbeResidualMBB);
    MBB.addSuccessor(ProbeLoopPreHeaderMBB);
    // bb.1
    BuildMI(ProbeResidualMBB, DL, TII.get(PPC::NEG8), ScratchReg)
        .addReg(ScratchReg);
    allocateAndProbe(*ProbeResidualMBB, ProbeResidualMBB->end(), 0, ScratchReg,
                     false, BPReg);
    ProbeResidualMBB->addSuccessor(ProbeLoopPreHeaderMBB);
    // bb.2
    MaterializeImm(*ProbeLoopPreHeaderMBB, ProbeLoopPreHeaderMBB->end(),
                   NegProbeSize, ScratchReg);
    BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::CMPD), CRReg)
        .addReg(SPReg)
        .addReg(TempReg);
    BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::BCC))
        .addImm(PPC::PRED_EQ)
        .addReg(CRReg)
        .addMBB(ProbeExitMBB);
    ProbeLoopPreHeaderMBB->addSuccessor(ProbeLoopBodyMBB);
    ProbeLoopPreHeaderMBB->addSuccessor(ProbeExitMBB);
    // bb.3
    allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), 0, ScratchReg,
                     false, BPReg);
    BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::CMPD), CRReg)
        .addReg(SPReg)
        .addReg(TempReg);
    BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
        .addImm(PPC::PRED_NE)
        .addReg(CRReg)
        .addMBB(ProbeLoopBodyMBB);
    ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
    ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
    // Update liveins.
    recomputeLiveIns(*ProbeResidualMBB);
    recomputeLiveIns(*ProbeLoopPreHeaderMBB);
    recomputeLiveIns(*ProbeLoopBodyMBB);
    recomputeLiveIns(*ProbeExitMBB);
    return ProbeExitMBB;
  };
  // For case HasBP && MaxAlign > 1, we have to realign the SP by performing
  // SP = SP - SP % MaxAlign.
  if (HasBP && MaxAlign > 1) {
    // FIXME: Currently only probe the gap [stackptr & alignmask, stackptr) in
    // 64-bit mode.
    if (isPPC64) {
      // Use BPReg to calculate CFA.
      if (needsCFI)
        buildDefCFA(*CurrentMBB, {MI}, BPReg, 0);
      // Since we have SPReg copied to BPReg at the moment, FPReg can be used
      // as TempReg.
      Register TempReg = FPReg;
      CurrentMBB = dynamicProbe(*CurrentMBB, {MI}, ScratchReg, TempReg);
      // Copy BPReg to FPReg to meet the definition of PROBED_STACKALLOC_64.
      BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg)
          .addReg(BPReg)
          .addReg(BPReg);
    } else {
      // Initialize current frame pointer.
      BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg)
          .addReg(SPReg)
          .addReg(SPReg);
      // Use FPReg to calculate CFA.
      if (needsCFI)
        buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
      BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
          .addReg(FPReg)
          .addImm(0)
          .addImm(32 - Log2(MaxAlign))
          .addImm(31);
      BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::SUBFC), SPReg)
          .addReg(ScratchReg)
          .addReg(SPReg);
    }
  } else {
    // Initialize current frame pointer.
    BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
    // Use FPReg to calculate CFA.
    if (needsCFI)
      buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
  }
  // Probe residual part.
  if (NegResidualSize) {
    bool ResidualUseDForm = CanUseDForm(NegResidualSize);
    if (!ResidualUseDForm)
      MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg);
    allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg,
                     ResidualUseDForm, FPReg);
  }
  bool UseDForm = CanUseDForm(NegProbeSize);
  // If number of blocks is small, just probe them directly.
  if (NumBlocks < 3) {
    if (!UseDForm)
      MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
    for (int i = 0; i < NumBlocks; ++i)
      allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm,
                       FPReg);
    if (needsCFI) {
      // Restore using SPReg to calculate CFA.
      buildDefCFAReg(*CurrentMBB, {MI}, SPReg);
    }
  } else {
    // Since CTR is a volatile register and the current shrinkwrap
    // implementation won't choose an MBB in a loop as the PrologMBB, it's safe
    // to synthesize a CTR loop to probe.
    // Calculate the trip count and store it in the CTR register.
    MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg);
    BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
        .addReg(ScratchReg, RegState::Kill);
    if (!UseDForm)
      MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
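    // The synthesized sequence is roughly (register names illustrative; the
    // actual scratch and backchain registers are whatever the
    // PROBED_STACKALLOC pseudo defined):
    //   li    <scratch>, NumBlocks
    //   mtctr <scratch>
    //   li    <scratch>, -ProbeSize    # only when the D-form store can't be used
    // Loop:
    //   stdux <backchain>, r1, <scratch>   # or: stdu <backchain>, -ProbeSize(r1)
    //   bdnz  Loop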
    // Create MBBs of the loop.
    MachineFunction::iterator MBBInsertPoint =
        std::next(CurrentMBB->getIterator());
    MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
    MF.insert(MBBInsertPoint, LoopMBB);
    MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
    MF.insert(MBBInsertPoint, ExitMBB);
    // Synthesize the loop body.
    allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
                     UseDForm, FPReg);
    BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
        .addMBB(LoopMBB);
    LoopMBB->addSuccessor(ExitMBB);
    LoopMBB->addSuccessor(LoopMBB);
    // Synthesize the exit MBB.
    ExitMBB->splice(ExitMBB->end(), CurrentMBB,
                    std::next(MachineBasicBlock::iterator(MI)),
                    CurrentMBB->end());
    ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB);
    CurrentMBB->addSuccessor(LoopMBB);
    if (needsCFI) {
      // Restore using SPReg to calculate CFA.
      buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
    }
    // Update liveins.
    recomputeLiveIns(*LoopMBB);
    recomputeLiveIns(*ExitMBB);
  }
  ++NumPrologProbed;
  MI.eraseFromParent();
}

void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  DebugLoc dl;

  if (MBBI != MBB.end())
    dl = MBBI->getDebugLoc();

  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  // Get alignment info so we know how to restore the SP.
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Get the number of bytes allocated from the FrameInfo.
  int FrameSize = MFI.getStackSize();

  // Get processor type.
  bool isPPC64 = Subtarget.isPPC64();

  // Check if the link register (LR) has been saved.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  bool MustSaveLR = FI->mustSaveLR();
  const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
  bool MustSaveCR = !MustSaveCRs.empty();
  // Do we have a frame pointer and/or base pointer for this function?
  bool HasFP = hasFP(MF);
  bool HasBP = RegInfo->hasBasePointer(MF);
  bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();

  Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
  Register BPReg = RegInfo->getBaseRegister(MF);
  Register FPReg = isPPC64 ? PPC::X31 : PPC::R31;
  Register ScratchReg;
  Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
  const MCInstrDesc &MTLRInst = TII.get(isPPC64 ? PPC::MTLR8 : PPC::MTLR);
  const MCInstrDesc &LoadInst = TII.get(isPPC64 ? PPC::LD : PPC::LWZ);
  const MCInstrDesc &LoadImmShiftedInst =
      TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS);
  const MCInstrDesc &OrInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
  const MCInstrDesc &OrImmInst = TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI);
  const MCInstrDesc &AddImmInst = TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI);
  const MCInstrDesc &AddInst = TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4);
  const MCInstrDesc &LoadWordInst = TII.get(isPPC64 ? PPC::LWZ8 : PPC::LWZ);
  const MCInstrDesc &MoveToCRInst =
      TII.get(isPPC64 ? PPC::MTOCRF8 : PPC::MTOCRF);
  int LROffset = getReturnSaveOffset();

  int FPOffset = 0;

  // Using the same bool variable as below to suppress compiler warnings.
  bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
                                              &TempReg);
  assert(SingleScratchReg &&
         "Could not find an available scratch register");

  SingleScratchReg = ScratchReg == TempReg;

  if (HasFP) {
    int FPIndex = FI->getFramePointerSaveIndex();
    assert(FPIndex && "No Frame Pointer Save Slot!");
    FPOffset = MFI.getObjectOffset(FPIndex);
  }

  int BPOffset = 0;
  if (HasBP) {
    int BPIndex = FI->getBasePointerSaveIndex();
    assert(BPIndex && "No Base Pointer Save Slot!");
    BPOffset = MFI.getObjectOffset(BPIndex);
  }

  int PBPOffset = 0;
  if (FI->usesPICBase()) {
    int PBPIndex = FI->getPICBasePointerSaveIndex();
    assert(PBPIndex && "No PIC Base Pointer Save Slot!");
    PBPOffset = MFI.getObjectOffset(PBPIndex);
  }

  bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());

  if (IsReturnBlock) {
    unsigned RetOpcode = MBBI->getOpcode();
    bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
                     RetOpcode == PPC::TCRETURNdi ||
                     RetOpcode == PPC::TCRETURNai ||
                     RetOpcode == PPC::TCRETURNri8 ||
                     RetOpcode == PPC::TCRETURNdi8 ||
                     RetOpcode == PPC::TCRETURNai8;

    if (UsesTCRet) {
      int MaxTCRetDelta = FI->getTailCallSPDelta();
      MachineOperand &StackAdjust = MBBI->getOperand(1);
      assert(StackAdjust.isImm() && "Expecting immediate value.");
      // Adjust the stack pointer.
      int StackAdj = StackAdjust.getImm();
      int Delta = StackAdj - MaxTCRetDelta;
      assert((Delta >= 0) && "Delta must be non-negative");
      if (MaxTCRetDelta > 0)
        FrameSize += (StackAdj + Delta);
      else
        FrameSize += StackAdj;
    }
  }

  // Frames of 32KB & larger require special handling because they cannot be
  // indexed into with a simple LD/LWZ immediate offset operand.
  bool isLargeFrame = !isInt<16>(FrameSize);

  // On targets without a red zone, the SP needs to be restored last, so that
  // all live contents of the stack frame are upwards of the SP. This means
  // that we cannot restore SP just now, since there may be more registers
  // to restore from the stack frame (e.g. R31). If the frame size is not
  // a simple immediate value, we will need a spare register to hold the
  // restored SP. If the frame size is known and small, we can simply adjust
  // the offsets of the registers to be restored, and still use SP to restore
  // them. In such a case, the final update of SP will be to add the frame
  // size to it.
  // To simplify the code, set RBReg to the base register used to restore
  // values from the stack, and set SPAdd to the value that needs to be added
  // to the SP at the end. The default values are as if a red zone were
  // present.
  unsigned RBReg = SPReg;
  unsigned SPAdd = 0;

  // Check if we can move the stack update instruction up the epilogue
  // past the callee saves. This will allow the move to LR instruction
  // to be executed before the restores of the callee saves, which means
  // that the callee saves can hide the latency of the MTLR instruction.
  MachineBasicBlock::iterator StackUpdateLoc = MBBI;
  if (stackUpdateCanBeMoved(MF)) {
    const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
    for (CalleeSavedInfo CSI : Info) {
      // If the callee saved register is spilled to another register, abort
      // the stack update movement.
      if (CSI.isSpilledToReg()) {
        StackUpdateLoc = MBBI;
        break;
      }
      int FrIdx = CSI.getFrameIdx();
      // If the frame index is not negative, the callee saved info belongs to a
      // stack object that is not a fixed stack object. We ignore non-fixed
      // stack objects because we won't move the update of the stack pointer
      // past them.
      if (FrIdx >= 0)
        continue;

      if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
        StackUpdateLoc--;
      else {
        // Abort the operation as we can't update all CSR restores.
        StackUpdateLoc = MBBI;
        break;
      }
    }
  }

  if (FrameSize) {
    // In the prologue, the loaded (or persistent) stack pointer value is
    // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with a red
    // zone, add this offset back now.

    // If the function has a base pointer, the stack pointer has been copied
    // to it, so we can restore it by copying in the other direction.
    if (HasRedZone && HasBP) {
      BuildMI(MBB, MBBI, dl, OrInst, RBReg)
          .addReg(BPReg)
          .addReg(BPReg);
    }
    // If this function contained a fastcc call and GuaranteedTailCallOpt is
    // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
    // call which invalidates the stack pointer value in SP(0). So we use the
    // value of R31 in this case. A similar situation exists with setjmp.
    else if (FI->hasFastCall() || MF.exposesReturnsTwice()) {
      assert(HasFP && "Expecting a valid frame pointer.");
      if (!HasRedZone)
        RBReg = FPReg;
      if (!isLargeFrame) {
        BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
            .addReg(FPReg)
            .addImm(FrameSize);
      } else {
        BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
            .addImm(FrameSize >> 16);
        BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
            .addReg(ScratchReg, RegState::Kill)
            .addImm(FrameSize & 0xFFFF);
        BuildMI(MBB, MBBI, dl, AddInst)
            .addReg(RBReg)
            .addReg(FPReg)
            .addReg(ScratchReg);
      }
    } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
      if (HasRedZone) {
        BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
            .addReg(SPReg)
            .addImm(FrameSize);
      } else {
        // Make sure that adding FrameSize will not overflow the max offset
        // size.
        assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
               "Local offsets should be negative");
        SPAdd = FrameSize;
        FPOffset += FrameSize;
        BPOffset += FrameSize;
        PBPOffset += FrameSize;
      }
    } else {
      // We don't want to use ScratchReg as a base register, because it
      // could happen to be R0. Use FP instead, but make sure to preserve it.
      if (!HasRedZone) {
        // If FP is not saved, copy it to ScratchReg.
        if (!HasFP)
          BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
              .addReg(FPReg)
              .addReg(FPReg);
        RBReg = FPReg;
      }
      BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
          .addImm(0)
          .addReg(SPReg);
    }
  }
  assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
  // If there is no red zone, ScratchReg may be needed for holding a useful
  // value (although not the base register). Make sure it is not overwritten
  // too early.

  // If we need to restore both the LR and the CR and we only have one
  // available scratch register, we must do them one at a time.
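  // (When that happens, the single scratch register is first used to reload
  // the CR word and move it into the nonvolatile CR fields, and is only
  // reused afterwards to reload the LR for the MTLR emitted further below.)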
  if (MustSaveCR && SingleScratchReg && MustSaveLR) {
    // Here TempReg == ScratchReg, and in the absence of a red zone ScratchReg
    // is live here.
    assert(HasRedZone && "Expecting red zone");
    BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
        .addImm(CRSaveOffset)
        .addReg(SPReg);
    for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
      BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
          .addReg(TempReg, getKillRegState(i == e - 1));
  }

  // Delay restoring of the LR if ScratchReg is needed. This is ok, since
  // LR is stored in the caller's stack frame. ScratchReg will be needed
  // if RBReg is anything other than SP. We shouldn't use ScratchReg as
  // a base register anyway, because it may happen to be R0.
  bool LoadedLR = false;
  if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset + SPAdd)) {
    BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
        .addImm(LROffset + SPAdd)
        .addReg(RBReg);
    LoadedLR = true;
  }

  if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
    assert(RBReg == SPReg && "Should be using SP as a base register");
    BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
        .addImm(CRSaveOffset)
        .addReg(RBReg);
  }

  if (HasFP) {
    // If there is a red zone, restore FP directly, since SP has already been
    // restored. Otherwise, restore the value of FP into ScratchReg.
    if (HasRedZone || RBReg == SPReg)
      BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
          .addImm(FPOffset)
          .addReg(SPReg);
    else
      BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
          .addImm(FPOffset)
          .addReg(RBReg);
  }

  if (FI->usesPICBase())
    BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
        .addImm(PBPOffset)
        .addReg(RBReg);

  if (HasBP)
    BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
        .addImm(BPOffset)
        .addReg(RBReg);

  // There is nothing more to be loaded from the stack, so now we can
  // restore SP: SP = RBReg + SPAdd.
  if (RBReg != SPReg || SPAdd != 0) {
    assert(!HasRedZone && "This should not happen with red zone");
    // If SPAdd is 0, generate a copy.
    if (SPAdd == 0)
      BuildMI(MBB, MBBI, dl, OrInst, SPReg)
          .addReg(RBReg)
          .addReg(RBReg);
    else
      BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
          .addReg(RBReg)
          .addImm(SPAdd);

    assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
    if (RBReg == FPReg)
      BuildMI(MBB, MBBI, dl, OrInst, FPReg)
          .addReg(ScratchReg)
          .addReg(ScratchReg);

    // Now load the LR from the caller's stack frame.
    if (MustSaveLR && !LoadedLR)
      BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
          .addImm(LROffset)
          .addReg(SPReg);
  }

  if (MustSaveCR && !(SingleScratchReg && MustSaveLR))
    for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
      BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
          .addReg(TempReg, getKillRegState(i == e - 1));

  if (MustSaveLR)
    BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);

  // Callee pop calling convention. Pop parameter/linkage area.
  // Used for tail call optimization.
  if (IsReturnBlock) {
    unsigned RetOpcode = MBBI->getOpcode();
    if (MF.getTarget().Options.GuaranteedTailCallOpt &&
        (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
        MF.getFunction().getCallingConv() == CallingConv::Fast) {
      PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
      unsigned CallerAllocatedAmt = FI->getMinReservedArea();

      if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
        BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
            .addReg(SPReg)
            .addImm(CallerAllocatedAmt);
      } else {
        BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
            .addImm(CallerAllocatedAmt >> 16);
        BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
            .addReg(ScratchReg, RegState::Kill)
            .addImm(CallerAllocatedAmt & 0xFFFF);
        BuildMI(MBB, MBBI, dl, AddInst)
            .addReg(SPReg)
            .addReg(FPReg)
            .addReg(ScratchReg);
      }
    } else {
      createTailCallBranchInstr(MBB);
    }
  }
}

void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();

  // If we got this far, a first terminator should exist.
  assert(MBBI != MBB.end() && "Failed to find the first terminator.");

  DebugLoc dl = MBBI->getDebugLoc();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();

  // Create the branch instruction for the pseudo tail call return instruction.
  // The TCRETURNdi variants are direct calls. Valid targets for those are
  // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel,
  // since we can tail call external functions with PC-Rel (i.e. we don't need
  // to worry about different TOC pointers). Some of the external functions
  // will be MO_GlobalAddress, while others, like memcpy for example, are going
  // to be MO_ExternalSymbol.
  unsigned RetOpcode = MBBI->getOpcode();
  if (RetOpcode == PPC::TCRETURNdi) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    if (JumpTarget.isGlobal())
      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB))
          .addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
    else if (JumpTarget.isSymbol())
      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB))
          .addExternalSymbol(JumpTarget.getSymbolName());
    else
      llvm_unreachable("Expecting Global or External Symbol");
  } else if (RetOpcode == PPC::TCRETURNri) {
    MBBI = MBB.getLastNonDebugInstr();
    assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
  } else if (RetOpcode == PPC::TCRETURNai) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
  } else if (RetOpcode == PPC::TCRETURNdi8) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    if (JumpTarget.isGlobal())
      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8))
          .addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
    else if (JumpTarget.isSymbol())
      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8))
          .addExternalSymbol(JumpTarget.getSymbolName());
    else
      llvm_unreachable("Expecting Global or External Symbol");
  } else if (RetOpcode == PPC::TCRETURNri8) {
    MBBI = MBB.getLastNonDebugInstr();
    assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
  } else if (RetOpcode == PPC::TCRETURNai8) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
  }
}

void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                            BitVector &SavedRegs,
                                            RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  // Save and clear the LR state.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  unsigned LR = RegInfo->getRARegister();
  FI->setMustSaveLR(MustSaveLR(MF, LR));
  SavedRegs.reset(LR);

  // Save R31 if necessary.
  int FPSI = FI->getFramePointerSaveIndex();
  const bool isPPC64 = Subtarget.isPPC64();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // If the frame pointer save index hasn't been defined yet.
  if (!FPSI && needsFP(MF)) {
    // Find out the fixed offset of the frame pointer save area.
    int FPOffset = getFramePointerSaveOffset();
    // Allocate the frame index for the frame pointer save area.
    FPSI = MFI.CreateFixedObject(isPPC64 ? 8 : 4, FPOffset, true);
    // Save the result.
    FI->setFramePointerSaveIndex(FPSI);
  }

  int BPSI = FI->getBasePointerSaveIndex();
  if (!BPSI && RegInfo->hasBasePointer(MF)) {
    int BPOffset = getBasePointerSaveOffset();
    // Allocate the frame index for the base pointer save area.
    BPSI = MFI.CreateFixedObject(isPPC64 ? 8 : 4, BPOffset, true);
    // Save the result.
    FI->setBasePointerSaveIndex(BPSI);
  }

  // Reserve stack space for the PIC Base register (R30).
  // Only used in SVR4 32-bit.
  if (FI->usesPICBase()) {
    int PBPSI = MFI.CreateFixedObject(4, -8, true);
    FI->setPICBasePointerSaveIndex(PBPSI);
  }

  // Make sure we don't explicitly spill r31, because, for example, we have
  // some inline asm which explicitly clobbers it, when we otherwise have a
  // frame pointer and are using r31's spill slot for the prologue/epilogue
  // code. Same goes for the base pointer and the PIC base register.
  if (needsFP(MF))
    SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
  if (RegInfo->hasBasePointer(MF))
    SavedRegs.reset(RegInfo->getBaseRegister(MF));
  if (FI->usesPICBase())
    SavedRegs.reset(PPC::R30);

  // Reserve stack space to move the linkage area to in case of a tail call.
  int TCSPDelta = 0;
  if (MF.getTarget().Options.GuaranteedTailCallOpt &&
      (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
    MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
  }

  // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
  // For 64-bit SVR4, and all flavors of AIX, we create a FixedStack
  // object at the offset of the CR-save slot in the linkage area. The actual
  // save and restore of the condition register will be created as part of the
  // prologue and epilogue insertion, but the FixedStack object is needed to
  // keep the CalleeSavedInfo valid.
  if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
       SavedRegs.test(PPC::CR4))) {
    const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
    const int64_t SpillOffset =
        Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
    int FrameIdx =
        MFI.CreateFixedObject(SpillSize, SpillOffset,
                              /* IsImmutable */ true, /* IsAliased */ false);
    FI->setCRSpillFrameIndex(FrameIdx);
  }
}

void PPCFrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF, RegScavenger *RS) const {
  // Get callee saved register information.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();

  // If the function is shrink-wrapped and has a tail call, the tail call might
  // not be in the new RestoreBlock, so the real branch instruction won't be
  // generated by emitEpilogue(), because shrink-wrapping has chosen a new
  // RestoreBlock. Handle this case here.
  if (MFI.getSavePoint() && MFI.hasTailCall()) {
    MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
    for (MachineBasicBlock &MBB : MF) {
      if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
        createTailCallBranchInstr(MBB);
    }
  }

  // Early exit if no callee saved registers are modified!
  if (CSI.empty() && !needsFP(MF)) {
    addScavengingSpillSlot(MF, RS);
    return;
  }

  unsigned MinGPR = PPC::R31;
  unsigned MinG8R = PPC::X31;
  unsigned MinFPR = PPC::F31;
  unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;

  bool HasGPSaveArea = false;
  bool HasG8SaveArea = false;
  bool HasFPSaveArea = false;
  bool HasVRSaveArea = false;

  SmallVector<CalleeSavedInfo, 18> GPRegs;
  SmallVector<CalleeSavedInfo, 18> G8Regs;
  SmallVector<CalleeSavedInfo, 18> FPRegs;
  SmallVector<CalleeSavedInfo, 18> VRegs;

  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
            (Reg != PPC::X2 && Reg != PPC::R2)) &&
           "Not expecting to try to spill R2 in a function that must save TOC");
    if (PPC::GPRCRegClass.contains(Reg)) {
      HasGPSaveArea = true;

      GPRegs.push_back(CSI[i]);

      if (Reg < MinGPR) {
        MinGPR = Reg;
      }
    } else if (PPC::G8RCRegClass.contains(Reg)) {
      HasG8SaveArea = true;

      G8Regs.push_back(CSI[i]);

      if (Reg < MinG8R) {
        MinG8R = Reg;
      }
    } else if (PPC::F8RCRegClass.contains(Reg)) {
      HasFPSaveArea = true;

      FPRegs.push_back(CSI[i]);

      if (Reg < MinFPR) {
        MinFPR = Reg;
      }
    } else if (PPC::CRBITRCRegClass.contains(Reg) ||
               PPC::CRRCRegClass.contains(Reg)) {
      ; // Do nothing, as we already know whether CRs are spilled.
    } else if (PPC::VRRCRegClass.contains(Reg) ||
               PPC::SPERCRegClass.contains(Reg)) {
      // Altivec and SPE are mutually exclusive, but have the same stack
      // alignment requirements, so overload the save area for both cases.
      HasVRSaveArea = true;

      VRegs.push_back(CSI[i]);

      if (Reg < MinVR) {
        MinVR = Reg;
      }
    } else {
      llvm_unreachable("Unknown RegisterClass!");
    }
  }

  PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();

  int64_t LowerBound = 0;

  // Take into account stack space reserved for tail calls.
  int TCSPDelta = 0;
  if (MF.getTarget().Options.GuaranteedTailCallOpt &&
      (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
    LowerBound = TCSPDelta;
  }

  // The floating-point register save area is right below the back chain word
  // of the previous stack frame.
  if (HasFPSaveArea) {
    for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
      int FI = FPRegs[i].getFrameIdx();

      MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
    }

    LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
  }

  // Check whether the frame pointer register is allocated. If so, make sure
  // it is spilled to the correct offset.
  if (needsFP(MF)) {
    int FI = PFI->getFramePointerSaveIndex();
    assert(FI && "No Frame Pointer Save Slot!");
    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
    // FP is R31/X31, so no need to update MinGPR/MinG8R.
    HasGPSaveArea = true;
  }

  if (PFI->usesPICBase()) {
    int FI = PFI->getPICBasePointerSaveIndex();
    assert(FI && "No PIC Base Pointer Save Slot!");
    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));

    MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
    HasGPSaveArea = true;
  }

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  if (RegInfo->hasBasePointer(MF)) {
    int FI = PFI->getBasePointerSaveIndex();
    assert(FI && "No Base Pointer Save Slot!");
    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));

    Register BP = RegInfo->getBaseRegister(MF);
    if (PPC::G8RCRegClass.contains(BP)) {
      MinG8R = std::min<unsigned>(MinG8R, BP);
      HasG8SaveArea = true;
    } else if (PPC::GPRCRegClass.contains(BP)) {
      MinGPR = std::min<unsigned>(MinGPR, BP);
      HasGPSaveArea = true;
    }
  }

  // The general register save area starts right below the floating-point
  // register save area.
  if (HasGPSaveArea || HasG8SaveArea) {
    // Move the 32-bit general register save area spill slots down, taking
    // into account the size of the floating-point register save area.
    for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
      if (!GPRegs[i].isSpilledToReg()) {
        int FI = GPRegs[i].getFrameIdx();
        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
      }
    }

    // Do the same for the 64-bit general register save area spill slots.
    for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
      if (!G8Regs[i].isSpilledToReg()) {
        int FI = G8Regs[i].getFrameIdx();
        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
      }
    }

    unsigned MinReg = std::min<unsigned>(TRI->getEncodingValue(MinGPR),
                                         TRI->getEncodingValue(MinG8R));

    const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4;
    LowerBound -= (31 - MinReg + 1) * GPRegSize;
  }

  // For 32-bit only, the CR save area is below the general register
  // save area.
  // For 64-bit SVR4, the CR save area is addressed relative to the stack
  // pointer and hence does not need an adjustment here. Only CR2 (the first
  // nonvolatile spilled) has an associated frame index so that we have a
  // single uniform save area.
  if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
    // Adjust the frame index of the CR spill slot.
    for (const auto &CSInfo : CSI) {
      if (CSInfo.getReg() == PPC::CR2) {
        int FI = CSInfo.getFrameIdx();
        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
        break;
      }
    }

    LowerBound -= 4; // The CR save area is always 4 bytes long.
  }

  // Both Altivec and SPE have the same alignment and padding requirements
  // within the stack frame.
  if (HasVRSaveArea) {
    // Insert alignment padding; we need 16-byte alignment. Note: for a
    // positive number the alignment formula is y = (x + (n - 1)) & ~(n - 1),
    // but since we are working with a negative number here (the stack grows
    // downward), the formula is simply y = x & ~(n - 1), where x is the size
    // before aligning, n is the alignment (n = 16 here), and y is the size
    // after aligning. For example, x = -100 aligns to y = -112.
    assert(LowerBound <= 0 &&
           "Expect LowerBound to have a non-positive value!");
    LowerBound &= ~(15);

    for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
      int FI = VRegs[i].getFrameIdx();

      MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
    }
  }

  addScavengingSpillSlot(MF, RS);
}

void PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
                                              RegScavenger *RS) const {
  // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
  // a large stack, which will require scavenging a register to materialize a
  // large offset.

  // We need to have a scavenger spill slot for spills if the frame size is
  // large. In case there is no free register for large-offset addressing,
  // this slot is used for the necessary emergency spill. Also, we need the
  // slot for dynamic stack allocations.

  // The scavenger might be invoked if the frame offset does not fit into
  // the 16-bit immediate. We don't know the complete frame size here
  // because we've not yet computed callee-saved register spills or the
  // needed alignment padding.
  unsigned StackSize = determineFrameLayout(MF, true);
  MachineFrameInfo &MFI = MF.getFrameInfo();
  if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) ||
      (hasSpills(MF) && !isInt<16>(StackSize))) {
    const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
    const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
    const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
    const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
    unsigned Size = TRI.getSpillSize(RC);
    Align Alignment = TRI.getSpillAlign(RC);
    RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false));

    // Might we have over-aligned allocas?
    bool HasAlVars =
        MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign();

    // These kinds of spills might need two registers.
    if (spillsCR(MF) || HasAlVars)
      RS->addScavengingFrameIndex(
          MFI.CreateStackObject(Size, Alignment, false));
  }
}

// This function checks if a callee-saved GPR can be spilled to a volatile
// vector register.
// This occurs for leaf functions when the option
// ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
// which were not spilled to vectors, return false so the target-independent
// code can handle them by assigning a FrameIdx to a stack slot.
bool PPCFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {

  if (CSI.empty())
    return true; // Early exit if no callee saved registers are modified!

  // Early exit if we cannot spill GPRs to volatile vector registers.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
    return false;

  // Build a BitVector of VSRs that can be used for spilling GPRs.
  BitVector BVAllocatable = TRI->getAllocatableSet(MF);
  BitVector BVCalleeSaved(TRI->getNumRegs());
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
  for (unsigned i = 0; CSRegs[i]; ++i)
    BVCalleeSaved.set(CSRegs[i]);

  for (unsigned Reg : BVAllocatable.set_bits()) {
    // Set to 0 if the register is not a volatile VSX register, or if it is
    // used in the function.
    if (BVCalleeSaved[Reg] || !PPC::VSRCRegClass.contains(Reg) ||
        MF.getRegInfo().isPhysRegUsed(Reg))
      BVAllocatable.reset(Reg);
  }

  bool AllSpilledToReg = true;
  unsigned LastVSRUsedForSpill = 0;
  for (auto &CS : CSI) {
    if (BVAllocatable.none())
      return false;

    unsigned Reg = CS.getReg();

    if (!PPC::G8RCRegClass.contains(Reg)) {
      AllSpilledToReg = false;
      continue;
    }

    // For P9, we can reuse LastVSRUsedForSpill to spill two GPRs
    // into one VSR using the mtvsrdd instruction.
    if (LastVSRUsedForSpill != 0) {
      CS.setDstReg(LastVSRUsedForSpill);
      BVAllocatable.reset(LastVSRUsedForSpill);
      LastVSRUsedForSpill = 0;
      continue;
    }

    unsigned VolatileVFReg = BVAllocatable.find_first();
    if (VolatileVFReg < BVAllocatable.size()) {
      CS.setDstReg(VolatileVFReg);
      LastVSRUsedForSpill = VolatileVFReg;
    } else {
      AllSpilledToReg = false;
    }
  }
  return AllSpilledToReg;
}

bool PPCFrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {

  MachineFunction *MF = MBB.getParent();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
  bool MustSaveTOC = FI->mustSaveTOC();
  DebugLoc DL;
  bool CRSpilled = false;
  MachineInstrBuilder CRMIB;
  BitVector Spilled(TRI->getNumRegs());

  VSRContainingGPRs.clear();

  // Map each VSR to the GPRs to be spilled into it. A single VSR can contain
  // one or two GPRs, so we need a table to record this information for the
  // later save/restore.
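  // For example (register names purely illustrative): if X29 and X30 were
  // both assigned to the same volatile VSR by assignCalleeSavedSpillSlots,
  // that VSR maps to the pair {X29, X30} here, and a single mtvsrdd below
  // saves both GPRs at once.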
  llvm::for_each(CSI, [&](const CalleeSavedInfo &Info) {
    if (Info.isSpilledToReg()) {
      auto &SpilledVSR =
          VSRContainingGPRs.FindAndConstruct(Info.getDstReg()).second;
      assert(SpilledVSR.second == 0 &&
             "Can't spill more than two GPRs into VSR!");
      if (SpilledVSR.first == 0)
        SpilledVSR.first = Info.getReg();
      else
        SpilledVSR.second = Info.getReg();
    }
  });

  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();

    // CR2 through CR4 are the nonvolatile CR fields.
    bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;

    // Add the callee-saved register as live-in; it's killed at the spill.
    // Do not do this for callee-saved registers that are live-in to the
    // function because they will already be marked live-in and this will be
    // adding it for a second time. It is an error to add the same register
    // to the set more than once.
    const MachineRegisterInfo &MRI = MF->getRegInfo();
    bool IsLiveIn = MRI.isLiveIn(Reg);
    if (!IsLiveIn)
      MBB.addLiveIn(Reg);

    if (CRSpilled && IsCRField) {
      CRMIB.addReg(Reg, RegState::ImplicitKill);
      continue;
    }

    // The actual spill will happen in the prologue.
    if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
      continue;

    // Insert the spill to the stack frame.
    if (IsCRField) {
      PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
      if (!Subtarget.is32BitELFABI()) {
        // The actual spill will happen at the start of the prologue.
        FuncInfo->addMustSaveCR(Reg);
      } else {
        CRSpilled = true;
        FuncInfo->setSpillsCR();

        // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have
        // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
        CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
                    .addReg(Reg, RegState::ImplicitKill);

        MBB.insert(MI, CRMIB);
        MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
                                             .addReg(PPC::R12,
                                                     getKillRegState(true)),
                                         CSI[i].getFrameIdx()));
      }
    } else {
      if (CSI[i].isSpilledToReg()) {
        unsigned Dst = CSI[i].getDstReg();

        if (Spilled[Dst])
          continue;

        if (VSRContainingGPRs[Dst].second != 0) {
          assert(Subtarget.hasP9Vector() &&
                 "mtvsrdd is unavailable on pre-P9 targets.");

          NumPESpillVSR += 2;
          BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRDD), Dst)
              .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true))
              .addReg(VSRContainingGPRs[Dst].second, getKillRegState(true));
        } else if (VSRContainingGPRs[Dst].second == 0) {
          assert(Subtarget.hasP8Vector() &&
                 "Can't move GPR to VSR on pre-P8 targets.");

          ++NumPESpillVSR;
          BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD),
                  TRI->getSubReg(Dst, PPC::sub_64))
              .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true));
        } else {
          llvm_unreachable("More than two GPRs spilled to a VSR!");
        }
        Spilled.set(Dst);
      } else {
        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
        // Use !IsLiveIn for the kill flag.
        // We do not want to kill registers that are live in this function
        // before their use because they will become undefined registers.
        // Functions without NoUnwind need to preserve the order of elements
        // in saved vector registers.
        if (Subtarget.needsSwapsForVSXMemOps() &&
            !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
          TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
                                       CSI[i].getFrameIdx(), RC, TRI);
        else
          TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn,
                                  CSI[i].getFrameIdx(), RC, TRI);
      }
    }
  }
  return true;
}

static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled,
                       bool CR4Spilled, MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator MI,
                       ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) {

  MachineFunction *MF = MBB.getParent();
  const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
  DebugLoc DL;
  unsigned MoveReg = PPC::R12;

  // 32-bit: FP-relative.
  MBB.insert(MI,
             addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg),
                               CSI[CSIIndex].getFrameIdx()));

  unsigned RestoreOp = PPC::MTOCRF;
  if (CR2Spilled)
    MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
                       .addReg(MoveReg,
                               getKillRegState(!CR3Spilled && !CR4Spilled)));

  if (CR3Spilled)
    MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
                       .addReg(MoveReg, getKillRegState(!CR4Spilled)));

  if (CR4Spilled)
    MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
                       .addReg(MoveReg, getKillRegState(true)));
}

MachineBasicBlock::iterator PPCFrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  if (MF.getTarget().Options.GuaranteedTailCallOpt &&
      I->getOpcode() == PPC::ADJCALLSTACKUP) {
    // Add (actually subtract) back the amount the callee popped on return.
    if (int CalleeAmt = I->getOperand(1).getImm()) {
      bool is64Bit = Subtarget.isPPC64();
      CalleeAmt *= -1;
      unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
      unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
      unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
      unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
      unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
      unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
      const DebugLoc &dl = I->getDebugLoc();

      if (isInt<16>(CalleeAmt)) {
        BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
            .addReg(StackReg, RegState::Kill)
            .addImm(CalleeAmt);
      } else {
        MachineBasicBlock::iterator MBBI = I;
        BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
            .addImm(CalleeAmt >> 16);
        BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
            .addReg(TmpReg, RegState::Kill)
            .addImm(CalleeAmt & 0xFFFF);
        BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
            .addReg(StackReg, RegState::Kill)
            .addReg(TmpReg);
      }
    }
  }
  // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
  return MBB.erase(I);
}

static bool isCalleeSavedCR(unsigned Reg) {
  return Reg == PPC::CR2 || Reg == PPC::CR3 || Reg == PPC::CR4;
}

bool PPCFrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
  bool MustSaveTOC = FI->mustSaveTOC();
  bool CR2Spilled = false;
  bool CR3Spilled = false;
  bool CR4Spilled = false;
  unsigned CSIIndex = 0;
  BitVector Restored(TRI->getNumRegs());

  // Initialize insertion-point logic; we will be restoring in reverse
  // order of spill.
  MachineBasicBlock::iterator I = MI, BeforeI = I;
  bool AtStart = I == MBB.begin();

  if (!AtStart)
    --BeforeI;

  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();

    if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
      continue;

    // Restore of the callee saved condition register field is handled during
    // epilogue insertion.
    if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
      continue;

    if (Reg == PPC::CR2) {
      CR2Spilled = true;
      // The spill slot is associated only with CR2, which is the
      // first nonvolatile spilled. Save it here.
      CSIIndex = i;
      continue;
    } else if (Reg == PPC::CR3) {
      CR3Spilled = true;
      continue;
    } else if (Reg == PPC::CR4) {
      CR4Spilled = true;
      continue;
    } else {
      // On 32-bit ELF, when we first encounter a non-CR register after seeing
      // at least one CR register, restore all spilled CRs together.
      if (CR2Spilled || CR3Spilled || CR4Spilled) {
        bool is31 = needsFP(*MF);
        restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI,
                   CSIIndex);
        CR2Spilled = CR3Spilled = CR4Spilled = false;
      }

      if (CSI[i].isSpilledToReg()) {
        DebugLoc DL;
        unsigned Dst = CSI[i].getDstReg();

        if (Restored[Dst])
          continue;

        if (VSRContainingGPRs[Dst].second != 0) {
          assert(Subtarget.hasP9Vector());
          NumPEReloadVSR += 2;
          BuildMI(MBB, I, DL, TII.get(PPC::MFVSRLD),
                  VSRContainingGPRs[Dst].second)
              .addReg(Dst);
          BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
                  VSRContainingGPRs[Dst].first)
              .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
        } else if (VSRContainingGPRs[Dst].second == 0) {
          assert(Subtarget.hasP8Vector());
          ++NumPEReloadVSR;
          BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
                  VSRContainingGPRs[Dst].first)
              .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
        } else {
          llvm_unreachable("More than two GPRs spilled to a VSR!");
        }

        Restored.set(Dst);

      } else {
        // Default behavior for non-CR saves.
        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);

        // Functions without NoUnwind need to preserve the order of elements
        // in saved vector registers.
2596 if (Subtarget.needsSwapsForVSXMemOps() && 2597 !MF->getFunction().hasFnAttribute(Attribute::NoUnwind)) 2598 TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC, 2599 TRI); 2600 else 2601 TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI); 2602 2603 assert(I != MBB.begin() && 2604 "loadRegFromStackSlot didn't insert any code!"); 2605 } 2606 } 2607 2608 // Insert in reverse order. 2609 if (AtStart) 2610 I = MBB.begin(); 2611 else { 2612 I = BeforeI; 2613 ++I; 2614 } 2615 } 2616 2617 // If we haven't yet spilled the CRs, do so now. 2618 if (CR2Spilled || CR3Spilled || CR4Spilled) { 2619 assert(Subtarget.is32BitELFABI() && 2620 "Only set CR[2|3|4]Spilled on 32-bit SVR4."); 2621 bool is31 = needsFP(*MF); 2622 restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex); 2623 } 2624 2625 return true; 2626 } 2627 2628 unsigned PPCFrameLowering::getTOCSaveOffset() const { 2629 return TOCSaveOffset; 2630 } 2631 2632 unsigned PPCFrameLowering::getFramePointerSaveOffset() const { 2633 return FramePointerSaveOffset; 2634 } 2635 2636 unsigned PPCFrameLowering::getBasePointerSaveOffset() const { 2637 return BasePointerSaveOffset; 2638 } 2639 2640 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { 2641 if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled()) 2642 return false; 2643 return !MF.getSubtarget<PPCSubtarget>().is32BitELFABI(); 2644 } 2645