1 //===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains the AArch64 implementation of TargetFrameLowering class. 11 // 12 // On AArch64, stack frames are structured as follows: 13 // 14 // The stack grows downward. 15 // 16 // All of the individual frame areas on the frame below are optional, i.e. it's 17 // possible to create a function so that the particular area isn't present 18 // in the frame. 19 // 20 // At function entry, the "frame" looks as follows: 21 // 22 // | | Higher address 23 // |-----------------------------------| 24 // | | 25 // | arguments passed on the stack | 26 // | | 27 // |-----------------------------------| <- sp 28 // | | Lower address 29 // 30 // 31 // After the prologue has run, the frame has the following general structure. 32 // Note that this doesn't depict the case where a red-zone is used. Also, 33 // technically the last frame area (VLAs) doesn't get created until in the 34 // main function body, after the prologue is run. However, it's depicted here 35 // for completeness. 36 // 37 // | | Higher address 38 // |-----------------------------------| 39 // | | 40 // | arguments passed on the stack | 41 // | | 42 // |-----------------------------------| 43 // | | 44 // | (Win64 only) varargs from reg | 45 // | | 46 // |-----------------------------------| 47 // | | 48 // | prev_fp, prev_lr | 49 // | (a.k.a. 
"frame record") | 50 // |-----------------------------------| <- fp(=x29) 51 // | | 52 // | other callee-saved registers | 53 // | | 54 // |-----------------------------------| 55 // |.empty.space.to.make.part.below....| 56 // |.aligned.in.case.it.needs.more.than| (size of this area is unknown at 57 // |.the.standard.16-byte.alignment....| compile time; if present) 58 // |-----------------------------------| 59 // | | 60 // | local variables of fixed size | 61 // | including spill slots | 62 // |-----------------------------------| <- bp(not defined by ABI, 63 // |.variable-sized.local.variables....| LLVM chooses X19) 64 // |.(VLAs)............................| (size of this area is unknown at 65 // |...................................| compile time) 66 // |-----------------------------------| <- sp 67 // | | Lower address 68 // 69 // 70 // To access the data in a frame, at-compile time, a constant offset must be 71 // computable from one of the pointers (fp, bp, sp) to access it. The size 72 // of the areas with a dotted background cannot be computed at compile-time 73 // if they are present, making it required to have all three of fp, bp and 74 // sp to be set up to be able to access all contents in the frame areas, 75 // assuming all of the frame areas are non-empty. 76 // 77 // For most functions, some of the frame areas are empty. For those functions, 78 // it may not be necessary to set up fp or bp: 79 // * A base pointer is definitely needed when there are both VLAs and local 80 // variables with more-than-default alignment requirements. 81 // * A frame pointer is definitely needed when there are local variables with 82 // more-than-default alignment requirements. 83 // 84 // In some cases when a base pointer is not strictly needed, it is generated 85 // anyway when offsets from the frame pointer to access local variables become 86 // so large that the offset can't be encoded in the immediate fields of loads 87 // or stores. 
88 // 89 // FIXME: also explain the redzone concept. 90 // FIXME: also explain the concept of reserved call frames. 91 // 92 //===----------------------------------------------------------------------===// 93 94 #include "AArch64FrameLowering.h" 95 #include "AArch64InstrInfo.h" 96 #include "AArch64MachineFunctionInfo.h" 97 #include "AArch64RegisterInfo.h" 98 #include "AArch64Subtarget.h" 99 #include "AArch64TargetMachine.h" 100 #include "llvm/ADT/SmallVector.h" 101 #include "llvm/ADT/Statistic.h" 102 #include "llvm/CodeGen/LivePhysRegs.h" 103 #include "llvm/CodeGen/MachineBasicBlock.h" 104 #include "llvm/CodeGen/MachineFrameInfo.h" 105 #include "llvm/CodeGen/MachineFunction.h" 106 #include "llvm/CodeGen/MachineInstr.h" 107 #include "llvm/CodeGen/MachineInstrBuilder.h" 108 #include "llvm/CodeGen/MachineMemOperand.h" 109 #include "llvm/CodeGen/MachineModuleInfo.h" 110 #include "llvm/CodeGen/MachineOperand.h" 111 #include "llvm/CodeGen/MachineRegisterInfo.h" 112 #include "llvm/CodeGen/RegisterScavenging.h" 113 #include "llvm/IR/Attributes.h" 114 #include "llvm/IR/CallingConv.h" 115 #include "llvm/IR/DataLayout.h" 116 #include "llvm/IR/DebugLoc.h" 117 #include "llvm/IR/Function.h" 118 #include "llvm/MC/MCDwarf.h" 119 #include "llvm/Support/CommandLine.h" 120 #include "llvm/Support/Debug.h" 121 #include "llvm/Support/ErrorHandling.h" 122 #include "llvm/Support/MathExtras.h" 123 #include "llvm/Support/raw_ostream.h" 124 #include "llvm/Target/TargetInstrInfo.h" 125 #include "llvm/Target/TargetMachine.h" 126 #include "llvm/Target/TargetOptions.h" 127 #include "llvm/Target/TargetRegisterInfo.h" 128 #include "llvm/Target/TargetSubtargetInfo.h" 129 #include <cassert> 130 #include <cstdint> 131 #include <iterator> 132 #include <vector> 133 134 using namespace llvm; 135 136 #define DEBUG_TYPE "frame-info" 137 138 static cl::opt<bool> EnableRedZone("aarch64-redzone", 139 cl::desc("enable use of redzone on AArch64"), 140 cl::init(false), cl::Hidden); 141 142 
STATISTIC(NumRedZoneFunctions, "Number of functions using red zone"); 143 144 /// Look at each instruction that references stack frames and return the stack 145 /// size limit beyond which some of these instructions will require a scratch 146 /// register during their expansion later. 147 static unsigned estimateRSStackSizeLimit(MachineFunction &MF) { 148 // FIXME: For now, just conservatively guestimate based on unscaled indexing 149 // range. We'll end up allocating an unnecessary spill slot a lot, but 150 // realistically that's not a big deal at this stage of the game. 151 for (MachineBasicBlock &MBB : MF) { 152 for (MachineInstr &MI : MBB) { 153 if (MI.isDebugValue() || MI.isPseudo() || 154 MI.getOpcode() == AArch64::ADDXri || 155 MI.getOpcode() == AArch64::ADDSXri) 156 continue; 157 158 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 159 if (!MI.getOperand(i).isFI()) 160 continue; 161 162 int Offset = 0; 163 if (isAArch64FrameOffsetLegal(MI, Offset, nullptr, nullptr, nullptr) == 164 AArch64FrameOffsetCannotUpdate) 165 return 0; 166 } 167 } 168 } 169 return 255; 170 } 171 172 bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { 173 if (!EnableRedZone) 174 return false; 175 // Don't use the red zone if the function explicitly asks us not to. 176 // This is typically used for kernel code. 177 if (MF.getFunction()->hasFnAttribute(Attribute::NoRedZone)) 178 return false; 179 180 const MachineFrameInfo &MFI = MF.getFrameInfo(); 181 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 182 unsigned NumBytes = AFI->getLocalStackSize(); 183 184 return !(MFI.hasCalls() || hasFP(MF) || NumBytes > 128); 185 } 186 187 /// hasFP - Return true if the specified function should have a dedicated frame 188 /// pointer register. 
189 bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const { 190 const MachineFrameInfo &MFI = MF.getFrameInfo(); 191 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); 192 // Retain behavior of always omitting the FP for leaf functions when possible. 193 return (MFI.hasCalls() && 194 MF.getTarget().Options.DisableFramePointerElim(MF)) || 195 MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() || 196 MFI.hasStackMap() || MFI.hasPatchPoint() || 197 RegInfo->needsStackRealignment(MF); 198 } 199 200 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is 201 /// not required, we reserve argument space for call sites in the function 202 /// immediately on entry to the current function. This eliminates the need for 203 /// add/sub sp brackets around call sites. Returns true if the call frame is 204 /// included as part of the stack frame. 205 bool 206 AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { 207 return !MF.getFrameInfo().hasVarSizedObjects(); 208 } 209 210 MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr( 211 MachineFunction &MF, MachineBasicBlock &MBB, 212 MachineBasicBlock::iterator I) const { 213 const AArch64InstrInfo *TII = 214 static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo()); 215 DebugLoc DL = I->getDebugLoc(); 216 unsigned Opc = I->getOpcode(); 217 bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode(); 218 uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0; 219 220 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); 221 if (!TFI->hasReservedCallFrame(MF)) { 222 unsigned Align = getStackAlignment(); 223 224 int64_t Amount = I->getOperand(0).getImm(); 225 Amount = alignTo(Amount, Align); 226 if (!IsDestroy) 227 Amount = -Amount; 228 229 // N.b. if CalleePopAmount is valid but zero (i.e. 
callee would pop, but it 230 // doesn't have to pop anything), then the first operand will be zero too so 231 // this adjustment is a no-op. 232 if (CalleePopAmount == 0) { 233 // FIXME: in-function stack adjustment for calls is limited to 24-bits 234 // because there's no guaranteed temporary register available. 235 // 236 // ADD/SUB (immediate) has only LSL #0 and LSL #12 available. 237 // 1) For offset <= 12-bit, we use LSL #0 238 // 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses 239 // LSL #0, and the other uses LSL #12. 240 // 241 // Most call frames will be allocated at the start of a function so 242 // this is OK, but it is a limitation that needs dealing with. 243 assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large"); 244 emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, Amount, TII); 245 } 246 } else if (CalleePopAmount != 0) { 247 // If the calling convention demands that the callee pops arguments from the 248 // stack, we want to add it back if we have a reserved call frame. 249 assert(CalleePopAmount < 0xffffff && "call frame too large"); 250 emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, -CalleePopAmount, 251 TII); 252 } 253 return MBB.erase(I); 254 } 255 256 void AArch64FrameLowering::emitCalleeSavedFrameMoves( 257 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { 258 MachineFunction &MF = *MBB.getParent(); 259 MachineFrameInfo &MFI = MF.getFrameInfo(); 260 const TargetSubtargetInfo &STI = MF.getSubtarget(); 261 const MCRegisterInfo *MRI = STI.getRegisterInfo(); 262 const TargetInstrInfo *TII = STI.getInstrInfo(); 263 DebugLoc DL = MBB.findDebugLoc(MBBI); 264 265 // Add callee saved registers to move list. 
266 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 267 if (CSI.empty()) 268 return; 269 270 for (const auto &Info : CSI) { 271 unsigned Reg = Info.getReg(); 272 int64_t Offset = 273 MFI.getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea(); 274 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); 275 unsigned CFIIndex = MF.addFrameInst( 276 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); 277 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) 278 .addCFIIndex(CFIIndex) 279 .setMIFlags(MachineInstr::FrameSetup); 280 } 281 } 282 283 // Find a scratch register that we can use at the start of the prologue to 284 // re-align the stack pointer. We avoid using callee-save registers since they 285 // may appear to be free when this is called from canUseAsPrologue (during 286 // shrink wrapping), but then no longer be free when this is called from 287 // emitPrologue. 288 // 289 // FIXME: This is a bit conservative, since in the above case we could use one 290 // of the callee-save registers as a scratch temp to re-align the stack pointer, 291 // but we would then have to make sure that we were in fact saving at least one 292 // callee-save register in the prologue, which is additional complexity that 293 // doesn't seem worth the benefit. 294 static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) { 295 MachineFunction *MF = MBB->getParent(); 296 297 // If MBB is an entry block, use X9 as the scratch register 298 if (&MF->front() == MBB) 299 return AArch64::X9; 300 301 const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>(); 302 const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo(); 303 LivePhysRegs LiveRegs(TRI); 304 LiveRegs.addLiveIns(*MBB); 305 306 // Mark callee saved registers as used so we will not choose them. 
307 const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(MF); 308 for (unsigned i = 0; CSRegs[i]; ++i) 309 LiveRegs.addReg(CSRegs[i]); 310 311 // Prefer X9 since it was historically used for the prologue scratch reg. 312 const MachineRegisterInfo &MRI = MF->getRegInfo(); 313 if (LiveRegs.available(MRI, AArch64::X9)) 314 return AArch64::X9; 315 316 for (unsigned Reg : AArch64::GPR64RegClass) { 317 if (LiveRegs.available(MRI, Reg)) 318 return Reg; 319 } 320 return AArch64::NoRegister; 321 } 322 323 bool AArch64FrameLowering::canUseAsPrologue( 324 const MachineBasicBlock &MBB) const { 325 const MachineFunction *MF = MBB.getParent(); 326 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); 327 const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>(); 328 const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 329 330 // Don't need a scratch register if we're not going to re-align the stack. 331 if (!RegInfo->needsStackRealignment(*MF)) 332 return true; 333 // Otherwise, we can use any block as long as it has a scratch register 334 // available. 
335 return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister; 336 } 337 338 bool AArch64FrameLowering::shouldCombineCSRLocalStackBump( 339 MachineFunction &MF, unsigned StackBumpBytes) const { 340 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 341 const MachineFrameInfo &MFI = MF.getFrameInfo(); 342 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); 343 const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 344 345 if (AFI->getLocalStackSize() == 0) 346 return false; 347 348 // 512 is the maximum immediate for stp/ldp that will be used for 349 // callee-save save/restores 350 if (StackBumpBytes >= 512) 351 return false; 352 353 if (MFI.hasVarSizedObjects()) 354 return false; 355 356 if (RegInfo->needsStackRealignment(MF)) 357 return false; 358 359 // This isn't strictly necessary, but it simplifies things a bit since the 360 // current RedZone handling code assumes the SP is adjusted by the 361 // callee-save save/restore code. 362 if (canUseRedZone(MF)) 363 return false; 364 365 return true; 366 } 367 368 // Convert callee-save register save/restore instruction to do stack pointer 369 // decrement/increment to allocate/deallocate the callee-save stack area by 370 // converting store/load to use pre/post increment version. 
371 static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec( 372 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 373 const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc) { 374 unsigned NewOpc; 375 bool NewIsUnscaled = false; 376 switch (MBBI->getOpcode()) { 377 default: 378 llvm_unreachable("Unexpected callee-save save/restore opcode!"); 379 case AArch64::STPXi: 380 NewOpc = AArch64::STPXpre; 381 break; 382 case AArch64::STPDi: 383 NewOpc = AArch64::STPDpre; 384 break; 385 case AArch64::STRXui: 386 NewOpc = AArch64::STRXpre; 387 NewIsUnscaled = true; 388 break; 389 case AArch64::STRDui: 390 NewOpc = AArch64::STRDpre; 391 NewIsUnscaled = true; 392 break; 393 case AArch64::LDPXi: 394 NewOpc = AArch64::LDPXpost; 395 break; 396 case AArch64::LDPDi: 397 NewOpc = AArch64::LDPDpost; 398 break; 399 case AArch64::LDRXui: 400 NewOpc = AArch64::LDRXpost; 401 NewIsUnscaled = true; 402 break; 403 case AArch64::LDRDui: 404 NewOpc = AArch64::LDRDpost; 405 NewIsUnscaled = true; 406 break; 407 } 408 409 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc)); 410 MIB.addReg(AArch64::SP, RegState::Define); 411 412 // Copy all operands other than the immediate offset. 413 unsigned OpndIdx = 0; 414 for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd; 415 ++OpndIdx) 416 MIB.add(MBBI->getOperand(OpndIdx)); 417 418 assert(MBBI->getOperand(OpndIdx).getImm() == 0 && 419 "Unexpected immediate offset in first/last callee-save save/restore " 420 "instruction!"); 421 assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP && 422 "Unexpected base register in callee-save save/restore instruction!"); 423 // Last operand is immediate offset that needs fixing. 
424 assert(CSStackSizeInc % 8 == 0); 425 int64_t CSStackSizeIncImm = CSStackSizeInc; 426 if (!NewIsUnscaled) 427 CSStackSizeIncImm /= 8; 428 MIB.addImm(CSStackSizeIncImm); 429 430 MIB.setMIFlags(MBBI->getFlags()); 431 MIB.setMemRefs(MBBI->memoperands_begin(), MBBI->memoperands_end()); 432 433 return std::prev(MBB.erase(MBBI)); 434 } 435 436 // Fixup callee-save register save/restore instructions to take into account 437 // combined SP bump by adding the local stack size to the stack offsets. 438 static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI, 439 unsigned LocalStackSize) { 440 unsigned Opc = MI.getOpcode(); 441 (void)Opc; 442 assert((Opc == AArch64::STPXi || Opc == AArch64::STPDi || 443 Opc == AArch64::STRXui || Opc == AArch64::STRDui || 444 Opc == AArch64::LDPXi || Opc == AArch64::LDPDi || 445 Opc == AArch64::LDRXui || Opc == AArch64::LDRDui) && 446 "Unexpected callee-save save/restore opcode!"); 447 448 unsigned OffsetIdx = MI.getNumExplicitOperands() - 1; 449 assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP && 450 "Unexpected base register in callee-save save/restore instruction!"); 451 // Last operand is immediate offset that needs fixing. 452 MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx); 453 // All generated opcodes have scaled offsets. 
454 assert(LocalStackSize % 8 == 0); 455 OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / 8); 456 } 457 458 void AArch64FrameLowering::emitPrologue(MachineFunction &MF, 459 MachineBasicBlock &MBB) const { 460 MachineBasicBlock::iterator MBBI = MBB.begin(); 461 const MachineFrameInfo &MFI = MF.getFrameInfo(); 462 const Function *Fn = MF.getFunction(); 463 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); 464 const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 465 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 466 MachineModuleInfo &MMI = MF.getMMI(); 467 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 468 bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry(); 469 bool HasFP = hasFP(MF); 470 471 // Debug location must be unknown since the first debug location is used 472 // to determine the end of the prologue. 473 DebugLoc DL; 474 475 // All calls are tail calls in GHC calling conv, and functions have no 476 // prologue/epilogue. 477 if (MF.getFunction()->getCallingConv() == CallingConv::GHC) 478 return; 479 480 int NumBytes = (int)MFI.getStackSize(); 481 if (!AFI->hasStackFrame()) { 482 assert(!HasFP && "unexpected function without stack frame but with FP"); 483 484 // All of the stack allocation is for locals. 485 AFI->setLocalStackSize(NumBytes); 486 487 if (!NumBytes) 488 return; 489 // REDZONE: If the stack size is less than 128 bytes, we don't need 490 // to actually allocate. 491 if (canUseRedZone(MF)) 492 ++NumRedZoneFunctions; 493 else { 494 emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII, 495 MachineInstr::FrameSetup); 496 497 // Label used to tie together the PROLOG_LABEL and the MachineMoves. 498 MCSymbol *FrameLabel = MMI.getContext().createTempSymbol(); 499 // Encode the stack size of the leaf function. 
500 unsigned CFIIndex = MF.addFrameInst( 501 MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes)); 502 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) 503 .addCFIIndex(CFIIndex) 504 .setMIFlags(MachineInstr::FrameSetup); 505 } 506 return; 507 } 508 509 bool IsWin64 = 510 Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv()); 511 unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0; 512 513 auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject; 514 // All of the remaining stack allocations are for locals. 515 AFI->setLocalStackSize(NumBytes - PrologueSaveSize); 516 517 bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes); 518 if (CombineSPBump) { 519 emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII, 520 MachineInstr::FrameSetup); 521 NumBytes = 0; 522 } else if (PrologueSaveSize != 0) { 523 MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(MBB, MBBI, DL, TII, 524 -PrologueSaveSize); 525 NumBytes -= PrologueSaveSize; 526 } 527 assert(NumBytes >= 0 && "Negative stack allocation size!?"); 528 529 // Move past the saves of the callee-saved registers, fixing up the offsets 530 // and pre-inc if we decided to combine the callee-save and local stack 531 // pointer bump above. 532 MachineBasicBlock::iterator End = MBB.end(); 533 while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup)) { 534 if (CombineSPBump) 535 fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize()); 536 ++MBBI; 537 } 538 if (HasFP) { 539 // Only set up FP if we actually need to. Frame pointer is fp = 540 // sp - fixedobject - 16. 541 int FPOffset = AFI->getCalleeSavedStackSize() - 16; 542 if (CombineSPBump) 543 FPOffset += AFI->getLocalStackSize(); 544 545 // Issue sub fp, sp, FPOffset or 546 // mov fp,sp when FPOffset is zero. 547 // Note: All stores of callee-saved registers are marked as "FrameSetup". 548 // This code marks the instruction(s) that set the FP also. 
549 emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII, 550 MachineInstr::FrameSetup); 551 } 552 553 // Allocate space for the rest of the frame. 554 if (NumBytes) { 555 const bool NeedsRealignment = RegInfo->needsStackRealignment(MF); 556 unsigned scratchSPReg = AArch64::SP; 557 558 if (NeedsRealignment) { 559 scratchSPReg = findScratchNonCalleeSaveRegister(&MBB); 560 assert(scratchSPReg != AArch64::NoRegister); 561 } 562 563 // If we're a leaf function, try using the red zone. 564 if (!canUseRedZone(MF)) 565 // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have 566 // the correct value here, as NumBytes also includes padding bytes, 567 // which shouldn't be counted here. 568 emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII, 569 MachineInstr::FrameSetup); 570 571 if (NeedsRealignment) { 572 const unsigned Alignment = MFI.getMaxAlignment(); 573 const unsigned NrBitsToZero = countTrailingZeros(Alignment); 574 assert(NrBitsToZero > 1); 575 assert(scratchSPReg != AArch64::SP); 576 577 // SUB X9, SP, NumBytes 578 // -- X9 is temporary register, so shouldn't contain any live data here, 579 // -- free to use. This is already produced by emitFrameOffset above. 580 // AND SP, X9, 0b11111...0000 581 // The logical immediates have a non-trivial encoding. The following 582 // formula computes the encoded immediate with all ones but 583 // NrBitsToZero zero bits as least significant bits. 584 uint32_t andMaskEncoded = (1 << 12) // = N 585 | ((64 - NrBitsToZero) << 6) // immr 586 | ((64 - NrBitsToZero - 1) << 0); // imms 587 588 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP) 589 .addReg(scratchSPReg, RegState::Kill) 590 .addImm(andMaskEncoded); 591 AFI->setStackRealigned(true); 592 } 593 } 594 595 // If we need a base pointer, set it up here. It's whatever the value of the 596 // stack pointer is at this point. 
Any variable size objects will be allocated 597 // after this, so we can still use the base pointer to reference locals. 598 // 599 // FIXME: Clarify FrameSetup flags here. 600 // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is 601 // needed. 602 if (RegInfo->hasBasePointer(MF)) { 603 TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP, 604 false); 605 } 606 607 if (needsFrameMoves) { 608 const DataLayout &TD = MF.getDataLayout(); 609 const int StackGrowth = -TD.getPointerSize(0); 610 unsigned FramePtr = RegInfo->getFrameRegister(MF); 611 // An example of the prologue: 612 // 613 // .globl __foo 614 // .align 2 615 // __foo: 616 // Ltmp0: 617 // .cfi_startproc 618 // .cfi_personality 155, ___gxx_personality_v0 619 // Leh_func_begin: 620 // .cfi_lsda 16, Lexception33 621 // 622 // stp xa,bx, [sp, -#offset]! 623 // ... 624 // stp x28, x27, [sp, #offset-32] 625 // stp fp, lr, [sp, #offset-16] 626 // add fp, sp, #offset - 16 627 // sub sp, sp, #1360 628 // 629 // The Stack: 630 // +-------------------------------------------+ 631 // 10000 | ........ | ........ | ........ | ........ | 632 // 10004 | ........ | ........ | ........ | ........ | 633 // +-------------------------------------------+ 634 // 10008 | ........ | ........ | ........ | ........ | 635 // 1000c | ........ | ........ | ........ | ........ | 636 // +===========================================+ 637 // 10010 | X28 Register | 638 // 10014 | X28 Register | 639 // +-------------------------------------------+ 640 // 10018 | X27 Register | 641 // 1001c | X27 Register | 642 // +===========================================+ 643 // 10020 | Frame Pointer | 644 // 10024 | Frame Pointer | 645 // +-------------------------------------------+ 646 // 10028 | Link Register | 647 // 1002c | Link Register | 648 // +===========================================+ 649 // 10030 | ........ | ........ | ........ | ........ | 650 // 10034 | ........ | ........ | ........ | ........ 
| 651 // +-------------------------------------------+ 652 // 10038 | ........ | ........ | ........ | ........ | 653 // 1003c | ........ | ........ | ........ | ........ | 654 // +-------------------------------------------+ 655 // 656 // [sp] = 10030 :: >>initial value<< 657 // sp = 10020 :: stp fp, lr, [sp, #-16]! 658 // fp = sp == 10020 :: mov fp, sp 659 // [sp] == 10020 :: stp x28, x27, [sp, #-16]! 660 // sp == 10010 :: >>final value<< 661 // 662 // The frame pointer (w29) points to address 10020. If we use an offset of 663 // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24 664 // for w27, and -32 for w28: 665 // 666 // Ltmp1: 667 // .cfi_def_cfa w29, 16 668 // Ltmp2: 669 // .cfi_offset w30, -8 670 // Ltmp3: 671 // .cfi_offset w29, -16 672 // Ltmp4: 673 // .cfi_offset w27, -24 674 // Ltmp5: 675 // .cfi_offset w28, -32 676 677 if (HasFP) { 678 // Define the current CFA rule to use the provided FP. 679 unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true); 680 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa( 681 nullptr, Reg, 2 * StackGrowth - FixedObject)); 682 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) 683 .addCFIIndex(CFIIndex) 684 .setMIFlags(MachineInstr::FrameSetup); 685 } else { 686 // Encode the stack size of the leaf function. 687 unsigned CFIIndex = MF.addFrameInst( 688 MCCFIInstruction::createDefCfaOffset(nullptr, -MFI.getStackSize())); 689 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) 690 .addCFIIndex(CFIIndex) 691 .setMIFlags(MachineInstr::FrameSetup); 692 } 693 694 // Now emit the moves for whatever callee saved regs we have (including FP, 695 // LR if those are saved). 
696 emitCalleeSavedFrameMoves(MBB, MBBI); 697 } 698 } 699 700 void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, 701 MachineBasicBlock &MBB) const { 702 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); 703 MachineFrameInfo &MFI = MF.getFrameInfo(); 704 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); 705 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 706 DebugLoc DL; 707 bool IsTailCallReturn = false; 708 if (MBB.end() != MBBI) { 709 DL = MBBI->getDebugLoc(); 710 unsigned RetOpcode = MBBI->getOpcode(); 711 IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi || 712 RetOpcode == AArch64::TCRETURNri; 713 } 714 int NumBytes = MFI.getStackSize(); 715 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 716 717 // All calls are tail calls in GHC calling conv, and functions have no 718 // prologue/epilogue. 719 if (MF.getFunction()->getCallingConv() == CallingConv::GHC) 720 return; 721 722 // Initial and residual are named for consistency with the prologue. Note that 723 // in the epilogue, the residual adjustment is executed first. 724 uint64_t ArgumentPopSize = 0; 725 if (IsTailCallReturn) { 726 MachineOperand &StackAdjust = MBBI->getOperand(1); 727 728 // For a tail-call in a callee-pops-arguments environment, some or all of 729 // the stack may actually be in use for the call's arguments, this is 730 // calculated during LowerCall and consumed here... 731 ArgumentPopSize = StackAdjust.getImm(); 732 } else { 733 // ... otherwise the amount to pop is *all* of the argument space, 734 // conveniently stored in the MachineFunctionInfo by 735 // LowerFormalArguments. This will, of course, be zero for the C calling 736 // convention. 
737 ArgumentPopSize = AFI->getArgumentStackToRestore(); 738 } 739 740 // The stack frame should be like below, 741 // 742 // ---------------------- --- 743 // | | | 744 // | BytesInStackArgArea| CalleeArgStackSize 745 // | (NumReusableBytes) | (of tail call) 746 // | | --- 747 // | | | 748 // ---------------------| --- | 749 // | | | | 750 // | CalleeSavedReg | | | 751 // | (CalleeSavedStackSize)| | | 752 // | | | | 753 // ---------------------| | NumBytes 754 // | | StackSize (StackAdjustUp) 755 // | LocalStackSize | | | 756 // | (covering callee | | | 757 // | args) | | | 758 // | | | | 759 // ---------------------- --- --- 760 // 761 // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize 762 // = StackSize + ArgumentPopSize 763 // 764 // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps 765 // it as the 2nd argument of AArch64ISD::TC_RETURN. 766 767 bool IsWin64 = 768 Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv()); 769 unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0; 770 771 auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject; 772 bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes); 773 774 if (!CombineSPBump && PrologueSaveSize != 0) 775 convertCalleeSaveRestoreToSPPrePostIncDec( 776 MBB, std::prev(MBB.getFirstTerminator()), DL, TII, PrologueSaveSize); 777 778 // Move past the restores of the callee-saved registers. 779 MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator(); 780 MachineBasicBlock::iterator Begin = MBB.begin(); 781 while (LastPopI != Begin) { 782 --LastPopI; 783 if (!LastPopI->getFlag(MachineInstr::FrameDestroy)) { 784 ++LastPopI; 785 break; 786 } else if (CombineSPBump) 787 fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize()); 788 } 789 790 // If there is a single SP update, insert it before the ret and we're done. 
791 if (CombineSPBump) { 792 emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP, 793 NumBytes + ArgumentPopSize, TII, 794 MachineInstr::FrameDestroy); 795 return; 796 } 797 798 NumBytes -= PrologueSaveSize; 799 assert(NumBytes >= 0 && "Negative stack allocation size!?"); 800 801 if (!hasFP(MF)) { 802 bool RedZone = canUseRedZone(MF); 803 // If this was a redzone leaf function, we don't need to restore the 804 // stack pointer (but we may need to pop stack args for fastcc). 805 if (RedZone && ArgumentPopSize == 0) 806 return; 807 808 bool NoCalleeSaveRestore = PrologueSaveSize == 0; 809 int StackRestoreBytes = RedZone ? 0 : NumBytes; 810 if (NoCalleeSaveRestore) 811 StackRestoreBytes += ArgumentPopSize; 812 emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, 813 StackRestoreBytes, TII, MachineInstr::FrameDestroy); 814 // If we were able to combine the local stack pop with the argument pop, 815 // then we're done. 816 if (NoCalleeSaveRestore || ArgumentPopSize == 0) 817 return; 818 NumBytes = 0; 819 } 820 821 // Restore the original stack pointer. 822 // FIXME: Rather than doing the math here, we should instead just use 823 // non-post-indexed loads for the restores if we aren't actually going to 824 // be able to save any instructions. 825 if (MFI.hasVarSizedObjects() || AFI->isStackRealigned()) 826 emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP, 827 -AFI->getCalleeSavedStackSize() + 16, TII, 828 MachineInstr::FrameDestroy); 829 else if (NumBytes) 830 emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII, 831 MachineInstr::FrameDestroy); 832 833 // This must be placed after the callee-save restore code because that code 834 // assumes the SP is at the same location as it was after the callee-save save 835 // code in the prologue. 
if (ArgumentPopSize)
    emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
                    ArgumentPopSize, TII, MachineInstr::FrameDestroy);
}

/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
/// debug info.  It's the same as what we use for resolving the code-gen
/// references for now.  FIXME: This can go wrong when references are
/// SP-relative and simple call frames aren't used.
///
/// Simply forwards to resolveFrameIndexReference() without passing PreferFP.
///
/// \param MF        The function whose frame is being queried.
/// \param FI        The frame index to resolve.
/// \param FrameReg  [out] Receives the register the returned offset is
///                  relative to.
/// \returns the offset of the object from \p FrameReg.
int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
                                                 int FI,
                                                 unsigned &FrameReg) const {
  return resolveFrameIndexReference(MF, FI, FrameReg);
}

/// Pick a base register (frame pointer, base pointer or SP) for frame index
/// \p FI and return the offset of the object from that register.
///
/// \param MF        The function whose frame is being queried.
/// \param FI        The frame index to resolve.
/// \param FrameReg  [out] Set to the frame register, the base register or
///                  AArch64::SP, depending on which base was chosen.
/// \param PreferFP  When true, a non-fixed object is addressed through the
///                  frame pointer whenever the FP is usable for it (i.e. the
///                  function has an FP, no base pointer, and needs no stack
///                  realignment).
/// \returns FPOffset when the frame pointer is chosen, otherwise the
///          SP/base-pointer relative offset.
int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
                                                     int FI, unsigned &FrameReg,
                                                     bool PreferFP) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  bool IsWin64 =
      Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv());
  // Only Win64 contributes a fixed-object area here: the varargs GPR save
  // area, rounded up to 16 bytes.
  unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
  // Candidate offset relative to the frame pointer.
  // NOTE(review): the constant 16 presumably accounts for the frame record
  // (prev_fp, prev_lr) that the FP points at -- confirm against emitPrologue.
  int FPOffset = MFI.getObjectOffset(FI) + FixedObject + 16;
  // Candidate offset relative to SP (or the base pointer).
  int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
  bool isFixed = MFI.isFixedObjectIndex(FI);

  // Use frame pointer to reference fixed objects. Use it for locals if
  // there are VLAs or a dynamically realigned SP (and thus the SP isn't
  // reliable as a base). Make sure useFPForScavengingIndex() does the
  // right thing for the emergency spill slot.
  bool UseFP = false;
  if (AFI->hasStackFrame()) {
    // Note: Keeping the following as multiple 'if' statements rather than
    // merging to a single expression for readability.
    //
    // Argument access should always use the FP.
    if (isFixed) {
      UseFP = hasFP(MF);
    } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) &&
               !RegInfo->needsStackRealignment(MF)) {
      // Use SP or FP, whichever gives us the best chance of the offset
      // being in range for direct access. If the FPOffset is positive,
      // that'll always be best, as the SP will be even further away.
      // If the FPOffset is negative, we have to keep in mind that the
      // available offset range for negative offsets is smaller than for
      // positive ones. If we have variable sized objects, we're stuck with
      // using the FP regardless, though, as the SP offset is unknown
      // and we don't have a base pointer available. If an offset is
      // available via the FP and the SP, use whichever is closest.
      if (PreferFP || MFI.hasVarSizedObjects() || FPOffset >= 0 ||
          (FPOffset >= -256 && Offset > -FPOffset))
        UseFP = true;
    }
  }

  // Sanity check: with a realigned stack only fixed (argument) objects may
  // have ended up FP-relative.
  assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
         "In the presence of dynamic stack pointer realignment, "
         "non-argument objects cannot be accessed through the frame pointer");

  if (UseFP) {
    FrameReg = RegInfo->getFrameRegister(MF);
    return FPOffset;
  }

  // Use the base pointer if we have one.
  if (RegInfo->hasBasePointer(MF))
    FrameReg = RegInfo->getBaseRegister();
  else {
    FrameReg = AArch64::SP;
    // If we're using the red zone for this function, the SP won't actually
    // be adjusted, so the offsets will be negative. They're also all
    // within range of the signed 9-bit immediate instructions.
    if (canUseRedZone(MF))
      Offset -= AFI->getLocalStackSize();
  }

  return Offset;
}

static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
  // Do not set a kill flag on values that are also marked as live-in. This
  // happens with the @llvm.returnaddress intrinsic and with arguments passed in
  // callee saved registers.
// Omitting the kill flags is conservatively correct even if the live-in
  // is not used after all.
  bool IsLiveIn = MF.getRegInfo().isLiveIn(Reg);
  return getKillRegState(!IsLiveIn);
}

/// Returns true when the callee-save layout must satisfy the MachO compact
/// unwind constraints: the target is MachO and it is not the case that the
/// target lowering supports swifterror while the function has a SwiftError
/// attribute somewhere in its attribute list.
static bool produceCompactUnwindFrame(MachineFunction &MF) {
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  AttributeList Attrs = MF.getFunction()->getAttributes();
  return Subtarget.isTargetMachO() &&
         !(Subtarget.getTargetLowering()->supportSwiftError() &&
           Attrs.hasAttrSomewhere(Attribute::SwiftError));
}

namespace {

/// Describes one callee-save spill/restore instruction: either a single
/// register or a pair of registers of the same class.
struct RegPairInfo {
  // First register of the pair (always set by computeCalleeSaveRegisterPairs).
  unsigned Reg1 = AArch64::NoRegister;
  // Second register, or NoRegister when the entry is not paired.
  unsigned Reg2 = AArch64::NoRegister;
  // Frame index of Reg1's slot; Reg2's slot, if any, is FrameIdx + 1.
  int FrameIdx;
  // Immediate operand of the load/store, in units of 8 bytes from SP.
  int Offset;
  // True for GPR64 registers, false for FPR64 registers.
  bool IsGPR;

  RegPairInfo() = default;

  bool isPaired() const { return Reg2 != AArch64::NoRegister; }
};

} // end anonymous namespace

/// Walk the callee-saved register list and group neighbouring registers of
/// the same class (GPR64 or FPR64) into pairs, assigning each entry its
/// scaled SP-relative offset.
///
/// \param MF        Function being compiled; its AArch64FunctionInfo supplies
///                  the callee-save area size and may have its
///                  "callee-save stack has free space" flag set here.
/// \param CSI       Callee-saved registers with their frame indices.
/// \param TRI       Unused by this function.
/// \param RegPairs  [out] One RegPairInfo is appended per emitted
///                  load/store, in CSI order.
static void computeCalleeSaveRegisterPairs(
    MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs) {

  if (CSI.empty())
    return;

  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  CallingConv::ID CC = MF.getFunction()->getCallingConv();
  unsigned Count = CSI.size();
  // CC is only read inside asserts; silence the unused-variable warning in
  // builds where asserts are compiled out.
  (void)CC;
  // MachO's compact unwind format relies on all registers being stored in
  // pairs.
  assert((!produceCompactUnwindFrame(MF) ||
          CC == CallingConv::PreserveMost ||
          (Count & 1) == 0) &&
         "Odd number of callee-saved regs to spill!");
  // Offsets are handed out from the top of the callee-save area downwards.
  int Offset = AFI->getCalleeSavedStackSize();

  for (unsigned i = 0; i < Count; ++i) {
    RegPairInfo RPI;
    RPI.Reg1 = CSI[i].getReg();

    assert(AArch64::GPR64RegClass.contains(RPI.Reg1) ||
           AArch64::FPR64RegClass.contains(RPI.Reg1));
    RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1);

    // Add the next reg to the pair if it is in the same register class.
    if (i + 1 < Count) {
      unsigned NextReg = CSI[i + 1].getReg();
      if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) ||
          (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg)))
        RPI.Reg2 = NextReg;
    }

    // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
    // list to come in sorted by frame index so that we can issue the store
    // pair instructions directly. Assert if we see anything otherwise.
    //
    // The order of the registers in the list is controlled by
    // getCalleeSavedRegs(), so they will always be in-order, as well.
    assert((!RPI.isPaired() ||
            (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) &&
           "Out of order callee saved regs!");

    // MachO's compact unwind format relies on all registers being stored in
    // adjacent register pairs.
    assert((!produceCompactUnwindFrame(MF) ||
            CC == CallingConv::PreserveMost ||
            (RPI.isPaired() &&
             ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
              RPI.Reg1 + 1 == RPI.Reg2))) &&
           "Callee-save registers not saved as adjacent register pair!");

    RPI.FrameIdx = CSI[i].getFrameIdx();

    // Count * 8 differs from the callee-save area size exactly when that
    // size includes padding beyond the 8 bytes per register.
    if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) {
      // Round up size of non-pair to pair size if we need to pad the
      // callee-save area to ensure 16-byte alignment.
      Offset -= 16;
      assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16);
      MFI.setObjectAlignment(RPI.FrameIdx, 16);
      AFI->setCalleeSaveStackHasFreeSpace(true);
    } else
      Offset -= RPI.isPaired() ? 16 : 8;
    assert(Offset % 8 == 0);
    // Store the offset pre-scaled by 8, as used for the instruction immediate.
    RPI.Offset = Offset / 8;
    assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
           "Offset out of bounds for LDP/STP immediate");

    RegPairs.push_back(RPI);
    // A pair consumed two CSI entries; skip the second one.
    if (RPI.isPaired())
      ++i;
  }
}

/// Emit the callee-save spill instructions before \p MI in \p MBB.
///
/// The register pairs computed by computeCalleeSaveRegisterPairs() are
/// visited in reverse order and one STP/STR is built for each, flagged as
/// FrameSetup. Non-reserved spilled registers are added as live-ins of the
/// block.
///
/// \returns true unconditionally.
bool AArch64FrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  // Left default-constructed: the spills are built with an empty DebugLoc.
  DebugLoc DL;
  SmallVector<RegPairInfo, 8> RegPairs;

  computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
       ++RPII) {
    RegPairInfo RPI = *RPII;
    unsigned Reg1 = RPI.Reg1;
    unsigned Reg2 = RPI.Reg2;
    unsigned StrOpc;

    // Issue sequence of spills for cs regs.  The first spill may be converted
    // to a pre-decrement store later by emitPrologue if the callee-save stack
    // area allocation can't be combined with the local stack area allocation.
    // For example:
    //    stp     x22, x21, [sp, #0]     // addImm(+0)
    //    stp     x20, x19, [sp, #16]    // addImm(+2)
    //    stp     fp, lr, [sp, #32]      // addImm(+4)
    // Rationale: This sequence saves uop updates compared to a sequence of
    //            pre-increment spills like stp xi,xj,[sp,#-16]!
    // Note: Similar rationale and sequence for restores in epilog.
    if (RPI.IsGPR)
      StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
    else
      StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
    DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1);
          if (RPI.isPaired())
            dbgs() << ", " << TRI->getName(Reg2);
          dbgs() << ") -> fi#(" << RPI.FrameIdx;
          if (RPI.isPaired())
            dbgs() << ", " << RPI.FrameIdx+1;
          dbgs() << ")\n");

    MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
    if (!MRI.isReserved(Reg1))
      MBB.addLiveIn(Reg1);
    // For a pair, Reg2 is added as the first register operand, with a memory
    // operand for the FrameIdx + 1 slot.
    if (RPI.isPaired()) {
      if (!MRI.isReserved(Reg2))
        MBB.addLiveIn(Reg2);
      MIB.addReg(Reg2, getPrologueDeath(MF, Reg2));
      MIB.addMemOperand(MF.getMachineMemOperand(
          MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
          MachineMemOperand::MOStore, 8, 8));
    }
    MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
        .addReg(AArch64::SP)
        .addImm(RPI.Offset) // [sp, #offset*8], where factor*8 is implicit
        .setMIFlag(MachineInstr::FrameSetup);
    MIB.addMemOperand(MF.getMachineMemOperand(
        MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
        MachineMemOperand::MOStore, 8, 8));
  }
  return true;
}

/// Emit the callee-save restore instructions before \p MI in \p MBB.
///
/// The register pairs computed by computeCalleeSaveRegisterPairs() are
/// visited in forward order and one LDP/LDR is built for each, flagged as
/// FrameDestroy. The debug location of \p MI is reused when \p MI is not the
/// end of the block.
///
/// \returns true unconditionally.
bool AArch64FrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  DebugLoc DL;
  SmallVector<RegPairInfo, 8> RegPairs;

  if (MI != MBB.end())
    DL = MI->getDebugLoc();

  computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);

  for (auto RPII = RegPairs.begin(), RPIE = RegPairs.end(); RPII != RPIE;
       ++RPII) {
    RegPairInfo RPI = *RPII;
    unsigned Reg1 = RPI.Reg1;
    unsigned Reg2 = RPI.Reg2;

    // Issue sequence of restores for cs regs.
The last restore may be converted 1114 // to a post-increment load later by emitEpilogue if the callee-save stack 1115 // area allocation can't be combined with the local stack area allocation. 1116 // For example: 1117 // ldp fp, lr, [sp, #32] // addImm(+4) 1118 // ldp x20, x19, [sp, #16] // addImm(+2) 1119 // ldp x22, x21, [sp, #0] // addImm(+0) 1120 // Note: see comment in spillCalleeSavedRegisters() 1121 unsigned LdrOpc; 1122 if (RPI.IsGPR) 1123 LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui; 1124 else 1125 LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui; 1126 DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1); 1127 if (RPI.isPaired()) 1128 dbgs() << ", " << TRI->getName(Reg2); 1129 dbgs() << ") -> fi#(" << RPI.FrameIdx; 1130 if (RPI.isPaired()) 1131 dbgs() << ", " << RPI.FrameIdx+1; 1132 dbgs() << ")\n"); 1133 1134 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc)); 1135 if (RPI.isPaired()) { 1136 MIB.addReg(Reg2, getDefRegState(true)); 1137 MIB.addMemOperand(MF.getMachineMemOperand( 1138 MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1), 1139 MachineMemOperand::MOLoad, 8, 8)); 1140 } 1141 MIB.addReg(Reg1, getDefRegState(true)) 1142 .addReg(AArch64::SP) 1143 .addImm(RPI.Offset) // [sp, #offset*8] where the factor*8 is implicit 1144 .setMIFlag(MachineInstr::FrameDestroy); 1145 MIB.addMemOperand(MF.getMachineMemOperand( 1146 MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx), 1147 MachineMemOperand::MOLoad, 8, 8)); 1148 } 1149 return true; 1150 } 1151 1152 void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, 1153 BitVector &SavedRegs, 1154 RegScavenger *RS) const { 1155 // All calls are tail calls in GHC calling conv, and functions have no 1156 // prologue/epilogue. 
1157 if (MF.getFunction()->getCallingConv() == CallingConv::GHC) 1158 return; 1159 1160 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); 1161 const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>( 1162 MF.getSubtarget().getRegisterInfo()); 1163 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 1164 unsigned UnspilledCSGPR = AArch64::NoRegister; 1165 unsigned UnspilledCSGPRPaired = AArch64::NoRegister; 1166 1167 // The frame record needs to be created by saving the appropriate registers 1168 if (hasFP(MF)) { 1169 SavedRegs.set(AArch64::FP); 1170 SavedRegs.set(AArch64::LR); 1171 } 1172 1173 unsigned BasePointerReg = AArch64::NoRegister; 1174 if (RegInfo->hasBasePointer(MF)) 1175 BasePointerReg = RegInfo->getBaseRegister(); 1176 1177 unsigned ExtraCSSpill = 0; 1178 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); 1179 // Figure out which callee-saved registers to save/restore. 1180 for (unsigned i = 0; CSRegs[i]; ++i) { 1181 const unsigned Reg = CSRegs[i]; 1182 1183 // Add the base pointer register to SavedRegs if it is callee-save. 1184 if (Reg == BasePointerReg) 1185 SavedRegs.set(Reg); 1186 1187 bool RegUsed = SavedRegs.test(Reg); 1188 unsigned PairedReg = CSRegs[i ^ 1]; 1189 if (!RegUsed) { 1190 if (AArch64::GPR64RegClass.contains(Reg) && 1191 !RegInfo->isReservedReg(MF, Reg)) { 1192 UnspilledCSGPR = Reg; 1193 UnspilledCSGPRPaired = PairedReg; 1194 } 1195 continue; 1196 } 1197 1198 // MachO's compact unwind format relies on all registers being stored in 1199 // pairs. 1200 // FIXME: the usual format is actually better if unwinding isn't needed. 
1201 if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg)) { 1202 SavedRegs.set(PairedReg); 1203 if (AArch64::GPR64RegClass.contains(PairedReg) && 1204 !RegInfo->isReservedReg(MF, PairedReg)) 1205 ExtraCSSpill = PairedReg; 1206 } 1207 } 1208 1209 DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:"; 1210 for (unsigned Reg : SavedRegs.set_bits()) 1211 dbgs() << ' ' << PrintReg(Reg, RegInfo); 1212 dbgs() << "\n";); 1213 1214 // If any callee-saved registers are used, the frame cannot be eliminated. 1215 unsigned NumRegsSpilled = SavedRegs.count(); 1216 bool CanEliminateFrame = NumRegsSpilled == 0; 1217 1218 // The CSR spill slots have not been allocated yet, so estimateStackSize 1219 // won't include them. 1220 MachineFrameInfo &MFI = MF.getFrameInfo(); 1221 unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled; 1222 DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n"); 1223 unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF); 1224 bool BigStack = (CFSize > EstimatedStackSizeLimit); 1225 if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) 1226 AFI->setHasStackFrame(true); 1227 1228 // Estimate if we might need to scavenge a register at some point in order 1229 // to materialize a stack offset. If so, either spill one additional 1230 // callee-saved register or reserve a special spill slot to facilitate 1231 // register scavenging. If we already spilled an extra callee-saved register 1232 // above to keep the number of spills even, we don't need to do anything else 1233 // here. 1234 if (BigStack) { 1235 if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) { 1236 DEBUG(dbgs() << "Spilling " << PrintReg(UnspilledCSGPR, RegInfo) 1237 << " to get a scratch register.\n"); 1238 SavedRegs.set(UnspilledCSGPR); 1239 // MachO's compact unwind format relies on all registers being stored in 1240 // pairs, so if we need to spill one extra for BigStack, then we need to 1241 // store the pair. 
1242 if (produceCompactUnwindFrame(MF)) 1243 SavedRegs.set(UnspilledCSGPRPaired); 1244 ExtraCSSpill = UnspilledCSGPRPaired; 1245 NumRegsSpilled = SavedRegs.count(); 1246 } 1247 1248 // If we didn't find an extra callee-saved register to spill, create 1249 // an emergency spill slot. 1250 if (!ExtraCSSpill || MF.getRegInfo().isPhysRegUsed(ExtraCSSpill)) { 1251 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 1252 const TargetRegisterClass &RC = AArch64::GPR64RegClass; 1253 unsigned Size = TRI->getSpillSize(RC); 1254 unsigned Align = TRI->getSpillAlignment(RC); 1255 int FI = MFI.CreateStackObject(Size, Align, false); 1256 RS->addScavengingFrameIndex(FI); 1257 DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI 1258 << " as the emergency spill slot.\n"); 1259 } 1260 } 1261 1262 // Round up to register pair alignment to avoid additional SP adjustment 1263 // instructions. 1264 AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16)); 1265 } 1266 1267 bool AArch64FrameLowering::enableStackSlotScavenging( 1268 const MachineFunction &MF) const { 1269 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 1270 return AFI->hasCalleeSaveStackFreeSpace(); 1271 } 1272