1 //===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains the AArch64 implementation of TargetFrameLowering class. 11 // 12 // On AArch64, stack frames are structured as follows: 13 // 14 // The stack grows downward. 15 // 16 // All of the individual frame areas on the frame below are optional, i.e. it's 17 // possible to create a function so that the particular area isn't present 18 // in the frame. 19 // 20 // At function entry, the "frame" looks as follows: 21 // 22 // | | Higher address 23 // |-----------------------------------| 24 // | | 25 // | arguments passed on the stack | 26 // | | 27 // |-----------------------------------| <- sp 28 // | | Lower address 29 // 30 // 31 // After the prologue has run, the frame has the following general structure. 32 // Note that this doesn't depict the case where a red-zone is used. Also, 33 // technically the last frame area (VLAs) doesn't get created until in the 34 // main function body, after the prologue is run. However, it's depicted here 35 // for completeness. 36 // 37 // | | Higher address 38 // |-----------------------------------| 39 // | | 40 // | arguments passed on the stack | 41 // | | 42 // |-----------------------------------| 43 // | | 44 // | prev_fp, prev_lr | 45 // | (a.k.a. 
"frame record") | 46 // |-----------------------------------| <- fp(=x29) 47 // | | 48 // | other callee-saved registers | 49 // | | 50 // |-----------------------------------| 51 // |.empty.space.to.make.part.below....| 52 // |.aligned.in.case.it.needs.more.than| (size of this area is unknown at 53 // |.the.standard.16-byte.alignment....| compile time; if present) 54 // |-----------------------------------| 55 // | | 56 // | local variables of fixed size | 57 // | including spill slots | 58 // |-----------------------------------| <- bp(not defined by ABI, 59 // |.variable-sized.local.variables....| LLVM chooses X19) 60 // |.(VLAs)............................| (size of this area is unknown at 61 // |...................................| compile time) 62 // |-----------------------------------| <- sp 63 // | | Lower address 64 // 65 // 66 // To access the data in a frame, at-compile time, a constant offset must be 67 // computable from one of the pointers (fp, bp, sp) to access it. The size 68 // of the areas with a dotted background cannot be computed at compile-time 69 // if they are present, making it required to have all three of fp, bp and 70 // sp to be set up to be able to access all contents in the frame areas, 71 // assuming all of the frame areas are non-empty. 72 // 73 // For most functions, some of the frame areas are empty. For those functions, 74 // it may not be necessary to set up fp or bp: 75 // * A base pointer is definitely needed when there are both VLAs and local 76 // variables with more-than-default alignment requirements. 77 // * A frame pointer is definitely needed when there are local variables with 78 // more-than-default alignment requirements. 79 // 80 // In some cases when a base pointer is not strictly needed, it is generated 81 // anyway when offsets from the frame pointer to access local variables become 82 // so large that the offset can't be encoded in the immediate fields of loads 83 // or stores. 
84 // 85 // FIXME: also explain the redzone concept. 86 // FIXME: also explain the concept of reserved call frames. 87 // 88 //===----------------------------------------------------------------------===// 89 90 #include "AArch64FrameLowering.h" 91 #include "AArch64InstrInfo.h" 92 #include "AArch64MachineFunctionInfo.h" 93 #include "AArch64RegisterInfo.h" 94 #include "AArch64Subtarget.h" 95 #include "AArch64TargetMachine.h" 96 #include "llvm/ADT/SmallVector.h" 97 #include "llvm/ADT/Statistic.h" 98 #include "llvm/CodeGen/LivePhysRegs.h" 99 #include "llvm/CodeGen/MachineBasicBlock.h" 100 #include "llvm/CodeGen/MachineFrameInfo.h" 101 #include "llvm/CodeGen/MachineFunction.h" 102 #include "llvm/CodeGen/MachineInstr.h" 103 #include "llvm/CodeGen/MachineInstrBuilder.h" 104 #include "llvm/CodeGen/MachineMemOperand.h" 105 #include "llvm/CodeGen/MachineModuleInfo.h" 106 #include "llvm/CodeGen/MachineOperand.h" 107 #include "llvm/CodeGen/MachineRegisterInfo.h" 108 #include "llvm/CodeGen/RegisterScavenging.h" 109 #include "llvm/IR/Attributes.h" 110 #include "llvm/IR/CallingConv.h" 111 #include "llvm/IR/DataLayout.h" 112 #include "llvm/IR/DebugLoc.h" 113 #include "llvm/IR/Function.h" 114 #include "llvm/MC/MCDwarf.h" 115 #include "llvm/Support/CommandLine.h" 116 #include "llvm/Support/Debug.h" 117 #include "llvm/Support/ErrorHandling.h" 118 #include "llvm/Support/MathExtras.h" 119 #include "llvm/Support/raw_ostream.h" 120 #include "llvm/Target/TargetInstrInfo.h" 121 #include "llvm/Target/TargetMachine.h" 122 #include "llvm/Target/TargetOptions.h" 123 #include "llvm/Target/TargetRegisterInfo.h" 124 #include "llvm/Target/TargetSubtargetInfo.h" 125 #include <cassert> 126 #include <cstdint> 127 #include <iterator> 128 #include <vector> 129 130 using namespace llvm; 131 132 #define DEBUG_TYPE "frame-info" 133 134 static cl::opt<bool> EnableRedZone("aarch64-redzone", 135 cl::desc("enable use of redzone on AArch64"), 136 cl::init(false), cl::Hidden); 137 138 
STATISTIC(NumRedZoneFunctions, "Number of functions using red zone"); 139 140 /// Look at each instruction that references stack frames and return the stack 141 /// size limit beyond which some of these instructions will require a scratch 142 /// register during their expansion later. 143 static unsigned estimateRSStackSizeLimit(MachineFunction &MF) { 144 // FIXME: For now, just conservatively guestimate based on unscaled indexing 145 // range. We'll end up allocating an unnecessary spill slot a lot, but 146 // realistically that's not a big deal at this stage of the game. 147 for (MachineBasicBlock &MBB : MF) { 148 for (MachineInstr &MI : MBB) { 149 if (MI.isDebugValue() || MI.isPseudo() || 150 MI.getOpcode() == AArch64::ADDXri || 151 MI.getOpcode() == AArch64::ADDSXri) 152 continue; 153 154 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 155 if (!MI.getOperand(i).isFI()) 156 continue; 157 158 int Offset = 0; 159 if (isAArch64FrameOffsetLegal(MI, Offset, nullptr, nullptr, nullptr) == 160 AArch64FrameOffsetCannotUpdate) 161 return 0; 162 } 163 } 164 } 165 return 255; 166 } 167 168 bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { 169 if (!EnableRedZone) 170 return false; 171 // Don't use the red zone if the function explicitly asks us not to. 172 // This is typically used for kernel code. 173 if (MF.getFunction()->hasFnAttribute(Attribute::NoRedZone)) 174 return false; 175 176 const MachineFrameInfo &MFI = MF.getFrameInfo(); 177 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 178 unsigned NumBytes = AFI->getLocalStackSize(); 179 180 return !(MFI.hasCalls() || hasFP(MF) || NumBytes > 128); 181 } 182 183 /// hasFP - Return true if the specified function should have a dedicated frame 184 /// pointer register. 
185 bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const { 186 const MachineFrameInfo &MFI = MF.getFrameInfo(); 187 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); 188 // Retain behavior of always omitting the FP for leaf functions when possible. 189 return (MFI.hasCalls() && 190 MF.getTarget().Options.DisableFramePointerElim(MF)) || 191 MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() || 192 MFI.hasStackMap() || MFI.hasPatchPoint() || 193 RegInfo->needsStackRealignment(MF); 194 } 195 196 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is 197 /// not required, we reserve argument space for call sites in the function 198 /// immediately on entry to the current function. This eliminates the need for 199 /// add/sub sp brackets around call sites. Returns true if the call frame is 200 /// included as part of the stack frame. 201 bool 202 AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { 203 return !MF.getFrameInfo().hasVarSizedObjects(); 204 } 205 206 MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr( 207 MachineFunction &MF, MachineBasicBlock &MBB, 208 MachineBasicBlock::iterator I) const { 209 const AArch64InstrInfo *TII = 210 static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo()); 211 DebugLoc DL = I->getDebugLoc(); 212 unsigned Opc = I->getOpcode(); 213 bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode(); 214 uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0; 215 216 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); 217 if (!TFI->hasReservedCallFrame(MF)) { 218 unsigned Align = getStackAlignment(); 219 220 int64_t Amount = I->getOperand(0).getImm(); 221 Amount = alignTo(Amount, Align); 222 if (!IsDestroy) 223 Amount = -Amount; 224 225 // N.b. if CalleePopAmount is valid but zero (i.e. 
callee would pop, but it 226 // doesn't have to pop anything), then the first operand will be zero too so 227 // this adjustment is a no-op. 228 if (CalleePopAmount == 0) { 229 // FIXME: in-function stack adjustment for calls is limited to 24-bits 230 // because there's no guaranteed temporary register available. 231 // 232 // ADD/SUB (immediate) has only LSL #0 and LSL #12 available. 233 // 1) For offset <= 12-bit, we use LSL #0 234 // 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses 235 // LSL #0, and the other uses LSL #12. 236 // 237 // Most call frames will be allocated at the start of a function so 238 // this is OK, but it is a limitation that needs dealing with. 239 assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large"); 240 emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, Amount, TII); 241 } 242 } else if (CalleePopAmount != 0) { 243 // If the calling convention demands that the callee pops arguments from the 244 // stack, we want to add it back if we have a reserved call frame. 245 assert(CalleePopAmount < 0xffffff && "call frame too large"); 246 emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, -CalleePopAmount, 247 TII); 248 } 249 return MBB.erase(I); 250 } 251 252 void AArch64FrameLowering::emitCalleeSavedFrameMoves( 253 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { 254 MachineFunction &MF = *MBB.getParent(); 255 MachineFrameInfo &MFI = MF.getFrameInfo(); 256 const TargetSubtargetInfo &STI = MF.getSubtarget(); 257 const MCRegisterInfo *MRI = STI.getRegisterInfo(); 258 const TargetInstrInfo *TII = STI.getInstrInfo(); 259 DebugLoc DL = MBB.findDebugLoc(MBBI); 260 261 // Add callee saved registers to move list. 
262 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 263 if (CSI.empty()) 264 return; 265 266 for (const auto &Info : CSI) { 267 unsigned Reg = Info.getReg(); 268 int64_t Offset = 269 MFI.getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea(); 270 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); 271 unsigned CFIIndex = MF.addFrameInst( 272 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); 273 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) 274 .addCFIIndex(CFIIndex) 275 .setMIFlags(MachineInstr::FrameSetup); 276 } 277 } 278 279 // Find a scratch register that we can use at the start of the prologue to 280 // re-align the stack pointer. We avoid using callee-save registers since they 281 // may appear to be free when this is called from canUseAsPrologue (during 282 // shrink wrapping), but then no longer be free when this is called from 283 // emitPrologue. 284 // 285 // FIXME: This is a bit conservative, since in the above case we could use one 286 // of the callee-save registers as a scratch temp to re-align the stack pointer, 287 // but we would then have to make sure that we were in fact saving at least one 288 // callee-save register in the prologue, which is additional complexity that 289 // doesn't seem worth the benefit. 290 static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) { 291 MachineFunction *MF = MBB->getParent(); 292 293 // If MBB is an entry block, use X9 as the scratch register 294 if (&MF->front() == MBB) 295 return AArch64::X9; 296 297 const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>(); 298 const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo(); 299 LivePhysRegs LiveRegs(TRI); 300 LiveRegs.addLiveIns(*MBB); 301 302 // Mark callee saved registers as used so we will not choose them. 
303 const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(MF); 304 for (unsigned i = 0; CSRegs[i]; ++i) 305 LiveRegs.addReg(CSRegs[i]); 306 307 // Prefer X9 since it was historically used for the prologue scratch reg. 308 const MachineRegisterInfo &MRI = MF->getRegInfo(); 309 if (LiveRegs.available(MRI, AArch64::X9)) 310 return AArch64::X9; 311 312 for (unsigned Reg : AArch64::GPR64RegClass) { 313 if (LiveRegs.available(MRI, Reg)) 314 return Reg; 315 } 316 return AArch64::NoRegister; 317 } 318 319 bool AArch64FrameLowering::canUseAsPrologue( 320 const MachineBasicBlock &MBB) const { 321 const MachineFunction *MF = MBB.getParent(); 322 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); 323 const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>(); 324 const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 325 326 // Don't need a scratch register if we're not going to re-align the stack. 327 if (!RegInfo->needsStackRealignment(*MF)) 328 return true; 329 // Otherwise, we can use any block as long as it has a scratch register 330 // available. 
331 return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister; 332 } 333 334 bool AArch64FrameLowering::shouldCombineCSRLocalStackBump( 335 MachineFunction &MF, unsigned StackBumpBytes) const { 336 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 337 const MachineFrameInfo &MFI = MF.getFrameInfo(); 338 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); 339 const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 340 341 if (AFI->getLocalStackSize() == 0) 342 return false; 343 344 // 512 is the maximum immediate for stp/ldp that will be used for 345 // callee-save save/restores 346 if (StackBumpBytes >= 512) 347 return false; 348 349 if (MFI.hasVarSizedObjects()) 350 return false; 351 352 if (RegInfo->needsStackRealignment(MF)) 353 return false; 354 355 // This isn't strictly necessary, but it simplifies things a bit since the 356 // current RedZone handling code assumes the SP is adjusted by the 357 // callee-save save/restore code. 358 if (canUseRedZone(MF)) 359 return false; 360 361 return true; 362 } 363 364 // Convert callee-save register save/restore instruction to do stack pointer 365 // decrement/increment to allocate/deallocate the callee-save stack area by 366 // converting store/load to use pre/post increment version. 
static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc) {
  // MBBI       - the save/restore instruction to replace; it is erased.
  // CSStackSizeInc - SP adjustment in bytes (must be a multiple of 8) that the
  //              new instruction performs via its writeback.
  // Returns an iterator to the newly built instruction.
  unsigned NewOpc;
  // Set for the single-register forms, whose writeback immediate is used
  // as-is below instead of being divided by 8.
  bool NewIsUnscaled = false;
  // Stores map to their pre-indexed opcode, loads to their post-indexed one.
  switch (MBBI->getOpcode()) {
  default:
    llvm_unreachable("Unexpected callee-save save/restore opcode!");
  case AArch64::STPXi:
    NewOpc = AArch64::STPXpre;
    break;
  case AArch64::STPDi:
    NewOpc = AArch64::STPDpre;
    break;
  case AArch64::STRXui:
    NewOpc = AArch64::STRXpre;
    NewIsUnscaled = true;
    break;
  case AArch64::STRDui:
    NewOpc = AArch64::STRDpre;
    NewIsUnscaled = true;
    break;
  case AArch64::LDPXi:
    NewOpc = AArch64::LDPXpost;
    break;
  case AArch64::LDPDi:
    NewOpc = AArch64::LDPDpost;
    break;
  case AArch64::LDRXui:
    NewOpc = AArch64::LDRXpost;
    NewIsUnscaled = true;
    break;
  case AArch64::LDRDui:
    NewOpc = AArch64::LDRDpost;
    NewIsUnscaled = true;
    break;
  }

  // The new instruction additionally defines SP (the writeback result); that
  // operand comes first.
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  MIB.addReg(AArch64::SP, RegState::Define);

  // Copy all operands other than the immediate offset.
  unsigned OpndIdx = 0;
  for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
       ++OpndIdx)
    MIB.add(MBBI->getOperand(OpndIdx));

  // After the loop OpndIdx indexes the last operand (the immediate) and
  // OpndIdx - 1 the base register.
  assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
         "Unexpected immediate offset in first/last callee-save save/restore "
         "instruction!");
  assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");
  // Last operand is immediate offset that needs fixing.
  assert(CSStackSizeInc % 8 == 0);
  int64_t CSStackSizeIncImm = CSStackSizeInc;
  if (!NewIsUnscaled)
    CSStackSizeIncImm /= 8;
  MIB.addImm(CSStackSizeIncImm);

  // Carry over the flags and memory operands of the instruction being
  // replaced.
  MIB.setMIFlags(MBBI->getFlags());
  MIB.setMemRefs(MBBI->memoperands_begin(), MBBI->memoperands_end());

  // erase() yields the instruction following the old one; the new instruction
  // was inserted directly in front of the old one, so step back once.
  return std::prev(MBB.erase(MBBI));
}

// Fixup callee-save register save/restore instructions to take into account
// combined SP bump by adding the local stack size to the stack offsets.
//
// MI must be one of the SP-based STP/STR/LDP/LDR forms checked below;
// LocalStackSize is in bytes and must be a multiple of 8.
static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
                                              unsigned LocalStackSize) {
  unsigned Opc = MI.getOpcode();
  // Opc is only read by the assert; the cast avoids an unused-variable warning
  // when asserts are compiled out.
  (void)Opc;
  assert((Opc == AArch64::STPXi || Opc == AArch64::STPDi ||
          Opc == AArch64::STRXui || Opc == AArch64::STRDui ||
          Opc == AArch64::LDPXi || Opc == AArch64::LDPDi ||
          Opc == AArch64::LDRXui || Opc == AArch64::LDRDui) &&
         "Unexpected callee-save save/restore opcode!");

  unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
  assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");
  // Last operand is immediate offset that needs fixing.
  MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
  // All generated opcodes have scaled offsets.
  // Hence the byte count is divided by 8 before being added to the immediate.
  assert(LocalStackSize % 8 == 0);
  OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / 8);
}

/// Emit the function prologue into \p MBB: allocate the stack frame, set up
/// the frame pointer and base pointer where required, realign SP if needed,
/// and emit CFI directives when debug info or unwind tables are requested.
/// Nothing is emitted for functions using the GHC calling convention.
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const Function *Fn = MF.getFunction();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
  bool HasFP = hasFP(MF);

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc DL;

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  int NumBytes = (int)MFI.getStackSize();
  // Simple case: no stack frame was set up for this function, so there is at
  // most a single SP adjustment covering the locals.
  if (!AFI->hasStackFrame()) {
    assert(!HasFP && "unexpected function without stack frame but with FP");

    // All of the stack allocation is for locals.
    AFI->setLocalStackSize(NumBytes);

    if (!NumBytes)
      return;
    // REDZONE: If the stack size is less than 128 bytes, we don't need
    // to actually allocate.
    if (canUseRedZone(MF))
      ++NumRedZoneFunctions;
    else {
      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);

      // Label used to tie together the PROLOG_LABEL and the MachineMoves.
      MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }
    return;
  }

  auto CSStackSize = AFI->getCalleeSavedStackSize();
  // All of the remaining stack allocations are for locals.
  AFI->setLocalStackSize(NumBytes - CSStackSize);

  // Either allocate callee-save area and locals with one SP adjustment, or
  // fold the callee-save allocation into the first callee-save store by
  // turning it into its pre-indexed form. From here on NumBytes is what still
  // remains to be allocated.
  bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
  if (CombineSPBump) {
    emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                    MachineInstr::FrameSetup);
    NumBytes = 0;
  } else if (CSStackSize != 0) {
    MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(MBB, MBBI, DL, TII,
                                                     -CSStackSize);
    NumBytes -= CSStackSize;
  }
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  // Move past the saves of the callee-saved registers, fixing up the offsets
  // and pre-inc if we decided to combine the callee-save and local stack
  // pointer bump above.
  MachineBasicBlock::iterator End = MBB.end();
  while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup)) {
    if (CombineSPBump)
      fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize());
    ++MBBI;
  }
  if (HasFP) {
    // Only set up FP if we actually need to. Frame pointer is fp = sp - 16.
    // The offset is taken from the current SP: the callee-save area size minus
    // 16, plus the local area when it was allocated by the combined bump.
    int FPOffset = CSStackSize - 16;
    if (CombineSPBump)
      FPOffset += AFI->getLocalStackSize();

    // Issue    sub fp, sp, FPOffset or
    //          mov fp,sp          when FPOffset is zero.
    // Note: All stores of callee-saved registers are marked as "FrameSetup".
    // This code marks the instruction(s) that set the FP also.
    emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
                    MachineInstr::FrameSetup);
  }

  // Allocate space for the rest of the frame.
  if (NumBytes) {
    const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
    // Destination of the SP decrement: SP itself, or a scratch register when
    // the result still has to be masked for realignment.
    unsigned scratchSPReg = AArch64::SP;

    if (NeedsRealignment) {
      scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
      assert(scratchSPReg != AArch64::NoRegister);
    }

    // If we're a leaf function, try using the red zone.
    if (!canUseRedZone(MF))
      // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
      // the correct value here, as NumBytes also includes padding bytes,
      // which shouldn't be counted here.
      emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);

    if (NeedsRealignment) {
      const unsigned Alignment = MFI.getMaxAlignment();
      const unsigned NrBitsToZero = countTrailingZeros(Alignment);
      assert(NrBitsToZero > 1);
      assert(scratchSPReg != AArch64::SP);

      // SUB X9, SP, NumBytes
      //   -- X9 is temporary register, so shouldn't contain any live data here,
      //   -- free to use. This is already produced by emitFrameOffset above.
      // AND SP, X9, 0b11111...0000
      // The logical immediates have a non-trivial encoding. The following
      // formula computes the encoded immediate with all ones but
      // NrBitsToZero zero bits as least significant bits.
      uint32_t andMaskEncoded = (1 << 12)                         // = N
                                | ((64 - NrBitsToZero) << 6)      // immr
                                | ((64 - NrBitsToZero - 1) << 0); // imms

      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
          .addReg(scratchSPReg, RegState::Kill)
          .addImm(andMaskEncoded);
      AFI->setStackRealigned(true);
    }
  }

  // If we need a base pointer, set it up here. It's whatever the value of the
  // stack pointer is at this point. Any variable size objects will be allocated
  // after this, so we can still use the base pointer to reference locals.
  //
  // FIXME: Clarify FrameSetup flags here.
  // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
  // needed.
  if (RegInfo->hasBasePointer(MF)) {
    TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
                     false);
  }

  if (needsFrameMoves) {
    const DataLayout &TD = MF.getDataLayout();
    const int StackGrowth = -TD.getPointerSize(0);
    unsigned FramePtr = RegInfo->getFrameRegister(MF);
    // An example of the prologue:
    //
    //     .globl __foo
    //     .align 2
    //  __foo:
    // Ltmp0:
    //     .cfi_startproc
    //     .cfi_personality 155, ___gxx_personality_v0
    // Leh_func_begin:
    //     .cfi_lsda 16, Lexception33
    //
    //     stp  xa,bx, [sp, -#offset]!
    //     ...
    //     stp  x28, x27, [sp, #offset-32]
    //     stp  fp, lr, [sp, #offset-16]
    //     add  fp, sp, #offset - 16
    //     sub  sp, sp, #1360
    //
    // The Stack:
    //       +-------------------------------------------+
    // 10000 | ........ | ........ | ........ | ........ |
    // 10004 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10008 | ........ | ........ | ........ | ........ |
    // 1000c | ........ | ........ | ........ | ........ |
    //       +===========================================+
    // 10010 |                X28 Register               |
    // 10014 |                X28 Register               |
    //       +-------------------------------------------+
    // 10018 |                X27 Register               |
    // 1001c |                X27 Register               |
    //       +===========================================+
    // 10020 |                Frame Pointer              |
    // 10024 |                Frame Pointer              |
    //       +-------------------------------------------+
    // 10028 |                Link Register              |
    // 1002c |                Link Register              |
    //       +===========================================+
    // 10030 | ........ | ........ | ........ | ........ |
    // 10034 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10038 | ........ | ........ | ........ | ........ |
    // 1003c | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    //
    //     [sp] = 10030        ::    >>initial value<<
    //     sp = 10020          ::  stp fp, lr, [sp, #-16]!
    //     fp = sp == 10020    ::  mov fp, sp
    //     [sp] == 10020       ::  stp x28, x27, [sp, #-16]!
    //     sp == 10010         ::    >>final value<<
    //
    // The frame pointer (w29) points to address 10020. If we use an offset of
    // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
    // for w27, and -32 for w28:
    //
    //  Ltmp1:
    //     .cfi_def_cfa w29, 16
    //  Ltmp2:
    //     .cfi_offset w30, -8
    //  Ltmp3:
    //     .cfi_offset w29, -16
    //  Ltmp4:
    //     .cfi_offset w27, -24
    //  Ltmp5:
    //     .cfi_offset w28, -32

    if (HasFP) {
      // Define the current CFA rule to use the provided FP.
      // The offset passed is twice the (negative) pointer size.
      unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
      unsigned CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    } else {
      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, -MFI.getStackSize()));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }

    // Now emit the moves for whatever callee saved regs we have (including FP,
    // LR if those are saved).
    emitCalleeSavedFrameMoves(MBB, MBBI);
  }
}

/// Emit the function epilogue into \p MBB: undo the stack allocation made by
/// the prologue and additionally pop the argument area when the return is a
/// tail call or the function has argument stack to restore. Nothing is
/// emitted for functions using the GHC calling convention.
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL;
  bool IsTailCallReturn = false;
  // Take the debug location from the last non-debug instruction, if there is
  // one, and note whether it is one of the tail-call return pseudos.
  if (MBB.end() != MBBI) {
    DL = MBBI->getDebugLoc();
    unsigned RetOpcode = MBBI->getOpcode();
    IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
      RetOpcode == AArch64::TCRETURNri;
  }
  int NumBytes = MFI.getStackSize();
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  // Initial and residual are named for consistency with the prologue. Note that
  // in the epilogue, the residual adjustment is executed first.
  // NOTE(review): no variables named "initial"/"residual" exist in this
  // function any more; this remark looks stale - confirm before relying on it.
  uint64_t ArgumentPopSize = 0;
  if (IsTailCallReturn) {
    MachineOperand &StackAdjust = MBBI->getOperand(1);

    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments, this is
    // calculated during LowerCall and consumed here...
    ArgumentPopSize = StackAdjust.getImm();
  } else {
    // ... otherwise the amount to pop is *all* of the argument space,
    // conveniently stored in the MachineFunctionInfo by
    // LowerFormalArguments. This will, of course, be zero for the C calling
    // convention.
    ArgumentPopSize = AFI->getArgumentStackToRestore();
  }

  // The stack frame should be like below,
  //
  //      ----------------------                     ---
  //      |                    |                      |
  //      | BytesInStackArgArea|              CalleeArgStackSize
  //      | (NumReusableBytes) |                (of tail call)
  //      |                    |                     ---
  //      |                    |                      |
  //      ---------------------|        ---           |
  //      |                    |         |            |
  //      |   CalleeSavedReg   |         |            |
  //      | (CalleeSavedStackSize)|      |            |
  //      |                    |         |            |
  //      ---------------------|         |         NumBytes
  //      |                    |     StackSize  (StackAdjustUp)
  //      |   LocalStackSize   |         |            |
  //      | (covering callee   |         |            |
  //      |       args)        |         |            |
  //      |                    |         |            |
  //      ----------------------        ---          ---
  //
  // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
  //             = StackSize + ArgumentPopSize
  //
  // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
  // it as the 2nd argument of AArch64ISD::TC_RETURN.

  auto CSStackSize = AFI->getCalleeSavedStackSize();
  bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);

  // Without a combined bump, the instruction right before the first terminator
  // (the last callee-save restore) is turned into its post-indexed form so
  // that it also releases the callee-save area.
  if (!CombineSPBump && CSStackSize != 0)
    convertCalleeSaveRestoreToSPPrePostIncDec(
        MBB, std::prev(MBB.getFirstTerminator()), DL, TII, CSStackSize);

  // Move past the restores of the callee-saved registers.
  // Walks backwards from the first terminator; afterwards LastPopI refers to
  // the first instruction of the trailing run of FrameDestroy instructions.
  MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
  MachineBasicBlock::iterator Begin = MBB.begin();
  while (LastPopI != Begin) {
    --LastPopI;
    if (!LastPopI->getFlag(MachineInstr::FrameDestroy)) {
      ++LastPopI;
      break;
    } else if (CombineSPBump)
      fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize());
  }

  // If there is a single SP update, insert it before the ret and we're done.
  if (CombineSPBump) {
    emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
                    NumBytes + ArgumentPopSize, TII,
                    MachineInstr::FrameDestroy);
    return;
  }

  // From here on NumBytes only covers the part of the frame below the
  // callee-save area.
  NumBytes -= CSStackSize;
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  if (!hasFP(MF)) {
    bool RedZone = canUseRedZone(MF);
    // If this was a redzone leaf function, we don't need to restore the
    // stack pointer (but we may need to pop stack args for fastcc).
    if (RedZone && ArgumentPopSize == 0)
      return;

    bool NoCalleeSaveRestore = CSStackSize == 0;
    int StackRestoreBytes = RedZone ? 0 : NumBytes;
    // With no callee-save restores in between, the argument pop can be folded
    // into the same SP adjustment.
    if (NoCalleeSaveRestore)
      StackRestoreBytes += ArgumentPopSize;
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
                    StackRestoreBytes, TII, MachineInstr::FrameDestroy);
    // If we were able to combine the local stack pop with the argument pop,
    // then we're done.
    if (NoCalleeSaveRestore || ArgumentPopSize == 0)
      return;
    NumBytes = 0;
  }

  // Restore the original stack pointer.
  // FIXME: Rather than doing the math here, we should instead just use
  // non-post-indexed loads for the restores if we aren't actually going to
  // be able to save any instructions.
  // With variable-sized objects or a realigned stack, SP is recomputed from
  // FP; otherwise any remaining NumBytes are simply added back to SP.
  if (MFI.hasVarSizedObjects() || AFI->isStackRealigned())
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
                    -CSStackSize + 16, TII, MachineInstr::FrameDestroy);
  else if (NumBytes)
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII,
                    MachineInstr::FrameDestroy);

  // This must be placed after the callee-save restore code because that code
  // assumes the SP is at the same location as it was after the callee-save save
  // code in the prologue.
  if (ArgumentPopSize)
    emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
                    ArgumentPopSize, TII, MachineInstr::FrameDestroy);
}

/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
/// debug info.  It's the same as what we use for resolving the code-gen
/// references for now.  FIXME: This can go wrong when references are
/// SP-relative and simple call frames aren't used.
///
/// Forwards to resolveFrameIndexReference without passing PreferFP.
int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
                                                 int FI,
                                                 unsigned &FrameReg) const {
  return resolveFrameIndexReference(MF, FI, FrameReg);
}

/// Pick the register (FP, base pointer or SP) through which frame index
/// \p FI is addressed, store it in \p FrameReg and return the offset of the
/// object from that register. \p PreferFP biases the choice towards FP in the
/// case where both FP and SP would be usable.
int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
                                                     int FI, unsigned &FrameReg,
                                                     bool PreferFP) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  // Candidate offsets: FPOffset is returned when FP is chosen, Offset when the
  // base pointer or SP is chosen.
  int FPOffset = MFI.getObjectOffset(FI) + 16;
  int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
  bool isFixed = MFI.isFixedObjectIndex(FI);

  // Use frame pointer to reference fixed objects. Use it for locals if
  // there are VLAs or a dynamically realigned SP (and thus the SP isn't
  // reliable as a base). Make sure useFPForScavengingIndex() does the
  // right thing for the emergency spill slot.
  bool UseFP = false;
  if (AFI->hasStackFrame()) {
    // Note: Keeping the following as multiple 'if' statements rather than
    // merging to a single expression for readability.
    //
    // Argument access should always use the FP.
    if (isFixed) {
      UseFP = hasFP(MF);
    } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) &&
               !RegInfo->needsStackRealignment(MF)) {
      // Use SP or FP, whichever gives us the best chance of the offset
      // being in range for direct access. If the FPOffset is positive,
      // that'll always be best, as the SP will be even further away.
      // If the FPOffset is negative, we have to keep in mind that the
      // available offset range for negative offsets is smaller than for
      // positive ones. If we have variable sized objects, we're stuck with
      // using the FP regardless, though, as the SP offset is unknown
      // and we don't have a base pointer available. If an offset is
      // available via the FP and the SP, use whichever is closest.
      if (PreferFP || MFI.hasVarSizedObjects() || FPOffset >= 0 ||
          (FPOffset >= -256 && Offset > -FPOffset))
        UseFP = true;
    }
  }

  assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
         "In the presence of dynamic stack pointer realignment, "
         "non-argument objects cannot be accessed through the frame pointer");

  if (UseFP) {
    FrameReg = RegInfo->getFrameRegister(MF);
    return FPOffset;
  }

  // Use the base pointer if we have one.
  if (RegInfo->hasBasePointer(MF))
    FrameReg = RegInfo->getBaseRegister();
  else {
    FrameReg = AArch64::SP;
    // If we're using the red zone for this function, the SP won't actually
    // be adjusted, so the offsets will be negative. They're also all
    // within range of the signed 9-bit immediate instructions.
    if (canUseRedZone(MF))
      Offset -= AFI->getLocalStackSize();
  }

  return Offset;
}

static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
  // Do not set a kill flag on values that are also marked as live-in. This
  // happens with the @llvm-returnaddress intrinsic and with arguments passed in
  // callee saved registers.
  // Omitting the kill flags is conservatively correct even if the live-in
  // is not used after all.
907 bool IsLiveIn = MF.getRegInfo().isLiveIn(Reg); 908 return getKillRegState(!IsLiveIn); 909 } 910 911 static bool produceCompactUnwindFrame(MachineFunction &MF) { 912 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); 913 AttributeList Attrs = MF.getFunction()->getAttributes(); 914 return Subtarget.isTargetMachO() && 915 !(Subtarget.getTargetLowering()->supportSwiftError() && 916 Attrs.hasAttrSomewhere(Attribute::SwiftError)); 917 } 918 919 namespace { 920 921 struct RegPairInfo { 922 unsigned Reg1 = AArch64::NoRegister; 923 unsigned Reg2 = AArch64::NoRegister; 924 int FrameIdx; 925 int Offset; 926 bool IsGPR; 927 928 RegPairInfo() = default; 929 930 bool isPaired() const { return Reg2 != AArch64::NoRegister; } 931 }; 932 933 } // end anonymous namespace 934 935 static void computeCalleeSaveRegisterPairs( 936 MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI, 937 const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs) { 938 939 if (CSI.empty()) 940 return; 941 942 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 943 MachineFrameInfo &MFI = MF.getFrameInfo(); 944 CallingConv::ID CC = MF.getFunction()->getCallingConv(); 945 unsigned Count = CSI.size(); 946 (void)CC; 947 // MachO's compact unwind format relies on all registers being stored in 948 // pairs. 949 assert((!produceCompactUnwindFrame(MF) || 950 CC == CallingConv::PreserveMost || 951 (Count & 1) == 0) && 952 "Odd number of callee-saved regs to spill!"); 953 unsigned Offset = AFI->getCalleeSavedStackSize(); 954 955 for (unsigned i = 0; i < Count; ++i) { 956 RegPairInfo RPI; 957 RPI.Reg1 = CSI[i].getReg(); 958 959 assert(AArch64::GPR64RegClass.contains(RPI.Reg1) || 960 AArch64::FPR64RegClass.contains(RPI.Reg1)); 961 RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1); 962 963 // Add the next reg to the pair if it is in the same register class. 
964 if (i + 1 < Count) { 965 unsigned NextReg = CSI[i + 1].getReg(); 966 if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) || 967 (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg))) 968 RPI.Reg2 = NextReg; 969 } 970 971 // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI 972 // list to come in sorted by frame index so that we can issue the store 973 // pair instructions directly. Assert if we see anything otherwise. 974 // 975 // The order of the registers in the list is controlled by 976 // getCalleeSavedRegs(), so they will always be in-order, as well. 977 assert((!RPI.isPaired() || 978 (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) && 979 "Out of order callee saved regs!"); 980 981 // MachO's compact unwind format relies on all registers being stored in 982 // adjacent register pairs. 983 assert((!produceCompactUnwindFrame(MF) || 984 CC == CallingConv::PreserveMost || 985 (RPI.isPaired() && 986 ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) || 987 RPI.Reg1 + 1 == RPI.Reg2))) && 988 "Callee-save registers not saved as adjacent register pair!"); 989 990 RPI.FrameIdx = CSI[i].getFrameIdx(); 991 992 if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) { 993 // Round up size of non-pair to pair size if we need to pad the 994 // callee-save area to ensure 16-byte alignment. 995 Offset -= 16; 996 assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16); 997 MFI.setObjectAlignment(RPI.FrameIdx, 16); 998 AFI->setCalleeSaveStackHasFreeSpace(true); 999 } else 1000 Offset -= RPI.isPaired() ? 
16 : 8; 1001 assert(Offset % 8 == 0); 1002 RPI.Offset = Offset / 8; 1003 assert((RPI.Offset >= -64 && RPI.Offset <= 63) && 1004 "Offset out of bounds for LDP/STP immediate"); 1005 1006 RegPairs.push_back(RPI); 1007 if (RPI.isPaired()) 1008 ++i; 1009 } 1010 } 1011 1012 bool AArch64FrameLowering::spillCalleeSavedRegisters( 1013 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 1014 const std::vector<CalleeSavedInfo> &CSI, 1015 const TargetRegisterInfo *TRI) const { 1016 MachineFunction &MF = *MBB.getParent(); 1017 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 1018 DebugLoc DL; 1019 SmallVector<RegPairInfo, 8> RegPairs; 1020 1021 computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs); 1022 const MachineRegisterInfo &MRI = MF.getRegInfo(); 1023 1024 for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE; 1025 ++RPII) { 1026 RegPairInfo RPI = *RPII; 1027 unsigned Reg1 = RPI.Reg1; 1028 unsigned Reg2 = RPI.Reg2; 1029 unsigned StrOpc; 1030 1031 // Issue sequence of spills for cs regs. The first spill may be converted 1032 // to a pre-decrement store later by emitPrologue if the callee-save stack 1033 // area allocation can't be combined with the local stack area allocation. 1034 // For example: 1035 // stp x22, x21, [sp, #0] // addImm(+0) 1036 // stp x20, x19, [sp, #16] // addImm(+2) 1037 // stp fp, lr, [sp, #32] // addImm(+4) 1038 // Rationale: This sequence saves uop updates compared to a sequence of 1039 // pre-increment spills like stp xi,xj,[sp,#-16]! 1040 // Note: Similar rationale and sequence for restores in epilog. 1041 if (RPI.IsGPR) 1042 StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui; 1043 else 1044 StrOpc = RPI.isPaired() ? 
AArch64::STPDi : AArch64::STRDui; 1045 DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1); 1046 if (RPI.isPaired()) 1047 dbgs() << ", " << TRI->getName(Reg2); 1048 dbgs() << ") -> fi#(" << RPI.FrameIdx; 1049 if (RPI.isPaired()) 1050 dbgs() << ", " << RPI.FrameIdx+1; 1051 dbgs() << ")\n"); 1052 1053 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc)); 1054 if (!MRI.isReserved(Reg1)) 1055 MBB.addLiveIn(Reg1); 1056 if (RPI.isPaired()) { 1057 if (!MRI.isReserved(Reg2)) 1058 MBB.addLiveIn(Reg2); 1059 MIB.addReg(Reg2, getPrologueDeath(MF, Reg2)); 1060 MIB.addMemOperand(MF.getMachineMemOperand( 1061 MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1), 1062 MachineMemOperand::MOStore, 8, 8)); 1063 } 1064 MIB.addReg(Reg1, getPrologueDeath(MF, Reg1)) 1065 .addReg(AArch64::SP) 1066 .addImm(RPI.Offset) // [sp, #offset*8], where factor*8 is implicit 1067 .setMIFlag(MachineInstr::FrameSetup); 1068 MIB.addMemOperand(MF.getMachineMemOperand( 1069 MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx), 1070 MachineMemOperand::MOStore, 8, 8)); 1071 } 1072 return true; 1073 } 1074 1075 bool AArch64FrameLowering::restoreCalleeSavedRegisters( 1076 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 1077 const std::vector<CalleeSavedInfo> &CSI, 1078 const TargetRegisterInfo *TRI) const { 1079 MachineFunction &MF = *MBB.getParent(); 1080 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 1081 DebugLoc DL; 1082 SmallVector<RegPairInfo, 8> RegPairs; 1083 1084 if (MI != MBB.end()) 1085 DL = MI->getDebugLoc(); 1086 1087 computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs); 1088 1089 for (auto RPII = RegPairs.begin(), RPIE = RegPairs.end(); RPII != RPIE; 1090 ++RPII) { 1091 RegPairInfo RPI = *RPII; 1092 unsigned Reg1 = RPI.Reg1; 1093 unsigned Reg2 = RPI.Reg2; 1094 1095 // Issue sequence of restores for cs regs. 
The last restore may be converted 1096 // to a post-increment load later by emitEpilogue if the callee-save stack 1097 // area allocation can't be combined with the local stack area allocation. 1098 // For example: 1099 // ldp fp, lr, [sp, #32] // addImm(+4) 1100 // ldp x20, x19, [sp, #16] // addImm(+2) 1101 // ldp x22, x21, [sp, #0] // addImm(+0) 1102 // Note: see comment in spillCalleeSavedRegisters() 1103 unsigned LdrOpc; 1104 if (RPI.IsGPR) 1105 LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui; 1106 else 1107 LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui; 1108 DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1); 1109 if (RPI.isPaired()) 1110 dbgs() << ", " << TRI->getName(Reg2); 1111 dbgs() << ") -> fi#(" << RPI.FrameIdx; 1112 if (RPI.isPaired()) 1113 dbgs() << ", " << RPI.FrameIdx+1; 1114 dbgs() << ")\n"); 1115 1116 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc)); 1117 if (RPI.isPaired()) { 1118 MIB.addReg(Reg2, getDefRegState(true)); 1119 MIB.addMemOperand(MF.getMachineMemOperand( 1120 MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1), 1121 MachineMemOperand::MOLoad, 8, 8)); 1122 } 1123 MIB.addReg(Reg1, getDefRegState(true)) 1124 .addReg(AArch64::SP) 1125 .addImm(RPI.Offset) // [sp, #offset*8] where the factor*8 is implicit 1126 .setMIFlag(MachineInstr::FrameDestroy); 1127 MIB.addMemOperand(MF.getMachineMemOperand( 1128 MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx), 1129 MachineMemOperand::MOLoad, 8, 8)); 1130 } 1131 return true; 1132 } 1133 1134 void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, 1135 BitVector &SavedRegs, 1136 RegScavenger *RS) const { 1137 // All calls are tail calls in GHC calling conv, and functions have no 1138 // prologue/epilogue. 
1139 if (MF.getFunction()->getCallingConv() == CallingConv::GHC) 1140 return; 1141 1142 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); 1143 const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>( 1144 MF.getSubtarget().getRegisterInfo()); 1145 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 1146 unsigned UnspilledCSGPR = AArch64::NoRegister; 1147 unsigned UnspilledCSGPRPaired = AArch64::NoRegister; 1148 1149 // The frame record needs to be created by saving the appropriate registers 1150 if (hasFP(MF)) { 1151 SavedRegs.set(AArch64::FP); 1152 SavedRegs.set(AArch64::LR); 1153 } 1154 1155 unsigned BasePointerReg = AArch64::NoRegister; 1156 if (RegInfo->hasBasePointer(MF)) 1157 BasePointerReg = RegInfo->getBaseRegister(); 1158 1159 unsigned ExtraCSSpill = 0; 1160 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); 1161 // Figure out which callee-saved registers to save/restore. 1162 for (unsigned i = 0; CSRegs[i]; ++i) { 1163 const unsigned Reg = CSRegs[i]; 1164 1165 // Add the base pointer register to SavedRegs if it is callee-save. 1166 if (Reg == BasePointerReg) 1167 SavedRegs.set(Reg); 1168 1169 bool RegUsed = SavedRegs.test(Reg); 1170 unsigned PairedReg = CSRegs[i ^ 1]; 1171 if (!RegUsed) { 1172 if (AArch64::GPR64RegClass.contains(Reg) && 1173 !RegInfo->isReservedReg(MF, Reg)) { 1174 UnspilledCSGPR = Reg; 1175 UnspilledCSGPRPaired = PairedReg; 1176 } 1177 continue; 1178 } 1179 1180 // MachO's compact unwind format relies on all registers being stored in 1181 // pairs. 1182 // FIXME: the usual format is actually better if unwinding isn't needed. 
1183 if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg)) { 1184 SavedRegs.set(PairedReg); 1185 if (AArch64::GPR64RegClass.contains(PairedReg) && 1186 !RegInfo->isReservedReg(MF, PairedReg)) 1187 ExtraCSSpill = PairedReg; 1188 } 1189 } 1190 1191 DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:"; 1192 for (unsigned Reg : SavedRegs.set_bits()) 1193 dbgs() << ' ' << PrintReg(Reg, RegInfo); 1194 dbgs() << "\n";); 1195 1196 // If any callee-saved registers are used, the frame cannot be eliminated. 1197 unsigned NumRegsSpilled = SavedRegs.count(); 1198 bool CanEliminateFrame = NumRegsSpilled == 0; 1199 1200 // The CSR spill slots have not been allocated yet, so estimateStackSize 1201 // won't include them. 1202 MachineFrameInfo &MFI = MF.getFrameInfo(); 1203 unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled; 1204 DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n"); 1205 unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF); 1206 bool BigStack = (CFSize > EstimatedStackSizeLimit); 1207 if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) 1208 AFI->setHasStackFrame(true); 1209 1210 // Estimate if we might need to scavenge a register at some point in order 1211 // to materialize a stack offset. If so, either spill one additional 1212 // callee-saved register or reserve a special spill slot to facilitate 1213 // register scavenging. If we already spilled an extra callee-saved register 1214 // above to keep the number of spills even, we don't need to do anything else 1215 // here. 1216 if (BigStack) { 1217 if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) { 1218 DEBUG(dbgs() << "Spilling " << PrintReg(UnspilledCSGPR, RegInfo) 1219 << " to get a scratch register.\n"); 1220 SavedRegs.set(UnspilledCSGPR); 1221 // MachO's compact unwind format relies on all registers being stored in 1222 // pairs, so if we need to spill one extra for BigStack, then we need to 1223 // store the pair. 
1224 if (produceCompactUnwindFrame(MF)) 1225 SavedRegs.set(UnspilledCSGPRPaired); 1226 ExtraCSSpill = UnspilledCSGPRPaired; 1227 NumRegsSpilled = SavedRegs.count(); 1228 } 1229 1230 // If we didn't find an extra callee-saved register to spill, create 1231 // an emergency spill slot. 1232 if (!ExtraCSSpill || MF.getRegInfo().isPhysRegUsed(ExtraCSSpill)) { 1233 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 1234 const TargetRegisterClass &RC = AArch64::GPR64RegClass; 1235 unsigned Size = TRI->getSpillSize(RC); 1236 unsigned Align = TRI->getSpillAlignment(RC); 1237 int FI = MFI.CreateStackObject(Size, Align, false); 1238 RS->addScavengingFrameIndex(FI); 1239 DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI 1240 << " as the emergency spill slot.\n"); 1241 } 1242 } 1243 1244 // Round up to register pair alignment to avoid additional SP adjustment 1245 // instructions. 1246 AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16)); 1247 } 1248 1249 bool AArch64FrameLowering::enableStackSlotScavenging( 1250 const MachineFunction &MF) const { 1251 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 1252 return AFI->hasCalleeSaveStackFreeSpace(); 1253 } 1254