//===-- ARMFrameLowering.cpp - ARM Frame Information ----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the ARM implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "ARMFrameLowering.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

static cl::opt<bool>
SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
                     cl::desc("Align ARM NEON spills in prolog and epilog"));

static MachineBasicBlock::iterator
skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
                        unsigned NumAlignedDPRCS2Regs);

/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
  const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();

  // iOS requires FP not to be clobbered for backtracing purpose.
  if (STI.isTargetIOS())
    return true;

  const MachineFrameInfo *MFI = MF.getFrameInfo();
  // Always eliminate non-leaf frame pointers.
  return ((MF.getTarget().Options.DisableFramePointerElim(MF) &&
           MFI->hasCalls()) ||
          RegInfo->needsStackRealignment(MF) ||
          MFI->hasVarSizedObjects() ||
          MFI->isFrameAddressTaken());
}

/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
/// not required, we reserve argument space for call sites in the function
/// immediately on entry to the current function. This eliminates the need for
/// add/sub sp brackets around call sites. Returns true if the call frame is
/// included as part of the stack frame.
bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  const MachineFrameInfo *FFI = MF.getFrameInfo();
  unsigned CFSize = FFI->getMaxCallFrameSize();
  // It's not always a good idea to include the call frame as part of the
  // stack frame. ARM (especially Thumb) has a small immediate offset range
  // for addressing the stack frame. So a large call frame can cause poor
  // codegen and may even make it impossible to scavenge a register.
  if (CFSize >= ((1 << 12) - 1) / 2)  // Half of imm12
    return false;

  return !MF.getFrameInfo()->hasVarSizedObjects();
}

/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
/// call frame pseudos can be simplified. Unlike most targets, having a FP
/// is not sufficient here since we still may reference some objects via SP
/// even when FP is available in Thumb2 mode.
bool
ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
  return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects();
}

static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) {
  for (unsigned i = 0; CSRegs[i]; ++i)
    if (Reg == CSRegs[i])
      return true;
  return false;
}

static bool isCSRestore(MachineInstr *MI,
                        const ARMBaseInstrInfo &TII,
                        const uint16_t *CSRegs) {
  // Integer spill area is handled with "pop".
  if (MI->getOpcode() == ARM::LDMIA_RET ||
      MI->getOpcode() == ARM::t2LDMIA_RET ||
      MI->getOpcode() == ARM::LDMIA_UPD ||
      MI->getOpcode() == ARM::t2LDMIA_UPD ||
      MI->getOpcode() == ARM::VLDMDIA_UPD) {
    // The first two operands are predicates. The last two are
    // imp-def and imp-use of SP. Check everything in between.
    for (int i = 5, e = MI->getNumOperands(); i != e; ++i)
      if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs))
        return false;
    return true;
  }
  if ((MI->getOpcode() == ARM::LDR_POST_IMM ||
       MI->getOpcode() == ARM::LDR_POST_REG ||
       MI->getOpcode() == ARM::t2LDR_POST) &&
      isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) &&
      MI->getOperand(1).getReg() == ARM::SP)
    return true;

  return false;
}

static void
emitSPUpdate(bool isARM,
             MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
             DebugLoc dl, const ARMBaseInstrInfo &TII,
             int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
             ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
  if (isARM)
    emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
                            Pred, PredReg, TII, MIFlags);
  else
    emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
                           Pred, PredReg, TII, MIFlags);
}

void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
  MachineBasicBlock &MBB = MF.front();
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const ARMBaseRegisterInfo *RegInfo =
    static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
  const ARMBaseInstrInfo &TII =
    *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
  assert(!AFI->isThumb1OnlyFunction() &&
         "This emitPrologue does not support Thumb1!");
  bool isARM = !AFI->isThumbFunction();
  unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
  unsigned NumBytes = MFI->getStackSize();
  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
  unsigned FramePtr = RegInfo->getFrameRegister(MF);

  // Determine the sizes of each callee-save spill area and record which frame
  // index belongs to which area.
  unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
  int FramePtrSpillFI = 0;
  int D8SpillFI = 0;

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;
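
  // A rough sketch of the frame laid out below (higher addresses at the top;
  // the vararg save area, which is not counted in NumBytes, and the aligned
  // DPRCS2 area are omitted). The actual offsets are computed further down.
  //
  //   <incoming SP>
  //   | GPR callee-save area 1 (push)             | <- GPRCS1Offset
  //   | GPR callee-save area 2 (push)             | <- GPRCS2Offset
  //   | DPR callee-save area (vpush)              | <- DPRCSOffset
  //   | locals, spill slots, outgoing call frame  |
  //   <SP at the end of the prologue>
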
  // Allocate the vararg register save area. This is not counted in NumBytes.
  if (ArgRegsSaveSize)
    emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
                 MachineInstr::FrameSetup);

  if (!AFI->hasStackFrame()) {
    if (NumBytes != 0)
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
                   MachineInstr::FrameSetup);
    return;
  }

  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    int FI = CSI[i].getFrameIdx();
    switch (Reg) {
    case ARM::R0:
    case ARM::R1:
    case ARM::R2:
    case ARM::R3:
    case ARM::R4:
    case ARM::R5:
    case ARM::R6:
    case ARM::R7:
    case ARM::LR:
      if (Reg == FramePtr)
        FramePtrSpillFI = FI;
      AFI->addGPRCalleeSavedArea1Frame(FI);
      GPRCS1Size += 4;
      break;
    case ARM::R8:
    case ARM::R9:
    case ARM::R10:
    case ARM::R11:
    case ARM::R12:
      if (Reg == FramePtr)
        FramePtrSpillFI = FI;
      if (STI.isTargetIOS()) {
        AFI->addGPRCalleeSavedArea2Frame(FI);
        GPRCS2Size += 4;
      } else {
        AFI->addGPRCalleeSavedArea1Frame(FI);
        GPRCS1Size += 4;
      }
      break;
    default:
      // This is a DPR. Exclude the aligned DPRCS2 spills.
      if (Reg == ARM::D8)
        D8SpillFI = FI;
      if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) {
        AFI->addDPRCalleeSavedAreaFrame(FI);
        DPRCSSize += 8;
      }
    }
  }

  // Move past area 1.
  if (GPRCS1Size > 0) MBBI++;

  // Set FP to point to the stack slot that contains the previous FP.
  // For iOS, FP is R7, which has now been stored in spill area 1.
  // Otherwise, if this is not iOS, all the callee-saved registers go
  // into spill area 1, including the FP in R11. In either case, it is
  // now safe to emit this assignment.
  bool HasFP = hasFP(MF);
  if (HasFP) {
    unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri;
    MachineInstrBuilder MIB =
      BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr)
      .addFrameIndex(FramePtrSpillFI).addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);
    AddDefaultCC(AddDefaultPred(MIB));
  }

  // Move past area 2.
  if (GPRCS2Size > 0) MBBI++;

  // Determine starting offsets of spill areas.
  unsigned DPRCSOffset  = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
  unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
  unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
  if (HasFP)
    AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
                                NumBytes);
  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
  AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);

  // Move past area 3.
  if (DPRCSSize > 0) {
    MBBI++;
    // Since vpush register list cannot have gaps, there may be multiple vpush
    // instructions in the prologue.
    while (MBBI->getOpcode() == ARM::VSTMDDB_UPD)
      MBBI++;
  }

  // Move past the aligned DPRCS2 area.
  if (AFI->getNumAlignedDPRCS2Regs() > 0) {
    MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs());
    // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
    // leaves the stack pointer pointing to the DPRCS2 area.
    //
    // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
    NumBytes += MFI->getObjectOffset(D8SpillFI);
  } else
    NumBytes = DPRCSOffset;

  if (NumBytes) {
    // Adjust SP after all the callee-save spills.
    emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
                 MachineInstr::FrameSetup);
    if (HasFP && isARM)
      // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
      // Note it's not safe to do this in Thumb2 mode because it would have
      // taken two instructions:
      //   mov sp, r7
      //   sub sp, #24
      // If an interrupt is taken between the two instructions, then sp is in
      // an inconsistent state (pointing to the middle of callee-saved area).
      // The interrupt handler can end up clobbering the registers.
      AFI->setShouldRestoreSPFromFP(true);
  }

  if (STI.isTargetELF() && hasFP(MF))
    MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
                             AFI->getFramePtrSpillOffset());

  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
  AFI->setDPRCalleeSavedAreaSize(DPRCSSize);

  // If we need dynamic stack realignment, do it here. Be paranoid and make
  // sure if we also have VLAs, we have a base pointer for frame access.
  // If aligned NEON registers were spilled, the stack has already been
  // realigned.
  if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) {
    unsigned MaxAlign = MFI->getMaxAlignment();
    assert(!AFI->isThumb1OnlyFunction());
    if (!AFI->isThumbFunction()) {
      // Emit bic sp, sp, MaxAlign
      AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
                                          TII.get(ARM::BICri), ARM::SP)
                                  .addReg(ARM::SP, RegState::Kill)
                                  .addImm(MaxAlign-1)));
    } else {
      // We cannot use sp as source/dest register here, thus we're emitting the
      // following sequence:
      //   mov r4, sp
      //   bic r4, r4, MaxAlign
      //   mov sp, r4
      // FIXME: It would be better just to find a spare register here.
      AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
                     .addReg(ARM::SP, RegState::Kill));
      AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
                                          TII.get(ARM::t2BICri), ARM::R4)
                                  .addReg(ARM::R4, RegState::Kill)
                                  .addImm(MaxAlign-1)));
      AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
                     .addReg(ARM::R4, RegState::Kill));
    }

    AFI->setShouldRestoreSPFromFP(true);
  }

  // If we need a base pointer, set it up here. It's whatever the value
  // of the stack pointer is at this point. Any variable size objects
  // will be allocated after this, so we can still use the base pointer
  // to reference locals.
  // FIXME: Clarify FrameSetup flags here.
  if (RegInfo->hasBasePointer(MF)) {
    if (isARM)
      BuildMI(MBB, MBBI, dl,
              TII.get(ARM::MOVr), RegInfo->getBaseRegister())
        .addReg(ARM::SP)
        .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
    else
      AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
                             RegInfo->getBaseRegister())
                     .addReg(ARM::SP));
  }

  // If the frame has variable sized objects then the epilogue must restore
  // the sp from fp. We can assume there's an FP here since hasFP already
  // checks for hasVarSizedObjects.
  if (MFI->hasVarSizedObjects())
    AFI->setShouldRestoreSPFromFP(true);
}

void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
  unsigned RetOpcode = MBBI->getOpcode();
  DebugLoc dl = MBBI->getDebugLoc();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
  const ARMBaseInstrInfo &TII =
    *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
  assert(!AFI->isThumb1OnlyFunction() &&
         "This emitEpilogue does not support Thumb1!");
  bool isARM = !AFI->isThumbFunction();

  unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
  int NumBytes = (int)MFI->getStackSize();
  unsigned FramePtr = RegInfo->getFrameRegister(MF);

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  if (!AFI->hasStackFrame()) {
    if (NumBytes != 0)
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
  } else {
    // Unwind MBBI to point to first LDR / VLDRD.
    const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
    if (MBBI != MBB.begin()) {
      do
        --MBBI;
      while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs));
      if (!isCSRestore(MBBI, TII, CSRegs))
        ++MBBI;
    }

    // Move SP to start of FP callee save spill area.
    NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
                 AFI->getGPRCalleeSavedArea2Size() +
                 AFI->getDPRCalleeSavedAreaSize());

    // Reset SP based on frame pointer only if the stack frame extends beyond
    // frame pointer stack slot or target is ELF and the function has FP.
    if (AFI->shouldRestoreSPFromFP()) {
      NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
      if (NumBytes) {
        if (isARM)
          emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
                                  ARMCC::AL, 0, TII);
        else {
          // It's not possible to restore SP from FP in a single instruction.
          // For iOS, this looks like:
          //   mov sp, r7
          //   sub sp, #24
          // This is bad: if an interrupt is taken after the mov, sp is in an
          // inconsistent state.
          // Use the first callee-saved register as a scratch register.
          assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) &&
                 "No scratch register to restore SP from FP!");
          emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
                                 ARMCC::AL, 0, TII);
          AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
                                 ARM::SP)
                         .addReg(ARM::R4));
        }
      } else {
        // Thumb2 or ARM.
        if (isARM)
          BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
            .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
        else
          AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
                                 ARM::SP)
                         .addReg(FramePtr));
      }
    } else if (NumBytes)
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);

    // Increment past our save areas.
    if (AFI->getDPRCalleeSavedAreaSize()) {
      MBBI++;
      // Since vpop register list cannot have gaps, there may be multiple vpop
      // instructions in the epilogue.
      while (MBBI->getOpcode() == ARM::VLDMDIA_UPD)
        MBBI++;
    }
    if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
    if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
  }

  if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri) {
    // Tail call return: adjust the stack pointer and jump to callee.
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);

    // Jump to label or value in register.
    if (RetOpcode == ARM::TCRETURNdi) {
      unsigned TCOpcode = STI.isThumb() ?
               (STI.isTargetIOS() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) :
               ARM::TAILJMPd;
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
      if (JumpTarget.isGlobal())
        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                             JumpTarget.getTargetFlags());
      else {
        assert(JumpTarget.isSymbol());
        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                              JumpTarget.getTargetFlags());
      }

      // Add the default predicate in Thumb mode.
      if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0);
    } else if (RetOpcode == ARM::TCRETURNri) {
      BuildMI(MBB, MBBI, dl,
              TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)).
        addReg(JumpTarget.getReg(), RegState::Kill);
    }

    MachineInstr *NewMI = prior(MBBI);
    for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
      NewMI->addOperand(MBBI->getOperand(i));

    // Delete the pseudo instruction TCRETURN.
    MBB.erase(MBBI);
    MBBI = NewMI;
  }

  if (ArgRegsSaveSize)
    emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize);
}

/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
/// debug info. It's the same as what we use for resolving the code-gen
/// references for now. FIXME: This can go wrong when references are
/// SP-relative and simple call frames aren't used.
int
ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
                                         unsigned &FrameReg) const {
  return ResolveFrameIndexReference(MF, FI, FrameReg, 0);
}

int
ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
                                             int FI, unsigned &FrameReg,
                                             int SPAdj) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const ARMBaseRegisterInfo *RegInfo =
    static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
  int FPOffset = Offset - AFI->getFramePtrSpillOffset();
  bool isFixed = MFI->isFixedObjectIndex(FI);

  FrameReg = ARM::SP;
  Offset += SPAdj;
  if (AFI->isGPRCalleeSavedArea1Frame(FI))
    return Offset - AFI->getGPRCalleeSavedArea1Offset();
  else if (AFI->isGPRCalleeSavedArea2Frame(FI))
    return Offset - AFI->getGPRCalleeSavedArea2Offset();
  else if (AFI->isDPRCalleeSavedAreaFrame(FI))
    return Offset - AFI->getDPRCalleeSavedAreaOffset();

  // SP can move around if there are allocas. We may also lose track of SP
  // when emergency spilling inside a non-reserved call frame setup.
  bool hasMovingSP = !hasReservedCallFrame(MF);

  // When dynamically realigning the stack, use the frame pointer for
  // parameters, and the stack/base pointer for locals.
  if (RegInfo->needsStackRealignment(MF)) {
    assert(hasFP(MF) && "dynamic stack realignment without a FP!");
    if (isFixed) {
      FrameReg = RegInfo->getFrameRegister(MF);
      Offset = FPOffset;
    } else if (hasMovingSP) {
      assert(RegInfo->hasBasePointer(MF) &&
             "VLAs and dynamic stack alignment, but missing base pointer!");
      FrameReg = RegInfo->getBaseRegister();
    }
    return Offset;
  }

  // If there is a frame pointer, use it when we can.
  if (hasFP(MF) && AFI->hasStackFrame()) {
    // Use frame pointer to reference fixed objects. Use it for locals if
    // there are VLAs (and thus the SP isn't reliable as a base).
    if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
      FrameReg = RegInfo->getFrameRegister(MF);
      return FPOffset;
    } else if (hasMovingSP) {
      assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
      if (AFI->isThumb2Function()) {
        // Try to use the frame pointer if we can, else use the base pointer
        // since it's available. This is handy for the emergency spill slot, in
        // particular.
        if (FPOffset >= -255 && FPOffset < 0) {
          FrameReg = RegInfo->getFrameRegister(MF);
          return FPOffset;
        }
      }
    } else if (AFI->isThumb2Function()) {
      // Use  add <rd>, sp, #<imm8>
      //      ldr <rd>, [sp, #<imm8>]
      // if at all possible to save space.
      if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
        return Offset;
      // In Thumb2 mode, the negative offset is very limited. Try to avoid
      // out of range references. ldr <rt>,[<rn>, #-<imm8>]
      if (FPOffset >= -255 && FPOffset < 0) {
        FrameReg = RegInfo->getFrameRegister(MF);
        return FPOffset;
      }
    } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
      // Otherwise, use SP or FP, whichever is closer to the stack slot.
      FrameReg = RegInfo->getFrameRegister(MF);
      return FPOffset;
    }
  }
  // Use the base pointer if we have one.
  if (RegInfo->hasBasePointer(MF))
    FrameReg = RegInfo->getBaseRegister();
  return Offset;
}

int ARMFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
                                          int FI) const {
  unsigned FrameReg;
  return getFrameIndexReference(MF, FI, FrameReg);
}

void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    const std::vector<CalleeSavedInfo> &CSI,
                                    unsigned StmOpc, unsigned StrOpc,
                                    bool NoGap,
                                    bool(*Func)(unsigned, bool),
                                    unsigned NumAlignedDPRCS2Regs,
                                    unsigned MIFlags) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();

  DebugLoc DL;
  if (MI != MBB.end()) DL = MI->getDebugLoc();

  SmallVector<std::pair<unsigned,bool>, 4> Regs;
  unsigned i = CSI.size();
  while (i != 0) {
    unsigned LastReg = 0;
    for (; i != 0; --i) {
      unsigned Reg = CSI[i-1].getReg();
      if (!(Func)(Reg, STI.isTargetIOS())) continue;

      // D-registers in the aligned area DPRCS2 are NOT spilled here.
      if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
        continue;

      // Add the callee-saved register as live-in unless it's LR and
      // @llvm.returnaddress is called. If LR is returned for
      // @llvm.returnaddress then it's already added to the function and
      // entry block live-in sets.
      bool isKill = true;
      if (Reg == ARM::LR) {
        if (MF.getFrameInfo()->isReturnAddressTaken() &&
            MF.getRegInfo().isLiveIn(Reg))
          isKill = false;
      }

      if (isKill)
        MBB.addLiveIn(Reg);

      // If NoGap is true, push consecutive registers and then leave the rest
      // for other instructions. e.g.
      // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
      if (NoGap && LastReg && LastReg != Reg-1)
        break;
      LastReg = Reg;
      Regs.push_back(std::make_pair(Reg, isKill));
    }

    if (Regs.empty())
      continue;
    if (Regs.size() > 1 || StrOpc == 0) {
      MachineInstrBuilder MIB =
        AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
                       .addReg(ARM::SP).setMIFlags(MIFlags));
      for (unsigned i = 0, e = Regs.size(); i < e; ++i)
        MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
    } else if (Regs.size() == 1) {
      MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc),
                                        ARM::SP)
        .addReg(Regs[0].first, getKillRegState(Regs[0].second))
        .addReg(ARM::SP).setMIFlags(MIFlags)
        .addImm(-4);
      AddDefaultPred(MIB);
    }
    Regs.clear();
  }
}

void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MI,
                                   const std::vector<CalleeSavedInfo> &CSI,
                                   unsigned LdmOpc, unsigned LdrOpc,
                                   bool isVarArg, bool NoGap,
                                   bool(*Func)(unsigned, bool),
                                   unsigned NumAlignedDPRCS2Regs) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL = MI->getDebugLoc();
  unsigned RetOpcode = MI->getOpcode();
  bool isTailCall = (RetOpcode == ARM::TCRETURNdi ||
                     RetOpcode == ARM::TCRETURNri);
  bool isInterrupt =
      RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;

  SmallVector<unsigned, 4> Regs;
  unsigned i = CSI.size();
  while (i != 0) {
    unsigned LastReg = 0;
    bool DeleteRet = false;
    for (; i != 0; --i) {
      unsigned Reg = CSI[i-1].getReg();
      if (!(Func)(Reg, STI.isTargetIOS())) continue;

      // The aligned reloads from area DPRCS2 are not inserted here.
      if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
        continue;

      if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
          STI.hasV5TOps()) {
        Reg = ARM::PC;
        LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
        // Fold the return instruction into the LDM.
        DeleteRet = true;
      }

      // If NoGap is true, pop consecutive registers and then leave the rest
      // for other instructions. e.g.
      // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
      if (NoGap && LastReg && LastReg != Reg-1)
        break;

      LastReg = Reg;
      Regs.push_back(Reg);
    }

    if (Regs.empty())
      continue;
    if (Regs.size() > 1 || LdrOpc == 0) {
      MachineInstrBuilder MIB =
        AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
                       .addReg(ARM::SP));
      for (unsigned i = 0, e = Regs.size(); i < e; ++i)
        MIB.addReg(Regs[i], getDefRegState(true));
      if (DeleteRet) {
        MIB.copyImplicitOps(&*MI);
        MI->eraseFromParent();
      }
      MI = MIB;
    } else if (Regs.size() == 1) {
      // If we adjusted the reg to PC from LR above, switch it back here. We
      // only do that for LDM.
      if (Regs[0] == ARM::PC)
        Regs[0] = ARM::LR;
      MachineInstrBuilder MIB =
        BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
          .addReg(ARM::SP, RegState::Define)
          .addReg(ARM::SP);
      // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
      // that refactoring is complete (eventually).
      if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
        MIB.addReg(0);
        MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
      } else
        MIB.addImm(4);
      AddDefaultPred(MIB);
    }
    Regs.clear();
  }
}

/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
/// starting from d8. Also insert stack realignment code and leave the stack
/// pointer pointing to the d8 spill slot.
static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    unsigned NumAlignedDPRCS2Regs,
                                    const std::vector<CalleeSavedInfo> &CSI,
                                    const TargetRegisterInfo *TRI) {
  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL = MI->getDebugLoc();
  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
  MachineFrameInfo &MFI = *MF.getFrameInfo();

  // Mark the D-register spill slots as properly aligned. Since MFI computes
  // stack slot layout backwards, this can actually mean that the d-reg stack
  // slot offsets can be wrong. The offset for d8 will always be correct.
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned DNum = CSI[i].getReg() - ARM::D8;
    if (DNum >= 8)
      continue;
    int FI = CSI[i].getFrameIdx();
    // The even-numbered registers will be 16-byte aligned, the odd-numbered
    // registers will be 8-byte aligned.
    MFI.setObjectAlignment(FI, DNum % 2 ? 8 : 16);

    // The stack slot for D8 needs to be maximally aligned because this is
    // actually the point where we align the stack pointer. MachineFrameInfo
    // computes all offsets relative to the incoming stack pointer which is a
    // bit weird when realigning the stack. Any extra padding for this
    // over-alignment is not realized because the code inserted below adjusts
    // the stack pointer by numregs * 8 before aligning the stack pointer.
    if (DNum == 0)
      MFI.setObjectAlignment(FI, MFI.getMaxAlignment());
  }

  // Move the stack pointer to the d8 spill slot, and align it at the same
  // time. Leave the stack slot address in the scratch register r4.
  //
  //   sub r4, sp, #numregs * 8
  //   bic r4, r4, #align - 1
  //   mov sp, r4
  //
  bool isThumb = AFI->isThumbFunction();
  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
  AFI->setShouldRestoreSPFromFP(true);

  // sub r4, sp, #numregs * 8
  // The immediate is <= 64, so it doesn't need any special encoding.
  unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
  AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
                              .addReg(ARM::SP)
                              .addImm(8 * NumAlignedDPRCS2Regs)));

  // bic r4, r4, #align-1
  Opc = isThumb ? ARM::t2BICri : ARM::BICri;
  unsigned MaxAlign = MF.getFrameInfo()->getMaxAlignment();
  AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
                              .addReg(ARM::R4, RegState::Kill)
                              .addImm(MaxAlign - 1)));

  // mov sp, r4
  // The stack pointer must be adjusted before spilling anything, otherwise
  // the stack slots could be clobbered by an interrupt handler.
  // Leave r4 live, it is used below.
  Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
                            .addReg(ARM::R4);
  MIB = AddDefaultPred(MIB);
  if (!isThumb)
    AddDefaultCC(MIB);

  // Now spill NumAlignedDPRCS2Regs registers starting from d8.
  // r4 holds the stack slot address.
  unsigned NextReg = ARM::D8;

  // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
  // The writeback is only needed when emitting two vst1.64 instructions.
  if (NumAlignedDPRCS2Regs >= 6) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    MBB.addLiveIn(SupReg);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed),
                           ARM::R4)
                   .addReg(ARM::R4, RegState::Kill).addImm(16)
                   .addReg(NextReg)
                   .addReg(SupReg, RegState::ImplicitKill));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // We won't modify r4 beyond this point. It currently points to the next
  // register to be spilled.
  unsigned R4BaseReg = NextReg;

  // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
  if (NumAlignedDPRCS2Regs >= 4) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    MBB.addLiveIn(SupReg);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
                   .addReg(ARM::R4).addImm(16).addReg(NextReg)
                   .addReg(SupReg, RegState::ImplicitKill));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // 16-byte aligned vst1.64 with 2 d-regs.
  if (NumAlignedDPRCS2Regs >= 2) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QPRRegClass);
    MBB.addLiveIn(SupReg);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
                   .addReg(ARM::R4).addImm(16).addReg(SupReg));
    NextReg += 2;
    NumAlignedDPRCS2Regs -= 2;
  }

  // Finally, use a vanilla vstr.64 for the odd last register.
  if (NumAlignedDPRCS2Regs) {
    MBB.addLiveIn(NextReg);
    // vstr.64 uses addrmode5 which has an offset scale of 4.
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
                   .addReg(NextReg)
                   .addReg(ARM::R4).addImm((NextReg-R4BaseReg)*2));
  }

  // The last spill instruction inserted should kill the scratch register r4.
  llvm::prior(MI)->addRegisterKilled(ARM::R4, TRI);
}

/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
/// iterator to the following instruction.
static MachineBasicBlock::iterator
skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
                        unsigned NumAlignedDPRCS2Regs) {
  //   sub r4, sp, #numregs * 8
  //   bic r4, r4, #align - 1
  //   mov sp, r4
  ++MI; ++MI; ++MI;
  assert(MI->mayStore() && "Expecting spill instruction");

  // These switches all fall through.
  switch(NumAlignedDPRCS2Regs) {
  case 7:
    ++MI;
    assert(MI->mayStore() && "Expecting spill instruction");
  default:
    ++MI;
    assert(MI->mayStore() && "Expecting spill instruction");
  case 1:
  case 2:
  case 4:
    assert(MI->killsRegister(ARM::R4) && "Missed kill flag");
    ++MI;
  }
  return MI;
}

/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
/// starting from d8. These instructions are assumed to execute while the
/// stack is still aligned, unlike the code inserted by emitPopInst.
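///
/// The reload sequence mirrors emitAlignedDPRCS2Spills: the address of the d8
/// spill slot is materialized into the scratch register r4, and the registers
/// are then reloaded with aligned vld1.64 / vldr instructions.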
static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI,
                                      unsigned NumAlignedDPRCS2Regs,
                                      const std::vector<CalleeSavedInfo> &CSI,
                                      const TargetRegisterInfo *TRI) {
  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL = MI->getDebugLoc();
  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();

  // Find the frame index assigned to d8.
  int D8SpillFI = 0;
  for (unsigned i = 0, e = CSI.size(); i != e; ++i)
    if (CSI[i].getReg() == ARM::D8) {
      D8SpillFI = CSI[i].getFrameIdx();
      break;
    }

  // Materialize the address of the d8 spill slot into the scratch register r4.
  // This can be fairly complicated if the stack frame is large, so just use
  // the normal frame index elimination mechanism to do it. This code runs as
  // the initial part of the epilog where the stack and base pointers haven't
  // been changed yet.
  bool isThumb = AFI->isThumbFunction();
  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");

  unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
  AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
                              .addFrameIndex(D8SpillFI).addImm(0)));

  // Now restore NumAlignedDPRCS2Regs registers starting from d8.
  unsigned NextReg = ARM::D8;

  // 16-byte aligned vld1.64 with 4 d-regs and writeback.
  if (NumAlignedDPRCS2Regs >= 6) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
                   .addReg(ARM::R4, RegState::Define)
                   .addReg(ARM::R4, RegState::Kill).addImm(16)
                   .addReg(SupReg, RegState::ImplicitDefine));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // We won't modify r4 beyond this point. It currently points to the next
  // register to be reloaded.
  unsigned R4BaseReg = NextReg;

  // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
  if (NumAlignedDPRCS2Regs >= 4) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
                   .addReg(ARM::R4).addImm(16)
                   .addReg(SupReg, RegState::ImplicitDefine));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // 16-byte aligned vld1.64 with 2 d-regs.
  if (NumAlignedDPRCS2Regs >= 2) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QPRRegClass);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
                   .addReg(ARM::R4).addImm(16));
    NextReg += 2;
    NumAlignedDPRCS2Regs -= 2;
  }

  // Finally, use a vanilla vldr.64 for the remaining odd register.
  if (NumAlignedDPRCS2Regs)
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
                   .addReg(ARM::R4).addImm(2*(NextReg-R4BaseReg)));

  // The last reload inserted kills r4.
  llvm::prior(MI)->addRegisterKilled(ARM::R4, TRI);
}

bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MI,
                                        const std::vector<CalleeSavedInfo> &CSI,
                                        const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  unsigned PushOpc = AFI->isThumbFunction() ?
    ARM::t2STMDB_UPD : ARM::STMDB_UPD;
  unsigned PushOneOpc = AFI->isThumbFunction() ?
    ARM::t2STR_PRE : ARM::STR_PRE_IMM;
  unsigned FltOpc = ARM::VSTMDDB_UPD;
  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0,
               MachineInstr::FrameSetup);
  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0,
               MachineInstr::FrameSetup);
  emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
               NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);

  // The code above does not insert spill code for the aligned DPRCS2 registers.
  // The stack realignment code will be inserted between the push instructions
  // and these spills.
  if (NumAlignedDPRCS2Regs)
    emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);

  return true;
}

bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MI,
                                        const std::vector<CalleeSavedInfo> &CSI,
                                        const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  bool isVarArg = AFI->getArgRegsSaveSize() > 0;
  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();

  // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
  // registers. Do that here instead.
  if (NumAlignedDPRCS2Regs)
    emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);

  unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
  unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM;
  unsigned FltOpc = ARM::VLDMDIA_UPD;
  emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
              NumAlignedDPRCS2Regs);
  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
              &isARMArea2Register, 0);
  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
              &isARMArea1Register, 0);

  return true;
}

// FIXME: Make generic?
static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
                                       const ARMBaseInstrInfo &TII) {
  unsigned FnSize = 0;
  for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end();
       MBBI != E; ++MBBI) {
    const MachineBasicBlock &MBB = *MBBI;
    for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
         I != E; ++I)
      FnSize += TII.GetInstSizeInBytes(I);
  }
  return FnSize;
}

/// estimateRSStackSizeLimit - Look at each instruction that references stack
/// frames and return the stack size limit beyond which some of these
/// instructions will require a scratch register during their expansion later.
// FIXME: Move to TII?
static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
                                         const TargetFrameLowering *TFI) {
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  unsigned Limit = (1 << 12) - 1;
  for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) {
    for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
         I != E; ++I) {
      for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
        if (!I->getOperand(i).isFI()) continue;

        // When using ADDri to get the address of a stack object, 255 is the
        // largest offset guaranteed to fit in the immediate offset.
        if (I->getOpcode() == ARM::ADDri) {
          Limit = std::min(Limit, (1U << 8) - 1);
          break;
        }

        // Otherwise check the addressing mode.
        switch (I->getDesc().TSFlags & ARMII::AddrModeMask) {
        case ARMII::AddrMode3:
        case ARMII::AddrModeT2_i8:
          Limit = std::min(Limit, (1U << 8) - 1);
          break;
        case ARMII::AddrMode5:
        case ARMII::AddrModeT2_i8s4:
          Limit = std::min(Limit, ((1U << 8) - 1) * 4);
          break;
        case ARMII::AddrModeT2_i12:
          // i12 supports only positive offset so these will be converted to
          // i8 opcodes. See llvm::rewriteT2FrameIndex.
          if (TFI->hasFP(MF) && AFI->hasStackFrame())
            Limit = std::min(Limit, (1U << 8) - 1);
          break;
        case ARMII::AddrMode4:
        case ARMII::AddrMode6:
          // Addressing modes 4 & 6 (load/store) instructions can't encode an
          // immediate offset for stack references.
          return 0;
        default:
          break;
        }
        break; // At most one FI per instruction
      }
    }
  }

  return Limit;
}

// In functions that realign the stack, it can be an advantage to spill the
// callee-saved vector registers after realigning the stack. The vst1 and vld1
// instructions take alignment hints that can improve performance.
//
static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) {
  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
  if (!SpillAlignedNEONRegs)
    return;

  // Naked functions don't spill callee-saved registers.
  if (MF.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                                     Attribute::Naked))
    return;

  // We are planning to use NEON instructions vst1 / vld1.
  if (!MF.getTarget().getSubtarget<ARMSubtarget>().hasNEON())
    return;

  // Don't bother if the default stack alignment is sufficiently high.
  if (MF.getTarget().getFrameLowering()->getStackAlignment() >= 8)
    return;

  // Aligned spills require stack realignment.
  const ARMBaseRegisterInfo *RegInfo =
    static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
  if (!RegInfo->canRealignStack(MF))
    return;

  // We always spill contiguous d-registers starting from d8. Count how many
  // need spilling. The register allocator will almost always use the
  // callee-saved registers in order, but it can happen that there are holes in
  // the range. Registers above the hole will be spilled to the standard DPRCS
  // area.
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned NumSpills = 0;
  for (; NumSpills < 8; ++NumSpills)
    if (!MRI.isPhysRegUsed(ARM::D8 + NumSpills))
      break;

  // Don't do this for just one d-register. It's not worth it.
  if (NumSpills < 2)
    return;

  // Spill the first NumSpills D-registers after realigning the stack.
  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);

  // A scratch register is required for the vst1 / vld1 instructions.
  MF.getRegInfo().setPhysRegUsed(ARM::R4);
}

void
ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                                       RegScavenger *RS) const {
  // This tells PEI to spill the FP as if it is any other callee-save register
  // to take advantage of the eliminateFrameIndex machinery. This also ensures
  // it is spilled in the order specified by getCalleeSavedRegs() to make it
  // easier to combine multiple loads / stores.
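  //
  // The rest of this function decides which callee-saved registers need to be
  // spilled, whether an extra callee-saved register or an emergency spill slot
  // is required for the register scavenger on large frames, and records the
  // results in ARMFunctionInfo.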
  bool CanEliminateFrame = true;
  bool CS1Spilled = false;
  bool LRSpilled = false;
  unsigned NumGPRSpills = 0;
  SmallVector<unsigned, 4> UnspilledCS1GPRs;
  SmallVector<unsigned, 4> UnspilledCS2GPRs;
  const ARMBaseRegisterInfo *RegInfo =
    static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
  const ARMBaseInstrInfo &TII =
    *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned FramePtr = RegInfo->getFrameRegister(MF);

  // Spill R4 if Thumb2 function requires stack realignment - it will be used as
  // scratch register. Also spill R4 if Thumb2 function has varsized objects,
  // since it's not always possible to restore sp from fp in a single
  // instruction.
  // FIXME: It would be better just to find a spare register here.
  if (AFI->isThumb2Function() &&
      (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
    MRI.setPhysRegUsed(ARM::R4);

  if (AFI->isThumb1OnlyFunction()) {
    // Spill LR if Thumb1 function uses variable length argument lists.
    if (AFI->getArgRegsSaveSize() > 0)
      MRI.setPhysRegUsed(ARM::LR);

    // Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know
    // for sure what the stack size will be, but for this, an estimate is good
    // enough. If anything changes it, it'll be a spill, which implies
    // we've used all the registers and so R4 is already used, so not marking
    // it here will be OK.
    // FIXME: It would be better just to find a spare register here.
    unsigned StackSize = MFI->estimateStackSize(MF);
    if (MFI->hasVarSizedObjects() || StackSize > 508)
      MRI.setPhysRegUsed(ARM::R4);
  }

  // See if we can spill vector registers to aligned stack.
  checkNumAlignedDPRCS2Regs(MF);

  // Spill the BasePtr if it's used.
  if (RegInfo->hasBasePointer(MF))
    MRI.setPhysRegUsed(RegInfo->getBaseRegister());

  // Don't spill FP if the frame can be eliminated. This is determined
  // by scanning the callee-save registers to see if any is used.
  const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
  for (unsigned i = 0; CSRegs[i]; ++i) {
    unsigned Reg = CSRegs[i];
    bool Spilled = false;
    if (MRI.isPhysRegUsed(Reg)) {
      Spilled = true;
      CanEliminateFrame = false;
    }

    if (!ARM::GPRRegClass.contains(Reg))
      continue;

    if (Spilled) {
      NumGPRSpills++;

      if (!STI.isTargetIOS()) {
        if (Reg == ARM::LR)
          LRSpilled = true;
        CS1Spilled = true;
        continue;
      }

      // Keep track of whether LR and any of R4, R5, R6, or R7 is spilled.
      switch (Reg) {
      case ARM::LR:
        LRSpilled = true;
        // Fallthrough
      case ARM::R0: case ARM::R1:
      case ARM::R2: case ARM::R3:
      case ARM::R4: case ARM::R5:
      case ARM::R6: case ARM::R7:
        CS1Spilled = true;
        break;
      default:
        break;
      }
    } else {
      if (!STI.isTargetIOS()) {
        UnspilledCS1GPRs.push_back(Reg);
        continue;
      }

      switch (Reg) {
      case ARM::R0: case ARM::R1:
      case ARM::R2: case ARM::R3:
      case ARM::R4: case ARM::R5:
      case ARM::R6: case ARM::R7:
      case ARM::LR:
        UnspilledCS1GPRs.push_back(Reg);
        break;
      default:
        UnspilledCS2GPRs.push_back(Reg);
        break;
      }
    }
  }

  bool ForceLRSpill = false;
  if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
    unsigned FnSize = GetFunctionSizeInBytes(MF, TII);
    // Force LR to be spilled if the Thumb function size is > 2048. This enables
    // use of BL to implement far jump. If it turns out that it's not needed
    // then the branch fix up path will undo it.
    if (FnSize >= (1 << 11)) {
      CanEliminateFrame = false;
      ForceLRSpill = true;
    }
  }

  // If any of the stack slot references may be out of range of an immediate
  // offset, make sure a register (or a spill slot) is available for the
  // register scavenger. Note that if we're indexing off the frame pointer, the
  // effective stack size is 4 bytes larger since the FP points to the stack
  // slot of the previous FP. Also, if we have variable sized objects in the
  // function, stack slot references will often be negative, and some of
  // our instructions are positive-offset only, so conservatively consider
  // that case to want a spill slot (or register) as well. Similarly, if
  // the function adjusts the stack pointer during execution and the
  // adjustments aren't already part of our stack size estimate, our offset
  // calculations may be off, so be conservative.
  // FIXME: We could add logic to be more precise about negative offsets
  // and which instructions will need a scratch register for them. Is it
  // worth the effort and added fragility?
  bool BigStack =
    (RS &&
     (MFI->estimateStackSize(MF) +
      ((hasFP(MF) && AFI->hasStackFrame()) ? 4 : 0) >=
      estimateRSStackSizeLimit(MF, this)))
    || MFI->hasVarSizedObjects()
    || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));

  bool ExtraCSSpill = false;
  if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
    AFI->setHasStackFrame(true);

    // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled,
    // spill LR as well so we can fold BX_RET into the register restore (LDM).
    if (!LRSpilled && CS1Spilled) {
      MRI.setPhysRegUsed(ARM::LR);
      NumGPRSpills++;
      SmallVectorImpl<unsigned>::iterator LRPos;
      LRPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(),
                        (unsigned)ARM::LR);
      if (LRPos != UnspilledCS1GPRs.end())
        UnspilledCS1GPRs.erase(LRPos);

      ForceLRSpill = false;
      ExtraCSSpill = true;
    }

    if (hasFP(MF)) {
      MRI.setPhysRegUsed(FramePtr);
      NumGPRSpills++;
    }

    // If stack and double are 8-byte aligned and we are spilling an odd number
    // of GPRs, spill one extra callee save GPR so we won't have to pad between
    // the integer and double callee save areas.
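    // For example, spilling only r4-r6 (three GPRs, 12 bytes) would leave the
    // following DPR spill area 4-byte aligned; spilling one more GPR avoids
    // the padding word.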
    unsigned TargetAlign = getStackAlignment();
    if (TargetAlign == 8 && (NumGPRSpills & 1)) {
      if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
        for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
          unsigned Reg = UnspilledCS1GPRs[i];
          // Don't spill a high register if the function is Thumb1.
          if (!AFI->isThumb1OnlyFunction() ||
              isARMLowRegister(Reg) || Reg == ARM::LR) {
            MRI.setPhysRegUsed(Reg);
            if (!MRI.isReserved(Reg))
              ExtraCSSpill = true;
            break;
          }
        }
      } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
        unsigned Reg = UnspilledCS2GPRs.front();
        MRI.setPhysRegUsed(Reg);
        if (!MRI.isReserved(Reg))
          ExtraCSSpill = true;
      }
    }

    // Estimate if we might need to scavenge a register at some point in order
    // to materialize a stack offset. If so, either spill one additional
    // callee-saved register or reserve a special spill slot to facilitate
    // register scavenging. Thumb1 needs a spill slot for stack pointer
    // adjustments also, even when the frame itself is small.
    if (BigStack && !ExtraCSSpill) {
      // If any non-reserved CS register isn't spilled, just spill one or two
      // extra. That should take care of it!
      unsigned NumExtras = TargetAlign / 4;
      SmallVector<unsigned, 2> Extras;
      while (NumExtras && !UnspilledCS1GPRs.empty()) {
        unsigned Reg = UnspilledCS1GPRs.back();
        UnspilledCS1GPRs.pop_back();
        if (!MRI.isReserved(Reg) &&
            (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
             Reg == ARM::LR)) {
          Extras.push_back(Reg);
          NumExtras--;
        }
      }
      // For non-Thumb1 functions, also check for hi-reg CS registers
      if (!AFI->isThumb1OnlyFunction()) {
        while (NumExtras && !UnspilledCS2GPRs.empty()) {
          unsigned Reg = UnspilledCS2GPRs.back();
          UnspilledCS2GPRs.pop_back();
          if (!MRI.isReserved(Reg)) {
            Extras.push_back(Reg);
            NumExtras--;
          }
        }
      }
      if (Extras.size() && NumExtras == 0) {
        for (unsigned i = 0, e = Extras.size(); i != e; ++i) {
          MRI.setPhysRegUsed(Extras[i]);
        }
      } else if (!AFI->isThumb1OnlyFunction()) {
        // note: Thumb1 functions spill to R12, not the stack. Reserve a slot
        // closest to SP or frame pointer.
        const TargetRegisterClass *RC = &ARM::GPRRegClass;
        RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
                                                           RC->getAlignment(),
                                                           false));
      }
    }
  }

  if (ForceLRSpill) {
    MRI.setPhysRegUsed(ARM::LR);
    AFI->setLRIsSpilledForFarJump(true);
  }
}


void ARMFrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I) const {
  const ARMBaseInstrInfo &TII =
    *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
  if (!hasReservedCallFrame(MF)) {
    // If we have alloca, convert as follows:
    // ADJCALLSTACKDOWN -> sub, sp, sp, amount
    // ADJCALLSTACKUP   -> add, sp, sp, amount
    MachineInstr *Old = I;
    DebugLoc dl = Old->getDebugLoc();
    unsigned Amount = Old->getOperand(0).getImm();
    if (Amount != 0) {
      // We need to keep the stack aligned properly. To do this, we round the
      // amount of space needed for the outgoing arguments up to the next
      // alignment boundary.
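      // For example, with an 8-byte stack alignment a 20-byte outgoing
      // argument area is rounded up to 24 bytes.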
      unsigned Align = getStackAlignment();
      Amount = (Amount+Align-1)/Align*Align;

      ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
      assert(!AFI->isThumb1OnlyFunction() &&
             "This eliminateCallFramePseudoInstr does not support Thumb1!");
      bool isARM = !AFI->isThumbFunction();

      // Replace the pseudo instruction with a new instruction...
      unsigned Opc = Old->getOpcode();
      int PIdx = Old->findFirstPredOperandIdx();
      ARMCC::CondCodes Pred = (PIdx == -1)
        ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm();
      if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
        // Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
        unsigned PredReg = Old->getOperand(2).getReg();
        emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
                     Pred, PredReg);
      } else {
        // Note: PredReg is operand 3 for ADJCALLSTACKUP.
        unsigned PredReg = Old->getOperand(3).getReg();
        assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
        emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
                     Pred, PredReg);
      }
    }
  }
  MBB.erase(I);
}