//===-- ARMFrameLowering.cpp - ARM Frame Information ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the ARM implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "ARMFrameLowering.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetOptions.h"

#define DEBUG_TYPE "arm-frame-lowering"

using namespace llvm;

static cl::opt<bool>
SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
                     cl::desc("Align ARM NEON spills in prolog and epilog"));

static MachineBasicBlock::iterator
skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
                        unsigned NumAlignedDPRCS2Regs);

ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
    : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
      STI(sti) {}

bool ARMFrameLowering::noFramePointerElim(const MachineFunction &MF) const {
  // iOS always has a FP for backtracking; force other targets to keep their
  // FP when doing FastISel. The emitted code is currently superior, and in
  // cases like test-suite's lencod, FastISel isn't quite correct when the FP
  // is eliminated.
  return TargetFrameLowering::noFramePointerElim(MF) ||
         MF.getSubtarget<ARMSubtarget>().useFastISel();
}

/// hasFP - Return true if the specified function should have a dedicated
/// frame pointer register. This is true if the function has variable sized
/// allocas or if frame pointer elimination is disabled.
bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // ABI-required frame pointer.
  if (MF.getTarget().Options.DisableFramePointerElim(MF))
    return true;

  // Frame pointer required for use within this function.
  return (RegInfo->needsStackRealignment(MF) ||
          MFI.hasVarSizedObjects() ||
          MFI.isFrameAddressTaken());
}

/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
/// not required, we reserve argument space for call sites in the function
/// immediately on entry to the current function. This eliminates the need for
/// add/sub sp brackets around call sites. Returns true if the call frame is
/// included as part of the stack frame.
bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned CFSize = MFI.getMaxCallFrameSize();
  // It's not always a good idea to include the call frame as part of the
  // stack frame. ARM (especially Thumb) has a small immediate offset range
  // for addressing the stack frame, so a large call frame can cause poor
  // codegen and may even make it impossible to scavenge a register.
  if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
    return false;

  return !MFI.hasVarSizedObjects();
}

/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
/// call frame pseudos can be simplified. Unlike most targets, having a FP
/// is not sufficient here since we still may reference some objects via SP
/// even when FP is available in Thumb2 mode.
bool
ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
  return hasReservedCallFrame(MF) || MF.getFrameInfo().hasVarSizedObjects();
}

static bool isCSRestore(MachineInstr &MI, const ARMBaseInstrInfo &TII,
                        const MCPhysReg *CSRegs) {
  // Integer spill area is handled with "pop".
  if (isPopOpcode(MI.getOpcode())) {
    // The first two operands are predicates. The last two are
    // imp-def and imp-use of SP. Check everything in between.
    for (int i = 5, e = MI.getNumOperands(); i != e; ++i)
      if (!isCalleeSavedRegister(MI.getOperand(i).getReg(), CSRegs))
        return false;
    return true;
  }
  if ((MI.getOpcode() == ARM::LDR_POST_IMM ||
       MI.getOpcode() == ARM::LDR_POST_REG ||
       MI.getOpcode() == ARM::t2LDR_POST) &&
      isCalleeSavedRegister(MI.getOperand(0).getReg(), CSRegs) &&
      MI.getOperand(1).getReg() == ARM::SP)
    return true;

  return false;
}

static void emitRegPlusImmediate(
    bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
    const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
    unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
    ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
  if (isARM)
    emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
                            Pred, PredReg, TII, MIFlags);
  else
    emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
                           Pred, PredReg, TII, MIFlags);
}

static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator &MBBI, const DebugLoc &dl,
                         const ARMBaseInstrInfo &TII, int NumBytes,
                         unsigned MIFlags = MachineInstr::NoFlags,
                         ARMCC::CondCodes Pred = ARMCC::AL,
                         unsigned PredReg = 0) {
  emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
                       MIFlags, Pred, PredReg);
}

static int sizeOfSPAdjustment(const MachineInstr &MI) {
  int RegSize;
  switch (MI.getOpcode()) {
  case ARM::VSTMDDB_UPD:
    RegSize = 8;
    break;
  case ARM::STMDB_UPD:
  case ARM::t2STMDB_UPD:
    RegSize = 4;
    break;
  case ARM::t2STR_PRE:
  case ARM::STR_PRE_IMM:
    return 4;
  default:
    llvm_unreachable("Unknown push or pop like instruction");
  }

  int count = 0;
  // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (plus the
  // predicate), so the register list starts at operand 4.
  for (int i = MI.getNumOperands() - 1; i >= 4; --i)
    count += RegSize;
  return count;
}

static bool WindowsRequiresStackProbe(const MachineFunction &MF,
                                      size_t StackSizeInBytes) {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const Function *F = MF.getFunction();
  unsigned StackProbeSize =
      (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;
  if (F->hasFnAttribute("stack-probe-size"))
    F->getFnAttribute("stack-probe-size")
        .getValueAsString()
        .getAsInteger(0, StackProbeSize);
  return StackSizeInBytes >= StackProbeSize;
}

namespace {
struct StackAdjustingInsts {
  struct InstInfo {
    MachineBasicBlock::iterator I;
    unsigned SPAdjust;
    bool BeforeFPSet;
  };

  SmallVector<InstInfo, 4> Insts;

  void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
               bool BeforeFPSet = false) {
    InstInfo Info = {I, SPAdjust, BeforeFPSet};
    Insts.push_back(Info);
  }

  void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
    auto Info = find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
    assert(Info != Insts.end() && "invalid sp adjusting instruction");
    Info->SPAdjust += ExtraBytes;
  }

  void emitDefCFAOffsets(MachineModuleInfo &MMI, MachineBasicBlock &MBB,
                         const DebugLoc &dl, const ARMBaseInstrInfo &TII,
                         bool HasFP) {
    unsigned CFAOffset = 0;
    for (auto &Info : Insts) {
      if (HasFP && !Info.BeforeFPSet)
        return;

      CFAOffset -= Info.SPAdjust;
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
      BuildMI(MBB, std::next(Info.I), dl,
              TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }
  }
};
}

/// Emit an instruction sequence that will align the address in
/// register Reg by zeroing out the lower bits. For versions of the
/// architecture that support Neon, this must be done in a single
/// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
/// single instruction. That function only gets called when optimizing
/// spilling of D registers on a core with the Neon instruction set
/// present.
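///
/// For example, with Alignment == 16 this clears the low four bits of Reg,
/// turning an address like 0x1007 into 0x1000.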
static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
                                     const TargetInstrInfo &TII,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     const DebugLoc &DL, const unsigned Reg,
                                     const unsigned Alignment,
                                     const bool MustBeSingleInstruction) {
  const ARMSubtarget &AST =
      static_cast<const ARMSubtarget &>(MF.getSubtarget());
  const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
  const unsigned AlignMask = Alignment - 1;
  const unsigned NrBitsToZero = countTrailingZeros(Alignment);
  assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
  if (!AFI->isThumbFunction()) {
    // If the BFC instruction is available, use it to zero the lower bits:
    //   bfc Reg, #0, log2(Alignment)
    // Otherwise use BIC, if the mask to zero the required number of bits
    // can be encoded in the BIC immediate field:
    //   bic Reg, Reg, Alignment-1
    // Otherwise, emit a shift pair:
    //   lsr Reg, Reg, log2(Alignment)
    //   lsl Reg, Reg, log2(Alignment)
    if (CanUseBFC) {
      AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
                         .addReg(Reg, RegState::Kill)
                         .addImm(~AlignMask));
    } else if (AlignMask <= 255) {
      AddDefaultCC(
          AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
                             .addReg(Reg, RegState::Kill)
                             .addImm(AlignMask)));
    } else {
      assert(!MustBeSingleInstruction &&
             "Shouldn't call emitAligningInstructions demanding a single "
             "instruction to be emitted for large stack alignment for a target "
             "without BFC.");
      AddDefaultCC(AddDefaultPred(
          BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
              .addReg(Reg, RegState::Kill)
              .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))));
      AddDefaultCC(AddDefaultPred(
          BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
              .addReg(Reg, RegState::Kill)
              .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))));
    }
  } else {
    // Since this is only reached for Thumb-2 targets, the BFC instruction
    // should always be available.
    assert(CanUseBFC);
    AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
                       .addReg(Reg, RegState::Kill)
                       .addImm(~AlignMask));
  }
}

void ARMFrameLowering::emitPrologue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  MachineModuleInfo &MMI = MF.getMMI();
  MCContext &Context = MMI.getContext();
  const TargetMachine &TM = MF.getTarget();
  const MCRegisterInfo *MRI = Context.getRegisterInfo();
  const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
  const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
  assert(!AFI->isThumb1OnlyFunction() &&
         "This emitPrologue does not support Thumb1!");
  bool isARM = !AFI->isThumbFunction();
  unsigned Align = STI.getFrameLowering()->getStackAlignment();
  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
  unsigned NumBytes = MFI.getStackSize();
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc dl;

  unsigned FramePtr = RegInfo->getFrameRegister(MF);

  // Determine the size of each callee-save spill area and record which
  // frame index belongs to which area.
  unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
  int FramePtrSpillFI = 0;
  int D8SpillFI = 0;

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  StackAdjustingInsts DefCFAOffsetCandidates;
  bool HasFP = hasFP(MF);

  // Allocate the vararg register save area.
  if (ArgRegsSaveSize) {
    emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
                 MachineInstr::FrameSetup);
    DefCFAOffsetCandidates.addInst(std::prev(MBBI), ArgRegsSaveSize, true);
  }

  if (!AFI->hasStackFrame() &&
      (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
    if (NumBytes - ArgRegsSaveSize != 0) {
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize),
                   MachineInstr::FrameSetup);
      DefCFAOffsetCandidates.addInst(std::prev(MBBI),
                                     NumBytes - ArgRegsSaveSize, true);
    }
    DefCFAOffsetCandidates.emitDefCFAOffsets(MMI, MBB, dl, TII, HasFP);
    return;
  }

  // Determine spill area sizes.
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    int FI = CSI[i].getFrameIdx();
    switch (Reg) {
    case ARM::R8:
    case ARM::R9:
    case ARM::R10:
    case ARM::R11:
    case ARM::R12:
      if (STI.splitFramePushPop(MF)) {
        GPRCS2Size += 4;
        break;
      }
      LLVM_FALLTHROUGH;
    case ARM::R0:
    case ARM::R1:
    case ARM::R2:
    case ARM::R3:
    case ARM::R4:
    case ARM::R5:
    case ARM::R6:
    case ARM::R7:
    case ARM::LR:
      if (Reg == FramePtr)
        FramePtrSpillFI = FI;
      GPRCS1Size += 4;
      break;
    default:
      // This is a DPR. Exclude the aligned DPRCS2 spills.
      if (Reg == ARM::D8)
        D8SpillFI = FI;
      if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
        DPRCSSize += 8;
    }
  }

  // Move past area 1.
  MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push;
  if (GPRCS1Size > 0) {
    GPRCS1Push = LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
  }

  // Determine starting offsets of spill areas.
  unsigned GPRCS1Offset = NumBytes - ArgRegsSaveSize - GPRCS1Size;
  unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
  unsigned DPRAlign = DPRCSSize ? std::min(8U, Align) : 4U;
  unsigned DPRGapSize = (GPRCS1Size + GPRCS2Size + ArgRegsSaveSize) % DPRAlign;
  unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
  int FramePtrOffsetInPush = 0;
  if (HasFP) {
    FramePtrOffsetInPush =
        MFI.getObjectOffset(FramePtrSpillFI) + ArgRegsSaveSize;
    AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
                                NumBytes);
  }
  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
  AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);

  // Move past area 2.
  if (GPRCS2Size > 0) {
    GPRCS2Push = LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
  }

  // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so
  // our .cfi_offset operations will reflect that.
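  // For example, if the pushes above saved five GPRs (20 bytes) and there is
  // no vararg save area, then with DPRAlign == 8 we get
  // DPRGapSize == 20 % 8 == 4, so a 4-byte gap is inserted (or folded into
  // the preceding push) before the DPR spill area.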
  if (DPRGapSize) {
    assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
    if (LastPush != MBB.end() &&
        tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize))
      DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
    else {
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
                   MachineInstr::FrameSetup);
      DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize);
    }
  }

  // Move past area 3.
  if (DPRCSSize > 0) {
    // Since vpush register lists cannot have gaps, there may be multiple
    // vpush instructions in the prologue.
    while (MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
      DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI));
      LastPush = MBBI++;
    }
  }

  // Move past the aligned DPRCS2 area.
  if (AFI->getNumAlignedDPRCS2Regs() > 0) {
    MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs());
    // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
    // leaves the stack pointer pointing to the DPRCS2 area.
    //
    // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
    NumBytes += MFI.getObjectOffset(D8SpillFI);
  } else
    NumBytes = DPRCSOffset;

  if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
    uint32_t NumWords = NumBytes >> 2;

    if (NumWords < 65536)
      AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
                         .addImm(NumWords)
                         .setMIFlags(MachineInstr::FrameSetup));
    else
      BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4)
          .addImm(NumWords)
          .setMIFlags(MachineInstr::FrameSetup);

    switch (TM.getCodeModel()) {
    case CodeModel::Small:
    case CodeModel::Medium:
    case CodeModel::Default:
    case CodeModel::Kernel:
      BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
          .addImm((unsigned)ARMCC::AL).addReg(0)
          .addExternalSymbol("__chkstk")
          .addReg(ARM::R4, RegState::Implicit)
          .setMIFlags(MachineInstr::FrameSetup);
      break;
    case CodeModel::Large:
    case CodeModel::JITDefault:
      BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)
          .addExternalSymbol("__chkstk")
          .setMIFlags(MachineInstr::FrameSetup);

      BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
          .addImm((unsigned)ARMCC::AL).addReg(0)
          .addReg(ARM::R12, RegState::Kill)
          .addReg(ARM::R4, RegState::Implicit)
          .setMIFlags(MachineInstr::FrameSetup);
      break;
    }

    AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr),
                                        ARM::SP)
                                    .addReg(ARM::SP, RegState::Kill)
                                    .addReg(ARM::R4, RegState::Kill)
                                    .setMIFlags(MachineInstr::FrameSetup)));
    NumBytes = 0;
  }

  if (NumBytes) {
    // Adjust SP after all the callee-save spills.
    if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
        tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, NumBytes))
      DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
    else {
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
                   MachineInstr::FrameSetup);
      DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes);
    }

    if (HasFP && isARM)
      // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
      // Note it's not safe to do this in Thumb2 mode because it would have
      // taken two instructions:
      //   mov sp, r7
      //   sub sp, #24
      // If an interrupt is taken between the two instructions, then sp is in
      // an inconsistent state (pointing to the middle of the callee-saved
      // area), and the interrupt handler could end up clobbering the
      // registers.
      AFI->setShouldRestoreSPFromFP(true);
  }

  // Set FP to point to the stack slot that contains the previous FP.
  // For iOS, FP is R7, which has now been stored in spill area 1.
  // Otherwise, if this is not iOS, all the callee-saved registers go
  // into spill area 1, including the FP in R11. In either case, it
  // is in area one and the adjustment needs to take place just after
  // that push.
  if (HasFP) {
    MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push);
    unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
    emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush,
                         dl, TII, FramePtr, ARM::SP,
                         PushSize + FramePtrOffsetInPush,
                         MachineInstr::FrameSetup);
    if (FramePtrOffsetInPush + PushSize != 0) {
      unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa(
          nullptr, MRI->getDwarfRegNum(FramePtr, true),
          -(ArgRegsSaveSize - FramePtrOffsetInPush)));
      BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    } else {
      unsigned CFIIndex =
          MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(
              nullptr, MRI->getDwarfRegNum(FramePtr, true)));
      BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }
  }

  // Now that the prologue's actual instructions are finalised, we can insert
  // the necessary DWARF cf instructions to describe the situation. Start by
  // recording where each register ended up:
  if (GPRCS1Size > 0) {
    MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
    int CFIIndex;
    for (const auto &Entry : CSI) {
      unsigned Reg = Entry.getReg();
      int FI = Entry.getFrameIdx();
      switch (Reg) {
      case ARM::R8:
      case ARM::R9:
      case ARM::R10:
      case ARM::R11:
      case ARM::R12:
        if (STI.splitFramePushPop(MF))
          break;
        LLVM_FALLTHROUGH;
      case ARM::R0:
      case ARM::R1:
      case ARM::R2:
      case ARM::R3:
      case ARM::R4:
      case ARM::R5:
      case ARM::R6:
      case ARM::R7:
      case ARM::LR:
        CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
            nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
        BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex)
            .setMIFlags(MachineInstr::FrameSetup);
        break;
      }
    }
  }

  if (GPRCS2Size > 0) {
    MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
    for (const auto &Entry : CSI) {
      unsigned Reg = Entry.getReg();
      int FI = Entry.getFrameIdx();
      switch (Reg) {
      case ARM::R8:
      case ARM::R9:
      case ARM::R10:
      case ARM::R11:
      case ARM::R12:
        if (STI.splitFramePushPop(MF)) {
          unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
          unsigned Offset = MFI.getObjectOffset(FI);
          unsigned CFIIndex = MMI.addFrameInst(
              MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
          BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
              .addCFIIndex(CFIIndex)
              .setMIFlags(MachineInstr::FrameSetup);
        }
        break;
      }
    }
  }

  if (DPRCSSize > 0) {
    // Since vpush register lists cannot have gaps, there may be multiple
    // vpush instructions in the prologue.
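    // For example, saving {d8, d10, d11} takes two instructions, one vpush
    // for {d8} and one for {d10, d11}; a .cfi_offset is recorded below for
    // each D-register regardless of which vpush spilled it.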
    MachineBasicBlock::iterator Pos = std::next(LastPush);
    for (const auto &Entry : CSI) {
      unsigned Reg = Entry.getReg();
      int FI = Entry.getFrameIdx();
      if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&
          (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {
        unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
        unsigned Offset = MFI.getObjectOffset(FI);
        unsigned CFIIndex = MMI.addFrameInst(
            MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
        BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex)
            .setMIFlags(MachineInstr::FrameSetup);
      }
    }
  }

  // Now we can emit descriptions of where the canonical frame address was
  // throughout the process. If we have a frame pointer, it takes over the job
  // half-way through, so only the first few .cfi_def_cfa_offset instructions
  // actually get emitted.
  DefCFAOffsetCandidates.emitDefCFAOffsets(MMI, MBB, dl, TII, HasFP);

  if (STI.isTargetELF() && hasFP(MF))
    MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
                            AFI->getFramePtrSpillOffset());

  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
  AFI->setDPRCalleeSavedGapSize(DPRGapSize);
  AFI->setDPRCalleeSavedAreaSize(DPRCSSize);

  // If we need dynamic stack realignment, do it here. Be paranoid and make
  // sure if we also have VLAs, we have a base pointer for frame access.
  // If aligned NEON registers were spilled, the stack has already been
  // realigned.
  if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) {
    unsigned MaxAlign = MFI.getMaxAlignment();
    assert(!AFI->isThumb1OnlyFunction());
    if (!AFI->isThumbFunction()) {
      emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
                               false);
    } else {
      // We cannot use sp as the source/dest register here, so we use r4 to
      // perform the calculations. We're emitting the following sequence:
      //   mov r4, sp
      //   -- use emitAligningInstructions to produce best sequence to zero
      //   -- out lower bits in r4
      //   mov sp, r4
      // FIXME: It would be better just to find a spare register here.
      AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
                         .addReg(ARM::SP, RegState::Kill));
      emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
                               false);
      AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
                         .addReg(ARM::R4, RegState::Kill));
    }

    AFI->setShouldRestoreSPFromFP(true);
  }

  // If we need a base pointer, set it up here. It's whatever the value
  // of the stack pointer is at this point. Any variable size objects
  // will be allocated after this, so we can still use the base pointer
  // to reference locals.
  // FIXME: Clarify FrameSetup flags here.
  if (RegInfo->hasBasePointer(MF)) {
    if (isARM)
      BuildMI(MBB, MBBI, dl,
              TII.get(ARM::MOVr), RegInfo->getBaseRegister())
          .addReg(ARM::SP)
          .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
    else
      AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
                             RegInfo->getBaseRegister())
                         .addReg(ARM::SP));
  }

  // If the frame has variable sized objects then the epilogue must restore
  // the sp from fp. We can assume there's an FP here since hasFP already
  // checks for hasVarSizedObjects.
  if (MFI.hasVarSizedObjects())
    AFI->setShouldRestoreSPFromFP(true);
}

void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  assert(!AFI->isThumb1OnlyFunction() &&
         "This emitEpilogue does not support Thumb1!");
  bool isARM = !AFI->isThumbFunction();

  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
  int NumBytes = (int)MFI.getStackSize();
  unsigned FramePtr = RegInfo->getFrameRegister(MF);

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  // First, position ourselves at the first terminator instruction (from the
  // top).
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();

  if (!AFI->hasStackFrame()) {
    if (NumBytes - ArgRegsSaveSize != 0)
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes - ArgRegsSaveSize);
  } else {
    // Unwind MBBI to point to the first LDR / VLDRD.
    const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
    if (MBBI != MBB.begin()) {
      do {
        --MBBI;
      } while (MBBI != MBB.begin() && isCSRestore(*MBBI, TII, CSRegs));
      if (!isCSRestore(*MBBI, TII, CSRegs))
        ++MBBI;
    }

    // Move SP to the start of the FP callee-save spill area.
    NumBytes -= (ArgRegsSaveSize +
                 AFI->getGPRCalleeSavedArea1Size() +
                 AFI->getGPRCalleeSavedArea2Size() +
                 AFI->getDPRCalleeSavedGapSize() +
                 AFI->getDPRCalleeSavedAreaSize());

    // Reset SP based on the frame pointer only if the stack frame extends
    // beyond the frame pointer stack slot, or if the target is ELF and the
    // function has an FP.
    if (AFI->shouldRestoreSPFromFP()) {
      NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
      if (NumBytes) {
        if (isARM)
          emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
                                  ARMCC::AL, 0, TII);
        else {
          // It's not possible to restore SP from FP in a single instruction.
          // For iOS, this looks like:
          //   mov sp, r7
          //   sub sp, #24
          // This is bad: if an interrupt is taken after the mov, sp is in an
          // inconsistent state.
          // Use the first callee-saved register as a scratch register.
          assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
                 "No scratch register to restore SP from FP!");
          emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
                                 ARMCC::AL, 0, TII);
          AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
                                 ARM::SP)
                             .addReg(ARM::R4));
        }
      } else {
        // Thumb2 or ARM.
        if (isARM)
          BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
              .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
        else
          AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
                                 ARM::SP)
                             .addReg(FramePtr));
      }
    } else if (NumBytes &&
               !tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);

    // Increment past our save areas.
    if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) {
      MBBI++;
      // Since vpop register lists cannot have gaps, there may be multiple
      // vpop instructions in the epilogue.
      while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
        MBBI++;
    }
    if (AFI->getDPRCalleeSavedGapSize()) {
      assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
             "unexpected DPR alignment gap");
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize());
    }

    if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
    if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
  }

  if (ArgRegsSaveSize)
    emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize);
}

/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
/// debug info. It's the same as what we use for resolving the code-gen
/// references for now. FIXME: This can go wrong when references are
/// SP-relative and simple call frames aren't used.
int
ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
                                         unsigned &FrameReg) const {
  return ResolveFrameIndexReference(MF, FI, FrameReg, 0);
}

int
ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
                                             int FI, unsigned &FrameReg,
                                             int SPAdj) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
  int FPOffset = Offset - AFI->getFramePtrSpillOffset();
  bool isFixed = MFI.isFixedObjectIndex(FI);

  FrameReg = ARM::SP;
  Offset += SPAdj;

  // SP can move around if there are allocas. We may also lose track of SP
  // when emergency spilling inside a non-reserved call frame setup.
  bool hasMovingSP = !hasReservedCallFrame(MF);

  // When dynamically realigning the stack, use the frame pointer for
  // parameters, and the stack/base pointer for locals.
  if (RegInfo->needsStackRealignment(MF)) {
    assert(hasFP(MF) && "dynamic stack realignment without a FP!");
    if (isFixed) {
      FrameReg = RegInfo->getFrameRegister(MF);
      Offset = FPOffset;
    } else if (hasMovingSP) {
      assert(RegInfo->hasBasePointer(MF) &&
             "VLAs and dynamic stack alignment, but missing base pointer!");
      FrameReg = RegInfo->getBaseRegister();
    }
    return Offset;
  }

  // If there is a frame pointer, use it when we can.
  if (hasFP(MF) && AFI->hasStackFrame()) {
    // Use the frame pointer to reference fixed objects. Use it for locals if
    // there are VLAs (and thus the SP isn't reliable as a base).
    if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
      FrameReg = RegInfo->getFrameRegister(MF);
      return FPOffset;
    } else if (hasMovingSP) {
      assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
      if (AFI->isThumb2Function()) {
        // Try to use the frame pointer if we can, else use the base pointer
        // since it's available. This is handy for the emergency spill slot, in
        // particular.
        if (FPOffset >= -255 && FPOffset < 0) {
          FrameReg = RegInfo->getFrameRegister(MF);
          return FPOffset;
        }
      }
    } else if (AFI->isThumb2Function()) {
      // Use
      //   add <rd>, sp, #<imm8>
      //   ldr <rd>, [sp, #<imm8>]
      // if at all possible to save space.
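      // These bounds mirror the Thumb encodings: positive SP-relative
      // offsets use an imm8 scaled by 4 (0 to 1020), while negative
      // FP-relative offsets are limited to an unscaled imm8 (-255 to -1).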
      if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
        return Offset;
      // In Thumb2 mode, the negative offset is very limited. Try to avoid
      // out of range references. ldr <rt>,[<rn>, #-<imm8>]
      if (FPOffset >= -255 && FPOffset < 0) {
        FrameReg = RegInfo->getFrameRegister(MF);
        return FPOffset;
      }
    } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
      // Otherwise, use SP or FP, whichever is closer to the stack slot.
      FrameReg = RegInfo->getFrameRegister(MF);
      return FPOffset;
    }
  }
  // Use the base pointer if we have one.
  if (RegInfo->hasBasePointer(MF))
    FrameReg = RegInfo->getBaseRegister();
  return Offset;
}

void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    const std::vector<CalleeSavedInfo> &CSI,
                                    unsigned StmOpc, unsigned StrOpc,
                                    bool NoGap,
                                    bool(*Func)(unsigned, bool),
                                    unsigned NumAlignedDPRCS2Regs,
                                    unsigned MIFlags) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();

  DebugLoc DL;

  typedef std::pair<unsigned, bool> RegAndKill;
  SmallVector<RegAndKill, 4> Regs;
  unsigned i = CSI.size();
  while (i != 0) {
    unsigned LastReg = 0;
    for (; i != 0; --i) {
      unsigned Reg = CSI[i-1].getReg();
      if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;

      // D-registers in the aligned area DPRCS2 are NOT spilled here.
      if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
        continue;

      bool isLiveIn = MF.getRegInfo().isLiveIn(Reg);
      if (!isLiveIn)
        MBB.addLiveIn(Reg);
      // If NoGap is true, push consecutive registers and then leave the rest
      // for other instructions, e.g.
      //   vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
      if (NoGap && LastReg && LastReg != Reg-1)
        break;
      LastReg = Reg;
      // Do not set a kill flag on values that are also marked as live-in. This
      // happens with the @llvm.returnaddress intrinsic and with arguments
      // passed in callee-saved registers.
      // Omitting the kill flags is conservatively correct even if the live-in
      // is not used after all.
      Regs.push_back(std::make_pair(Reg, /*isKill=*/!isLiveIn));
    }

    if (Regs.empty())
      continue;

    std::sort(Regs.begin(), Regs.end(), [&](const RegAndKill &LHS,
                                            const RegAndKill &RHS) {
      return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first);
    });

    if (Regs.size() > 1 || StrOpc == 0) {
      MachineInstrBuilder MIB =
          AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
                             .addReg(ARM::SP).setMIFlags(MIFlags));
      for (unsigned i = 0, e = Regs.size(); i < e; ++i)
        MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
    } else if (Regs.size() == 1) {
      MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc),
                                        ARM::SP)
          .addReg(Regs[0].first, getKillRegState(Regs[0].second))
          .addReg(ARM::SP).setMIFlags(MIFlags)
          .addImm(-4);
      AddDefaultPred(MIB);
    }
    Regs.clear();

    // Put any subsequent vpush instructions before this one: they will refer
    // to higher register numbers, so they need to be pushed first in order to
    // preserve monotonicity.
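    // (Each outer-loop iteration emits one push for a consecutive run of
    // registers; backing up the insertion point makes the next run, which
    // covers higher-numbered registers, land before this push.)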
    if (MI != MBB.begin())
      --MI;
  }
}

void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MI,
                                   const std::vector<CalleeSavedInfo> &CSI,
                                   unsigned LdmOpc, unsigned LdrOpc,
                                   bool isVarArg, bool NoGap,
                                   bool(*Func)(unsigned, bool),
                                   unsigned NumAlignedDPRCS2Regs) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL;
  bool isTailCall = false;
  bool isInterrupt = false;
  bool isTrap = false;
  if (MBB.end() != MI) {
    DL = MI->getDebugLoc();
    unsigned RetOpcode = MI->getOpcode();
    isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri);
    isInterrupt =
        RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
    isTrap =
        RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
        RetOpcode == ARM::tTRAP;
  }

  SmallVector<unsigned, 4> Regs;
  unsigned i = CSI.size();
  while (i != 0) {
    unsigned LastReg = 0;
    bool DeleteRet = false;
    for (; i != 0; --i) {
      unsigned Reg = CSI[i-1].getReg();
      if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;

      // The aligned reloads from area DPRCS2 are not inserted here.
      if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
        continue;

      if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
          !isTrap && STI.hasV5TOps()) {
        if (MBB.succ_empty()) {
          Reg = ARM::PC;
          DeleteRet = true;
          LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
        } else
          LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
        // Fold the return instruction into the LDM.
      }

      // If NoGap is true, pop consecutive registers and then leave the rest
      // for other instructions, e.g.
      //   vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
      if (NoGap && LastReg && LastReg != Reg-1)
        break;

      LastReg = Reg;
      Regs.push_back(Reg);
    }

    if (Regs.empty())
      continue;

    std::sort(Regs.begin(), Regs.end(), [&](unsigned LHS, unsigned RHS) {
      return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS);
    });

    if (Regs.size() > 1 || LdrOpc == 0) {
      MachineInstrBuilder MIB =
          AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
                             .addReg(ARM::SP));
      for (unsigned i = 0, e = Regs.size(); i < e; ++i)
        MIB.addReg(Regs[i], getDefRegState(true));
      if (DeleteRet && MI != MBB.end()) {
        MIB.copyImplicitOps(*MI);
        MI->eraseFromParent();
      }
      MI = MIB;
    } else if (Regs.size() == 1) {
      // If we adjusted the reg to PC from LR above, switch it back here. We
      // only do that for LDM.
      if (Regs[0] == ARM::PC)
        Regs[0] = ARM::LR;
      MachineInstrBuilder MIB =
          BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
              .addReg(ARM::SP, RegState::Define)
              .addReg(ARM::SP);
      // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
      // that refactoring is complete (eventually).
      if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
        MIB.addReg(0);
        MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
      } else
        MIB.addImm(4);
      AddDefaultPred(MIB);
    }
    Regs.clear();

    // Put any subsequent vpop instructions after this one: they will refer to
    // higher register numbers, so they need to be popped afterwards.
    if (MI != MBB.end())
      ++MI;
  }
}

/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
/// starting from d8. Also insert stack realignment code and leave the stack
/// pointer pointing to the d8 spill slot.
static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    unsigned NumAlignedDPRCS2Regs,
                                    const std::vector<CalleeSavedInfo> &CSI,
                                    const TargetRegisterInfo *TRI) {
  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // Mark the D-register spill slots as properly aligned. Since MFI computes
  // stack slot layout backwards, this can actually mean that the d-reg stack
  // slot offsets can be wrong. The offset for d8 will always be correct.
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned DNum = CSI[i].getReg() - ARM::D8;
    if (DNum > NumAlignedDPRCS2Regs - 1)
      continue;
    int FI = CSI[i].getFrameIdx();
    // The even-numbered registers will be 16-byte aligned, the odd-numbered
    // registers will be 8-byte aligned.
    MFI.setObjectAlignment(FI, DNum % 2 ? 8 : 16);

    // The stack slot for D8 needs to be maximally aligned because this is
    // actually the point where we align the stack pointer. MachineFrameInfo
    // computes all offsets relative to the incoming stack pointer, which is a
    // bit weird when realigning the stack. Any extra padding for this
    // over-alignment is not realized because the code inserted below adjusts
    // the stack pointer by numregs * 8 before aligning the stack pointer.
    if (DNum == 0)
      MFI.setObjectAlignment(FI, MFI.getMaxAlignment());
  }

  // Move the stack pointer to the d8 spill slot, and align it at the same
  // time. Leave the stack slot address in the scratch register r4.
  //
  //   sub r4, sp, #numregs * 8
  //   bic r4, r4, #align - 1
  //   mov sp, r4
  //
  bool isThumb = AFI->isThumbFunction();
  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
  AFI->setShouldRestoreSPFromFP(true);

  // sub r4, sp, #numregs * 8
  // The immediate is <= 64, so it doesn't need any special encoding.
  unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
  AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
                                  .addReg(ARM::SP)
                                  .addImm(8 * NumAlignedDPRCS2Regs)));

  unsigned MaxAlign = MF.getFrameInfo().getMaxAlignment();
  // We must set the parameter MustBeSingleInstruction to true, since
  // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
  // the stack alignment. Luckily, this can always be done since all ARM
  // architecture versions that support Neon also support the BFC
  // instruction.
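  // For example, with MaxAlign == 16 this emits the single instruction
  //   bfc r4, #0, #4
  // to clear the low four bits of r4.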
  emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);

  // mov sp, r4
  // The stack pointer must be adjusted before spilling anything, otherwise
  // the stack slots could be clobbered by an interrupt handler.
  // Leave r4 live, it is used below.
  Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
                                .addReg(ARM::R4);
  MIB = AddDefaultPred(MIB);
  if (!isThumb)
    AddDefaultCC(MIB);

  // Now spill NumAlignedDPRCS2Regs registers starting from d8.
  // r4 holds the stack slot address.
  unsigned NextReg = ARM::D8;

  // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
  // The writeback is only needed when emitting two vst1.64 instructions.
  if (NumAlignedDPRCS2Regs >= 6) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    MBB.addLiveIn(SupReg);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed),
                           ARM::R4)
                       .addReg(ARM::R4, RegState::Kill).addImm(16)
                       .addReg(NextReg)
                       .addReg(SupReg, RegState::ImplicitKill));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // We won't modify r4 beyond this point. It currently points to the next
  // register to be spilled.
  unsigned R4BaseReg = NextReg;

  // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
  if (NumAlignedDPRCS2Regs >= 4) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    MBB.addLiveIn(SupReg);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
                       .addReg(ARM::R4).addImm(16).addReg(NextReg)
                       .addReg(SupReg, RegState::ImplicitKill));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // 16-byte aligned vst1.64 with 2 d-regs.
  if (NumAlignedDPRCS2Regs >= 2) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QPRRegClass);
    MBB.addLiveIn(SupReg);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
                       .addReg(ARM::R4).addImm(16).addReg(SupReg));
    NextReg += 2;
    NumAlignedDPRCS2Regs -= 2;
  }

  // Finally, use a vanilla vstr.64 for the odd last register.
  if (NumAlignedDPRCS2Regs) {
    MBB.addLiveIn(NextReg);
    // vstr.64 uses addrmode5 which has an offset scale of 4.
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
                       .addReg(NextReg)
                       .addReg(ARM::R4).addImm((NextReg-R4BaseReg)*2));
  }

  // The last spill instruction inserted should kill the scratch register r4.
  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
}

/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
/// iterator to the following instruction.
static MachineBasicBlock::iterator
skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
                        unsigned NumAlignedDPRCS2Regs) {
  //   sub r4, sp, #numregs * 8
  //   bic r4, r4, #align - 1
  //   mov sp, r4
  ++MI; ++MI; ++MI;
  assert(MI->mayStore() && "Expecting spill instruction");

  // These switches all fall through.
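  // The spill code above uses at most three store instructions, e.g. for
  // seven registers:
  //   vst1.64 {d8-d11} (with writeback), vst1.64 {d12, d13}, vstr d14
  // The cases below advance past however many stores were emitted.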
  switch (NumAlignedDPRCS2Regs) {
  case 7:
    ++MI;
    assert(MI->mayStore() && "Expecting spill instruction");
    LLVM_FALLTHROUGH;
  default:
    ++MI;
    assert(MI->mayStore() && "Expecting spill instruction");
    LLVM_FALLTHROUGH;
  case 1:
  case 2:
  case 4:
    assert(MI->killsRegister(ARM::R4) && "Missed kill flag");
    ++MI;
  }
  return MI;
}

/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
/// starting from d8. These instructions are assumed to execute while the
/// stack is still aligned, unlike the code inserted by emitPopInst.
static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI,
                                      unsigned NumAlignedDPRCS2Regs,
                                      const std::vector<CalleeSavedInfo> &CSI,
                                      const TargetRegisterInfo *TRI) {
  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();

  // Find the frame index assigned to d8.
  int D8SpillFI = 0;
  for (unsigned i = 0, e = CSI.size(); i != e; ++i)
    if (CSI[i].getReg() == ARM::D8) {
      D8SpillFI = CSI[i].getFrameIdx();
      break;
    }

  // Materialize the address of the d8 spill slot into the scratch register r4.
  // This can be fairly complicated if the stack frame is large, so just use
  // the normal frame index elimination mechanism to do it. This code runs as
  // the initial part of the epilog, where the stack and base pointers haven't
  // been changed yet.
  bool isThumb = AFI->isThumbFunction();
  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");

  unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
  AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
                                  .addFrameIndex(D8SpillFI).addImm(0)));

  // Now restore NumAlignedDPRCS2Regs registers starting from d8.
  unsigned NextReg = ARM::D8;

  // 16-byte aligned vld1.64 with 4 d-regs and writeback.
  if (NumAlignedDPRCS2Regs >= 6) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
                       .addReg(ARM::R4, RegState::Define)
                       .addReg(ARM::R4, RegState::Kill).addImm(16)
                       .addReg(SupReg, RegState::ImplicitDefine));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // We won't modify r4 beyond this point. It currently points to the next
  // register to be reloaded.
  unsigned R4BaseReg = NextReg;

  // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
  if (NumAlignedDPRCS2Regs >= 4) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
                       .addReg(ARM::R4).addImm(16)
                       .addReg(SupReg, RegState::ImplicitDefine));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // 16-byte aligned vld1.64 with 2 d-regs.
  if (NumAlignedDPRCS2Regs >= 2) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QPRRegClass);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
                       .addReg(ARM::R4).addImm(16));
    NextReg += 2;
    NumAlignedDPRCS2Regs -= 2;
  }

  // Finally, use a vanilla vldr.64 for the remaining odd register.
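  // As on the spill side, vldr.64 uses addrmode5, which scales the immediate
  // offset by 4; hence the factor of 2 per 8-byte D-register below.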
  if (NumAlignedDPRCS2Regs)
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
                       .addReg(ARM::R4).addImm(2*(NextReg-R4BaseReg)));

  // The last reload kills r4.
  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
}

bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MI,
                                        const std::vector<CalleeSavedInfo> &CSI,
                                        const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
  unsigned PushOneOpc = AFI->isThumbFunction() ?
      ARM::t2STR_PRE : ARM::STR_PRE_IMM;
  unsigned FltOpc = ARM::VSTMDDB_UPD;
  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0,
               MachineInstr::FrameSetup);
  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0,
               MachineInstr::FrameSetup);
  emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
               NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);

  // The code above does not insert spill code for the aligned DPRCS2
  // registers. The stack realignment code will be inserted between the push
  // instructions and these spills.
  if (NumAlignedDPRCS2Regs)
    emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);

  return true;
}

bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MI,
                                        const std::vector<CalleeSavedInfo> &CSI,
                                        const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  bool isVarArg = AFI->getArgRegsSaveSize() > 0;
  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();

  // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
  // registers. Do that here instead.
  if (NumAlignedDPRCS2Regs)
    emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);

  unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
  unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST
                                           : ARM::LDR_POST_IMM;
  unsigned FltOpc = ARM::VLDMDIA_UPD;
  emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
              NumAlignedDPRCS2Regs);
  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
              &isARMArea2Register, 0);
  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
              &isARMArea1Register, 0);

  return true;
}

// FIXME: Make generic?
static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
                                       const ARMBaseInstrInfo &TII) {
  unsigned FnSize = 0;
  for (auto &MBB : MF) {
    for (auto &MI : MBB)
      FnSize += TII.getInstSizeInBytes(MI);
  }
  return FnSize;
}

/// estimateRSStackSizeLimit - Look at each instruction that references stack
/// frames and return the stack size limit beyond which some of these
/// instructions will require a scratch register during their expansion later.
// FIXME: Move to TII?
static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
                                         const TargetFrameLowering *TFI) {
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  unsigned Limit = (1 << 12) - 1;
  for (auto &MBB : MF) {
    for (auto &MI : MBB) {
      for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
        if (!MI.getOperand(i).isFI())
          continue;

        // When using ADDri to get the address of a stack object, 255 is the
        // largest offset guaranteed to fit in the immediate offset.
        if (MI.getOpcode() == ARM::ADDri) {
          Limit = std::min(Limit, (1U << 8) - 1);
          break;
        }

        // Otherwise check the addressing mode.
        switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
        case ARMII::AddrMode3:
        case ARMII::AddrModeT2_i8:
          Limit = std::min(Limit, (1U << 8) - 1);
          break;
        case ARMII::AddrMode5:
        case ARMII::AddrModeT2_i8s4:
          Limit = std::min(Limit, ((1U << 8) - 1) * 4);
          break;
        case ARMII::AddrModeT2_i12:
          // i12 supports only positive offsets, so these will be converted to
          // i8 opcodes. See llvm::rewriteT2FrameIndex.
          if (TFI->hasFP(MF) && AFI->hasStackFrame())
            Limit = std::min(Limit, (1U << 8) - 1);
          break;
        case ARMII::AddrMode4:
        case ARMII::AddrMode6:
          // Addressing modes 4 & 6 (load/store) instructions can't encode an
          // immediate offset for stack references.
          return 0;
        default:
          break;
        }
        break; // At most one FI per instruction.
      }
    }
  }

  return Limit;
}

// In functions that realign the stack, it can be an advantage to spill the
// callee-saved vector registers after realigning the stack. The vst1 and vld1
// instructions take alignment hints that can improve performance.
static void
checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
  if (!SpillAlignedNEONRegs)
    return;

  // Naked functions don't spill callee-saved registers.
  if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
    return;

  // We are planning to use NEON instructions vst1 / vld1.
  if (!static_cast<const ARMSubtarget &>(MF.getSubtarget()).hasNEON())
    return;

  // Don't bother if the default stack alignment is sufficiently high.
  if (MF.getSubtarget().getFrameLowering()->getStackAlignment() >= 8)
    return;

  // Aligned spills require stack realignment.
  if (!static_cast<const ARMBaseRegisterInfo *>(
          MF.getSubtarget().getRegisterInfo())->canRealignStack(MF))
    return;

  // We always spill contiguous d-registers starting from d8. Count how many
  // need spilling. The register allocator will almost always use the
  // callee-saved registers in order, but it can happen that there are holes in
  // the range. Registers above the hole will be spilled to the standard DPRCS
  // area.
  unsigned NumSpills = 0;
  for (; NumSpills < 8; ++NumSpills)
    if (!SavedRegs.test(ARM::D8 + NumSpills))
      break;

  // Don't do this for just one d-register. It's not worth it.
  if (NumSpills < 2)
    return;

  // Spill the first NumSpills D-registers after realigning the stack.
  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);

  // A scratch register is required for the vst1 / vld1 instructions.
  SavedRegs.set(ARM::R4);
}

void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                            BitVector &SavedRegs,
                                            RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  // This tells PEI to spill the FP as if it is any other callee-save register
  // to take advantage of the eliminateFrameIndex machinery. This also ensures
  // it is spilled in the order specified by getCalleeSavedRegs() to make it
  // easier to combine multiple loads / stores.
  bool CanEliminateFrame = true;
  bool CS1Spilled = false;
  bool LRSpilled = false;
  unsigned NumGPRSpills = 0;
  unsigned NumFPRSpills = 0;
  SmallVector<unsigned, 4> UnspilledCS1GPRs;
  SmallVector<unsigned, 4> UnspilledCS2GPRs;
  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  (void)TRI; // Silence unused warning in non-assert builds.
  unsigned FramePtr = RegInfo->getFrameRegister(MF);

  // Spill R4 if a Thumb2 function requires stack realignment - it will be
  // used as a scratch register. Also spill R4 if a Thumb2 function has
  // variable-sized objects, since it's not always possible to restore sp from
  // fp in a single instruction.
  // FIXME: It would be better just to find a spare register here.
  if (AFI->isThumb2Function() &&
      (MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
    SavedRegs.set(ARM::R4);

  if (AFI->isThumb1OnlyFunction()) {
    // Spill LR if a Thumb1 function uses variable length argument lists.
    if (AFI->getArgRegsSaveSize() > 0)
      SavedRegs.set(ARM::LR);

    // Spill R4 if a Thumb1 epilogue has to restore SP from FP. We don't know
    // for sure what the stack size will be, but for this, an estimate is good
    // enough. If anything changes it, it'll be a spill, which implies we've
    // used all the registers and so R4 is already used, so not marking it
    // here will be OK.
    // FIXME: It would be better just to find a spare register here.
    unsigned StackSize = MFI.estimateStackSize(MF);
    if (MFI.hasVarSizedObjects() || StackSize > 508)
      SavedRegs.set(ARM::R4);
  }

  // See if we can spill vector registers to aligned stack.
  checkNumAlignedDPRCS2Regs(MF, SavedRegs);

  // Spill the BasePtr if it's used.
  if (RegInfo->hasBasePointer(MF))
    SavedRegs.set(RegInfo->getBaseRegister());

  // Don't spill FP if the frame can be eliminated. This is determined
  // by scanning the callee-save registers to see if any is modified.
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
  for (unsigned i = 0; CSRegs[i]; ++i) {
    unsigned Reg = CSRegs[i];
    bool Spilled = false;
    if (SavedRegs.test(Reg)) {
      Spilled = true;
      CanEliminateFrame = false;
    }

    if (!ARM::GPRRegClass.contains(Reg)) {
      if (Spilled) {
        if (ARM::SPRRegClass.contains(Reg))
          NumFPRSpills++;
        else if (ARM::DPRRegClass.contains(Reg))
          NumFPRSpills += 2;
        else if (ARM::QPRRegClass.contains(Reg))
          NumFPRSpills += 4;
      }
      continue;
    }

    if (Spilled) {
      NumGPRSpills++;

      if (!STI.splitFramePushPop(MF)) {
        if (Reg == ARM::LR)
          LRSpilled = true;
        CS1Spilled = true;
        continue;
      }

      // Keep track of whether LR and any of R4, R5, R6, and R7 are spilled.
      switch (Reg) {
      case ARM::LR:
        LRSpilled = true;
        LLVM_FALLTHROUGH;
      case ARM::R0: case ARM::R1:
      case ARM::R2: case ARM::R3:
      case ARM::R4: case ARM::R5:
      case ARM::R6: case ARM::R7:
        CS1Spilled = true;
        break;
      default:
        break;
      }
    } else {
      if (!STI.splitFramePushPop(MF)) {
        UnspilledCS1GPRs.push_back(Reg);
        continue;
      }

      switch (Reg) {
      case ARM::R0: case ARM::R1:
      case ARM::R2: case ARM::R3:
      case ARM::R4: case ARM::R5:
      case ARM::R6: case ARM::R7:
      case ARM::LR:
        UnspilledCS1GPRs.push_back(Reg);
        break;
      default:
        UnspilledCS2GPRs.push_back(Reg);
        break;
      }
    }
  }

  bool ForceLRSpill = false;
  if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
    unsigned FnSize = GetFunctionSizeInBytes(MF, TII);
    // Force LR to be spilled if the Thumb function size is >= 2048. This
    // enables use of BL to implement far jump. If it turns out that it's not
    // needed then the branch fix up path will undo it.
    if (FnSize >= (1 << 11)) {
      CanEliminateFrame = false;
      ForceLRSpill = true;
    }
  }

  // If any of the stack slot references may be out of range of an immediate
  // offset, make sure a register (or a spill slot) is available for the
  // register scavenger. Note that if we're indexing off the frame pointer, the
  // effective stack size is 4 bytes larger since the FP points to the stack
  // slot of the previous FP. Also, if we have variable sized objects in the
  // function, stack slot references will often be negative, and some of
  // our instructions are positive-offset only, so conservatively consider
  // that case to want a spill slot (or register) as well. Similarly, if
  // the function adjusts the stack pointer during execution and the
  // adjustments aren't already part of our stack size estimate, our offset
  // calculations may be off, so be conservative.
  // FIXME: We could add logic to be more precise about negative offsets
  // and which instructions will need a scratch register for them. Is it
  // worth the effort and added fragility?
  unsigned EstimatedStackSize =
      MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
  if (hasFP(MF)) {
    if (AFI->hasStackFrame())
      EstimatedStackSize += 4;
  } else {
    // If FP is not used, SP will be used to access arguments, so count the
    // size of arguments into the estimation.
    EstimatedStackSize += MF.getInfo<ARMFunctionInfo>()->getArgumentStackSize();
  }
  EstimatedStackSize += 16; // For possible paddings.

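  // For example, if some instruction in the function references a stack slot
  // with a Thumb2 i8 addressing mode, the limit below is 255 bytes; an
  // estimated frame of 300 bytes then counts as a big stack even though it is
  // small in absolute terms.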
  bool BigStack = EstimatedStackSize >= estimateRSStackSizeLimit(MF, this) ||
                  MFI.hasVarSizedObjects() ||
                  (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));
  bool ExtraCSSpill = false;
  if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
    AFI->setHasStackFrame(true);

    if (hasFP(MF)) {
      SavedRegs.set(FramePtr);
      // If the frame pointer is required by the ABI, also spill LR so that we
      // emit a complete frame record.
      if (MF.getTarget().Options.DisableFramePointerElim(MF) && !LRSpilled) {
        SavedRegs.set(ARM::LR);
        LRSpilled = true;
        NumGPRSpills++;
        auto LRPos = find(UnspilledCS1GPRs, ARM::LR);
        if (LRPos != UnspilledCS1GPRs.end())
          UnspilledCS1GPRs.erase(LRPos);
      }
      auto FPPos = find(UnspilledCS1GPRs, FramePtr);
      if (FPPos != UnspilledCS1GPRs.end())
        UnspilledCS1GPRs.erase(FPPos);
      NumGPRSpills++;
      if (FramePtr == ARM::R7)
        CS1Spilled = true;
    }

    if (AFI->isThumb1OnlyFunction()) {
      // For Thumb1-only targets, we need some low registers when we save and
      // restore the high registers (which aren't allocatable, but could be
      // used by inline assembly) because the push/pop instructions cannot
      // access high registers. If necessary, we might need to push more low
      // registers to ensure that there is at least one free that can be used
      // for the saving & restoring, and preferably we should ensure that as
      // many as are needed are available so that fewer push/pop instructions
      // are required.

      // Low registers which are not currently pushed, but could be (r4-r7).
      SmallVector<unsigned, 4> AvailableRegs;

      // Unused argument registers (r0-r3) can be clobbered in the prologue
      // for free.
      int EntryRegDeficit = 0;
      for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
        if (!MF.getRegInfo().isLiveIn(Reg)) {
          --EntryRegDeficit;
          DEBUG(dbgs() << PrintReg(Reg, TRI)
                       << " is unused argument register, EntryRegDeficit = "
                       << EntryRegDeficit << "\n");
        }
      }

      // Unused return registers can be clobbered in the epilogue for free.
      int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
      DEBUG(dbgs() << AFI->getReturnRegsCount()
                   << " return regs used, ExitRegDeficit = " << ExitRegDeficit
                   << "\n");

      int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit);
      DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");

      // r4-r6 can be used in the prologue if they are pushed by the first
      // push instruction.
      for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
        if (SavedRegs.test(Reg)) {
          --RegDeficit;
          DEBUG(dbgs() << PrintReg(Reg, TRI)
                       << " is saved low register, RegDeficit = " << RegDeficit
                       << "\n");
        } else {
          AvailableRegs.push_back(Reg);
          DEBUG(dbgs()
                << PrintReg(Reg, TRI)
                << " is non-saved low register, adding to AvailableRegs\n");
        }
      }

      // r7 can be used if it is not being used as the frame pointer.
      if (!hasFP(MF)) {
        if (SavedRegs.test(ARM::R7)) {
          --RegDeficit;
          DEBUG(dbgs() << "%R7 is saved low register, RegDeficit = "
                       << RegDeficit << "\n");
        } else {
          AvailableRegs.push_back(ARM::R7);
          DEBUG(dbgs()
                << "%R7 is non-saved low register, adding to AvailableRegs\n");
        }
      }

      // Each of r8-r11 needs to be copied to a low register, then pushed.
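      // Restoring them likewise goes through a low register, so each saved
      // high register increases the deficit of low registers we need.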
      for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
        if (SavedRegs.test(Reg)) {
          ++RegDeficit;
          DEBUG(dbgs() << PrintReg(Reg, TRI)
                       << " is saved high register, RegDeficit = " << RegDeficit
                       << "\n");
        }
      }

      // LR can only be used by PUSH, not POP, and can't be used at all if the
      // llvm.returnaddress intrinsic is used. This is only worth doing if we
      // are more limited at function entry than exit.
      if ((EntryRegDeficit > ExitRegDeficit) &&
          !(MF.getRegInfo().isLiveIn(ARM::LR) &&
            MF.getFrameInfo().isReturnAddressTaken())) {
        if (SavedRegs.test(ARM::LR)) {
          --RegDeficit;
          DEBUG(dbgs() << "%LR is saved register, RegDeficit = " << RegDeficit
                       << "\n");
        } else {
          AvailableRegs.push_back(ARM::LR);
          DEBUG(dbgs() << "%LR is not saved, adding to AvailableRegs\n");
        }
      }

      // If there are more high registers that need pushing than low registers
      // available, push some more low registers so that we can use fewer push
      // instructions. This might not reduce RegDeficit all the way to zero,
      // because we can only guarantee that r4-r6 are available, but r8-r11 may
      // need saving.
      DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
      for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
        unsigned Reg = AvailableRegs.pop_back_val();
        DEBUG(dbgs() << "Spilling " << PrintReg(Reg, TRI)
                     << " to make up reg deficit\n");
        SavedRegs.set(Reg);
        NumGPRSpills++;
        CS1Spilled = true;
        ExtraCSSpill = true;
        UnspilledCS1GPRs.erase(find(UnspilledCS1GPRs, Reg));
        if (Reg == ARM::LR)
          LRSpilled = true;
      }
      DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit
                   << "\n");
    }

    // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled,
    // spill LR as well so we can fold BX_RET to the registers restore (LDM).
    if (!LRSpilled && CS1Spilled) {
      SavedRegs.set(ARM::LR);
      NumGPRSpills++;
      SmallVectorImpl<unsigned>::iterator LRPos;
      LRPos = find(UnspilledCS1GPRs, (unsigned)ARM::LR);
      if (LRPos != UnspilledCS1GPRs.end())
        UnspilledCS1GPRs.erase(LRPos);

      ForceLRSpill = false;
      ExtraCSSpill = true;
    }

    // If stack and double are 8-byte aligned and we are spilling an odd number
    // of GPRs, spill one extra callee-saved GPR so we won't have to pad
    // between the integer and double callee-saved areas.
    DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
    unsigned TargetAlign = getStackAlignment();
    if (TargetAlign >= 8 && (NumGPRSpills & 1)) {
      if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
        for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
          unsigned Reg = UnspilledCS1GPRs[i];
          // Don't spill a high register if the function is Thumb.
          // However, on Windows on ARM, accept R11 (the frame pointer).
          if (!AFI->isThumbFunction() ||
              (STI.isTargetWindows() && Reg == ARM::R11) ||
              isARMLowRegister(Reg) || Reg == ARM::LR) {
            SavedRegs.set(Reg);
            DEBUG(dbgs() << "Spilling " << PrintReg(Reg, TRI)
                         << " to make up alignment\n");
            if (!MRI.isReserved(Reg))
              ExtraCSSpill = true;
            break;
          }
        }
      } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
        unsigned Reg = UnspilledCS2GPRs.front();
        SavedRegs.set(Reg);
        DEBUG(dbgs() << "Spilling " << PrintReg(Reg, TRI)
                     << " to make up alignment\n");
        if (!MRI.isReserved(Reg))
          ExtraCSSpill = true;
      }
    }

    // Estimate if we might need to scavenge a register at some point in order
    // to materialize a stack offset. If so, either spill one additional
    // callee-saved register or reserve a special spill slot to facilitate
    // register scavenging. Thumb1 needs a spill slot for stack pointer
    // adjustments also, even when the frame itself is small.
    if (BigStack && !ExtraCSSpill) {
      // If any non-reserved CS register isn't spilled, just spill one or two
      // extra. That should take care of it!
      unsigned NumExtras = TargetAlign / 4;
      SmallVector<unsigned, 2> Extras;
      while (NumExtras && !UnspilledCS1GPRs.empty()) {
        unsigned Reg = UnspilledCS1GPRs.back();
        UnspilledCS1GPRs.pop_back();
        if (!MRI.isReserved(Reg) &&
            (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
             Reg == ARM::LR)) {
          Extras.push_back(Reg);
          NumExtras--;
        }
      }
      // For non-Thumb1 functions, also check for hi-reg CS registers.
      if (!AFI->isThumb1OnlyFunction()) {
        while (NumExtras && !UnspilledCS2GPRs.empty()) {
          unsigned Reg = UnspilledCS2GPRs.back();
          UnspilledCS2GPRs.pop_back();
          if (!MRI.isReserved(Reg)) {
            Extras.push_back(Reg);
            NumExtras--;
          }
        }
      }
      if (Extras.size() && NumExtras == 0) {
        for (unsigned i = 0, e = Extras.size(); i != e; ++i) {
          SavedRegs.set(Extras[i]);
        }
      } else if (!AFI->isThumb1OnlyFunction()) {
        // Note: Thumb1 functions spill to R12, not the stack. Reserve a slot
        // closest to SP or frame pointer.
        assert(RS && "Register scavenging not provided");
        const TargetRegisterClass *RC = &ARM::GPRRegClass;
        RS->addScavengingFrameIndex(MFI.CreateStackObject(RC->getSize(),
                                                          RC->getAlignment(),
                                                          false));
      }
    }
  }

  if (ForceLRSpill) {
    SavedRegs.set(ARM::LR);
    AFI->setLRIsSpilledForFarJump(true);
  }
}

MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  if (!hasReservedCallFrame(MF)) {
    // If we have alloca, convert as follows:
    // ADJCALLSTACKDOWN -> sub sp, sp, amount
    // ADJCALLSTACKUP   -> add sp, sp, amount
    MachineInstr &Old = *I;
    DebugLoc dl = Old.getDebugLoc();
    unsigned Amount = Old.getOperand(0).getImm();
    if (Amount != 0) {
      // We need to keep the stack aligned properly. To do this, we round the
      // amount of space needed for the outgoing arguments up to the next
      // alignment boundary.
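      // For example, with an 8-byte stack alignment, a 20-byte outgoing
      // argument area is rounded up to 24 bytes.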
      Amount = alignSPAdjust(Amount);

      ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
      assert(!AFI->isThumb1OnlyFunction() &&
             "This eliminateCallFramePseudoInstr does not support Thumb1!");
      bool isARM = !AFI->isThumbFunction();

      // Replace the pseudo instruction with a new instruction...
      unsigned Opc = Old.getOpcode();
      int PIdx = Old.findFirstPredOperandIdx();
      ARMCC::CondCodes Pred =
          (PIdx == -1) ? ARMCC::AL
                       : (ARMCC::CondCodes)Old.getOperand(PIdx).getImm();
      if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
        // Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
        unsigned PredReg = Old.getOperand(2).getReg();
        emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
                     Pred, PredReg);
      } else {
        // Note: PredReg is operand 3 for ADJCALLSTACKUP.
        unsigned PredReg = Old.getOperand(3).getReg();
        assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
        emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
                     Pred, PredReg);
      }
    }
  }
  return MBB.erase(I);
}

/// Get the minimum constant for ARM that is greater than or equal to the
/// argument. In ARM, constants can have any value that can be produced by
/// rotating an 8-bit value to the right by an even number of bits within a
/// 32-bit word.
static uint32_t alignToARMConstant(uint32_t Value) {
  unsigned Shifted = 0;

  if (Value == 0)
    return 0;

  while (!(Value & 0xC0000000)) {
    Value = Value << 2;
    Shifted += 2;
  }

  bool Carry = (Value & 0x00FFFFFF);
  Value = ((Value & 0xFF000000) >> 24) + Carry;

  if (Value & 0x00000100)
    Value = Value & 0x000001FC;

  if (Shifted > 24)
    Value = Value >> (Shifted - 24);
  else
    Value = Value << (24 - Shifted);

  return Value;
}

// The stack limit in the TCB is set to this many bytes above the actual
// stack limit.
static const uint64_t kSplitStackAvailable = 256;

// Adjust the function prologue to enable split stacks. This currently only
// supports android and linux.
//
// The ABI of the segmented stack prologue is somewhat arbitrarily chosen, but
// must be well defined in order to allow for consistent implementations of the
// __morestack helper function. The ABI is also not a normal ABI in that it
// doesn't follow the normal calling conventions because this allows the
// prologue of each function to be optimized further.
//
// Currently, the ABI looks like (when calling __morestack)
//
//   * r4 holds the minimum stack size requested for this function call
//   * r5 holds the stack size of the arguments to the function
//   * the beginning of the function is 3 instructions after the call to
//     __morestack
//
// Implementations of __morestack should use r4 to allocate a new stack, r5 to
// place the arguments on the new stack, and the fixed 3-instruction offset to
// jump directly to the body of the function when working on the new stack.
//
// An old (and possibly no longer compatible) implementation of __morestack for
// ARM can be found at [1].
//
// [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
void ARMFrameLowering::adjustForSegmentedStacks(
    MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
  unsigned Opcode;
  unsigned CFIIndex;
  const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
  bool Thumb = ST->isThumb();

  // Sadly, this currently doesn't support varargs, nor platforms other than
  // android/linux. Note that thumb1/thumb2 are supported on android/linux.
  if (MF.getFunction()->isVarArg())
    report_fatal_error("Segmented stacks do not support vararg functions.");
  if (!ST->isTargetAndroid() && !ST->isTargetLinux())
    report_fatal_error("Segmented stacks not supported on this platform.");

  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  MCContext &Context = MMI.getContext();
  const MCRegisterInfo *MRI = Context.getRegisterInfo();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL;

  uint64_t StackSize = MFI.getStackSize();

  // Do not generate a prologue for functions with a stack of size zero.
  if (StackSize == 0)
    return;

  // Use R4 and R5 as scratch registers.
  // We save R4 and R5 before use and restore them before leaving the function.
  unsigned ScratchReg0 = ARM::R4;
  unsigned ScratchReg1 = ARM::R5;
  uint64_t AlignedStackSize;

  MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *AllocMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();

  // Grab everything that reaches PrologueMBB to update their liveness as well.
  SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
  SmallVector<MachineBasicBlock *, 2> WalkList;
  WalkList.push_back(&PrologueMBB);

  do {
    MachineBasicBlock *CurMBB = WalkList.pop_back_val();
    for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
      if (BeforePrologueRegion.insert(PredBB).second)
        WalkList.push_back(PredBB);
    }
  } while (!WalkList.empty());

  // The order of this list is important.
  // The blocks will all be inserted before PrologueMBB using that order.
  // Therefore the block that should appear first in the CFG should appear
  // first in the list.
  MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
                                      PostStackMBB};

  for (MachineBasicBlock *B : AddedBlocks)
    BeforePrologueRegion.insert(B);

  for (const auto &LI : PrologueMBB.liveins()) {
    for (MachineBasicBlock *PredBB : BeforePrologueRegion)
      PredBB->addLiveIn(LI);
  }

  // Remove the newly added blocks from the list, since we know
  // we do not have to do the following updates for them.
  for (MachineBasicBlock *B : AddedBlocks) {
    BeforePrologueRegion.erase(B);
    MF.insert(PrologueMBB.getIterator(), B);
  }

  for (MachineBasicBlock *MBB : BeforePrologueRegion) {
    // Make sure the LiveIns are still sorted and unique.
    MBB->sortUniqueLiveIns();
    // Replace the edges to PrologueMBB by edges to the sequences
    // we are about to add.
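    // AddedBlocks[0] (PrevStackMBB) is the entry block of the new sequence,
    // so predecessors of the old prologue must now branch there instead.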
    MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
  }

  // Align the requested stack size so it can be encoded as an ARM immediate.
  AlignedStackSize = alignToARMConstant(StackSize);

  // When the frame size is less than 256 we just compare the stack
  // boundary directly to the value of the stack pointer, per gcc.
  bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;

  // We will use two of the callee-saved registers as scratch registers so we
  // need to save those registers onto the stack.
  // We will use SR0 to hold the stack limit and SR1 to hold the stack size
  // requested and arguments for __morestack().
  // SR0: Scratch Register #0
  // SR1: Scratch Register #1
  // push {SR0, SR1}
  if (Thumb) {
    AddDefaultPred(BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH)))
        .addReg(ScratchReg0).addReg(ScratchReg1);
  } else {
    AddDefaultPred(BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
                       .addReg(ARM::SP, RegState::Define).addReg(ARM::SP))
        .addReg(ScratchReg0).addReg(ScratchReg1);
  }

  // Emit the relevant DWARF information about the change in stack pointer as
  // well as where to find both r4 and r5 (the callee-save registers).
  CFIIndex =
      MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -8));
  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);
  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
      nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);
  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
      nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);

  // mov SR1, sp
  if (Thumb) {
    AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
                       .addReg(ARM::SP));
  } else if (CompareStackPointer) {
    AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
                       .addReg(ARM::SP)).addReg(0);
  }

  // sub SR1, sp, #StackSize
  if (!CompareStackPointer && Thumb) {
    AddDefaultPred(
        AddDefaultCC(BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1))
            .addReg(ScratchReg1).addImm(AlignedStackSize));
  } else if (!CompareStackPointer) {
    AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
                       .addReg(ARM::SP).addImm(AlignedStackSize)).addReg(0);
  }

  if (Thumb && ST->isThumb1Only()) {
    unsigned PCLabelId = ARMFI->createPICLabelUId();
    ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
        MF.getFunction()->getContext(), "__STACK_LIMIT", PCLabelId, 0);
    MachineConstantPool *MCP = MF.getConstantPool();
    unsigned CPI = MCP->getConstantPoolIndex(NewCPV, 4);

    // ldr SR0, [pc, offset(STACK_LIMIT)]
    AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
                       .addConstantPoolIndex(CPI));

    // ldr SR0, [SR0]
    AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
                       .addReg(ScratchReg0).addImm(0));
  } else {
    // Get TLS base address from the coprocessor.
    // mrc p15, #0, SR0, c13, c0, #3
    AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::MRC), ScratchReg0)
                       .addImm(15)
                       .addImm(0)
                       .addImm(13)
                       .addImm(0)
                       .addImm(3));

    // Use the last TLS slot on Android and a private
    // field of the TCB on Linux.
    assert(ST->isTargetAndroid() || ST->isTargetLinux());
    unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;

    // Get the stack limit from the right offset.
    // ldr SR0, [SR0, #4 * TlsOffset]
    AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::LDRi12), ScratchReg0)
                       .addReg(ScratchReg0).addImm(4 * TlsOffset));
  }

  // Compare the stack limit with the stack size requested.
  // cmp SR0, SR1
  Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
  AddDefaultPred(BuildMI(GetMBB, DL, TII.get(Opcode))
                     .addReg(ScratchReg0)
                     .addReg(ScratchReg1));

  // This jump is taken if StackLimit < SP - stack required.
  Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
  BuildMI(GetMBB, DL, TII.get(Opcode)).addMBB(PostStackMBB)
      .addImm(ARMCC::LO)
      .addReg(ARM::CPSR);


  // Calling __morestack(StackSize, Size of stack arguments).
  // __morestack knows that the stack size requested is in SR0 (r4)
  // and that the size of the stack arguments is in SR1 (r5).

  // Pass the first argument to __morestack in Scratch Register #0:
  // the amount of stack required.
  if (Thumb) {
    AddDefaultPred(AddDefaultCC(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8),
                                        ScratchReg0)).addImm(AlignedStackSize));
  } else {
    AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
                       .addImm(AlignedStackSize)).addReg(0);
  }
  // Pass the second argument to __morestack in Scratch Register #1:
  // the amount of stack consumed by the function's arguments.
  if (Thumb) {
    AddDefaultPred(
        AddDefaultCC(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1))
            .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())));
  } else {
    AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
                       .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())))
        .addReg(0);
  }

  // push {lr} - Save the return address of this function.
  if (Thumb) {
    AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH)))
        .addReg(ARM::LR);
  } else {
    AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
                       .addReg(ARM::SP, RegState::Define)
                       .addReg(ARM::SP))
        .addReg(ARM::LR);
  }

  // Emit the DWARF info about the change in stack as well as where to find the
  // previous link register.
  CFIIndex =
      MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -12));
  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);
  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
      nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);

  // Call __morestack().
  if (Thumb) {
    AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tBL)))
        .addExternalSymbol("__morestack");
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::BL))
        .addExternalSymbol("__morestack");
  }

  // pop {lr} - Restore the return address of this original function.
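  // Thumb1 has no pop that can target lr directly, so pop into a scratch
  // register first and then move it to lr.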
  if (Thumb) {
    if (ST->isThumb1Only()) {
      AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPOP)))
          .addReg(ScratchReg0);
      AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
                         .addReg(ScratchReg0));
    } else {
      AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
                         .addReg(ARM::LR, RegState::Define)
                         .addReg(ARM::SP, RegState::Define)
                         .addReg(ARM::SP)
                         .addImm(4));
    }
  } else {
    AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
                       .addReg(ARM::SP, RegState::Define)
                       .addReg(ARM::SP))
        .addReg(ARM::LR);
  }

  // Restore SR0 and SR1 in case __morestack() was called.
  // __morestack() will skip the PostStackMBB block so we need to restore
  // the scratch registers from here.
  // pop {SR0, SR1}
  if (Thumb) {
    AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPOP)))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  } else {
    AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
                       .addReg(ARM::SP, RegState::Define)
                       .addReg(ARM::SP))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  }

  // Update the CFA offset now that we've popped.
  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);

  // bx lr - Return from this function.
  Opcode = Thumb ? ARM::tBX_RET : ARM::BX_RET;
  AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(Opcode)));

  // Restore SR0 and SR1 in case __morestack() was not called.
  // pop {SR0, SR1}
  if (Thumb) {
    AddDefaultPred(BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP)))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  } else {
    AddDefaultPred(BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
                       .addReg(ARM::SP, RegState::Define)
                       .addReg(ARM::SP))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  }

  // Update the CFA offset now that we've popped.
  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);

  // Tell debuggers that r4 and r5 are now the same as they were in the
  // previous function, that they're the "Same Value".
  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createSameValue(
      nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);
  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createSameValue(
      nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);

  // Organize the MBB successor lists.
  PostStackMBB->addSuccessor(&PrologueMBB);

  AllocMBB->addSuccessor(PostStackMBB);

  GetMBB->addSuccessor(PostStackMBB);
  GetMBB->addSuccessor(AllocMBB);

  McrMBB->addSuccessor(GetMBB);

  PrevStackMBB->addSuccessor(McrMBB);

#ifdef EXPENSIVE_CHECKS
  MF.verify();
#endif
}