//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

#define DEBUG_TYPE "arm-ldst-opt"

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");

/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
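///
/// For example (an illustrative sketch only; the exact registers and offsets
/// depend on the input code), a run of word loads off one base register:
///
///   ldr r0, [r4]
///   ldr r1, [r4, #4]
///   ldr r2, [r4, #8]
/// =>
///   ldmia r4, {r0, r1, r2}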

namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
                          const MemOpQueue &MemOps, unsigned DefReg,
                          unsigned RangeBegin, unsigned RangeEnd);

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl,
                  ArrayRef<std::pair<unsigned, bool> > Regs,
                  ArrayRef<unsigned> ImpDefs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVectorImpl<MachineBasicBlock::iterator> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}

namespace llvm {
namespace ARM_AM {

AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

} // end namespace ARM_AM
} // end namespace llvm

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT2i32Store(Opc);
}

/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          ArrayRef<std::pair<unsigned, bool> > Regs,
                          ArrayRef<unsigned> ImpDefs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // Check if this is a supported opcode before we insert instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If the starting offset isn't zero, insert a MI to materialize a new
    // base. But only do so if it is cost effective, i.e. merging more than
    // two loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
    if (Offset < 0) {
      BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
      Offset = - Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  // Add implicit defs for super-registers.
  for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
    MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);

  return true;
}

/// \brief Find all instructions using a given imp-def within a range.
///
/// We are trying to combine a range of instructions, one of which (located at
/// position RangeBegin) implicitly defines a register. The final LDM/STM will
/// be placed at RangeEnd, and so any uses of this definition between
/// RangeBegin and RangeEnd must be modified to use an undefined value.
///
/// The live range continues until we find a second definition or one of the
/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
/// we must consider all uses and decide which are relevant in a second pass.
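///
/// For instance (hypothetical registers): if the instruction at RangeBegin
/// loads S0 and carries an implicit def of the containing D0, and another
/// instruction in the range reads D0, that use must be marked <undef> once
/// the load has been folded into the LDM placed at RangeEnd.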
void ARMLoadStoreOpt::findUsesOfImpDef(
    SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
    unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
  std::map<unsigned, MachineOperand *> Uses;
  unsigned LastLivePos = RangeEnd;

  // First we find all uses of this register with Position between RangeBegin
  // and RangeEnd; any or all of these could be uses of a definition at
  // RangeBegin. We also record the latest position a definition at RangeBegin
  // would be considered live.
  for (unsigned i = 0; i < MemOps.size(); ++i) {
    MachineInstr &MI = *MemOps[i].MBBI;
    unsigned MIPosition = MemOps[i].Position;
    if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
      continue;

    // If this instruction defines the register, then any later use will be of
    // that definition rather than ours.
    if (MI.definesRegister(DefReg))
      LastLivePos = std::min(LastLivePos, MIPosition);

    MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
    if (!UseOp)
      continue;

    // If this instruction kills the register then (assuming liveness is
    // correct when we start) we don't need to think about anything after here.
    if (UseOp->isKill())
      LastLivePos = std::min(LastLivePos, MIPosition);

    Uses[MIPosition] = UseOp;
  }

  // Now we traverse the list of all uses, and append the ones that actually
  // use our definition to the requested list.
  for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
                                                      E = Uses.end();
       I != E; ++I) {
    // List is sorted by position so once we've found one out of range there
    // will be no more to consider.
    if (I->first > LastLivePos)
      break;
    UsesOfImpDefs.push_back(I->second);
  }
}

// MergeOpsUpdate - Call MergeOps and update MemOps and Merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 8> ImpDefs;
  SmallVector<MachineOperand *, 8> UsesOfImpDefs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));

    // Collect any implicit defs of super-registers. They must be preserved.
    for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
      if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
        continue;
      unsigned DefReg = MO->getReg();
      if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
        ImpDefs.push_back(DefReg);

      // There may be other uses of the definition between this instruction and
      // the eventual LDM/STM position. These should be marked undef if the
      // merge takes place.
      findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
                       insertPos);
    }
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs, ImpDefs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(std::prev(Loc));

  // In gathering loads together, we may have moved the imp-def of a register
  // past one of its uses. This is OK, since we know better than the rest of
  // LLVM what's OK with ARM loads and stores; but we still have to adjust the
  // affected uses.
  for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
                                                   E = UsesOfImpDefs.end();
       I != E; ++I)
    (*I)->setIsUndef();

  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                          SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;

  // vldm / vstm limits are 32 for S variants, 16 for D variants.

  switch (Opcode) {
  default: break;
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VSTRD:
    Limit = 16;
    break;
  case ARM::VLDRD:
    Limit = 16;
    break;
  case ARM::VLDRS:
    Limit = 32;
    break;
  }

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1)) &&
        // On Swift we don't want vldm/vstm to start with an odd register
        // number because Q register unaligned vldm/vstm need more uops.
        (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try merging the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}

static bool definesCPSR(MachineInstr *MI) {
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has a live CPSR def, then it's not safe to fold it
      // into load / store.
      return true;
  }

  return false;
}

static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::t2SUBri:
  case ARM::SUBri:
    CheckCPSRDef = true;
    // fallthrough
  case ARM::tSUBspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm()*Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::t2ADDri:
  case ARM::ADDri:
    CheckCPSRDef = true;
    // fallthrough
  case ARM::tADDspi:
    break;
  }

  if (Bytes == 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm()*Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}

/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                                bool &Advance,
                                                MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(std::prev(Loc));
}

static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = std::prev(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
      // so adjust and use t2LDRi12 here for that.
      unsigned NewOpc2 = (isLd)
        ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = std::prev(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        // Never kill the base register in the first instruction.
        if (EvenReg == BaseReg)
          EvenDeadKill = false;
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = std::prev(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
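///
/// A sketch of the intended rewrite (illustrative registers and offsets only):
///
///   str r2, [r0]
///   str r3, [r0, #4]
/// =>
///   stmia r0, {r2, r3}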
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());

      // Watch out for:
      // r4 := ldr [r0, #8]
      // r4 := ldr [r0, #4]
      //
      // The optimization may reorder the second ldr in front of the first
      // ldr, which violates write-after-write (WAW) dependence. The same
      // applies to str. Try to merge inst(s) already in MemOps.
      bool Overlap = false;
      for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
        if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
          Overlap = true;
          break;
        }
      }

      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else if (!Overlap) {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's
        // needed. First advance to the instruction just before the start of
        // the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass);
        // Process the load / store instructions.
        RS->forward(std::prev(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(std::prev(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(std::prev(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If the iterator hasn't been advanced and this is not a memory op,
      // skip it. It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = std::prev(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  STI = &TM.getSubtarget<ARMSubtarget>();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVectorImpl<MachineInstr *> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getDataLayout();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias
  // information some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // OK if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}


/// Copy Op0 and Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}

bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STRi12)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  // At the moment, we ignore the memoryoperand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STRi12)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement. At the moment, we ignore the memory operand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
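
// A sketch of the intended effect (illustrative, register names arbitrary):
// loads in a run are gathered up at the first load, stores are gathered
// down at the last store, so that
//   ldr r1, [r0]
//   add r5, r1, #4      ; unrelated instruction
//   ldr r2, [r0, #4]
// is rescheduled to
//   ldr r1, [r0]
//   ldr r2, [r0, #4]
//   add r5, r1, #4
// producing a contiguous run that can later be merged into ldm / ldrd
// (or stm / strd for stores).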
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVectorImpl<MachineInstr *> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;
  });

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative: if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to allocate registers that can later form an even/odd pair.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd,  EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
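
// An illustrative note on the duplicate check in the scan below: a repeated
// base+offset combination ends the current batch, e.g.
//   ldr r1, [r0, #8]
//   ldr r2, [r0, #12]
//   ldr r3, [r0, #8]    ; same base+offset again -> process the batch so far
// which guarantees that every batch handed to RescheduleOps has pairwise
// distinct offsets, as the sort comparator in RescheduleOps asserts.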
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that was seen
        // earlier). Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - returns an instance of the load /
/// store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}
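
// A sketch of how these passes are typically scheduled (the hook names are
// recalled from the ARM target's pass setup and may differ between LLVM
// versions; treat the exact locations as illustrative):
//   // pre-RA, so the scheduler can bring pairable loads / stores together:
//   addPass(createARMLoadStoreOptimizationPass(/*PreAlloc=*/true));
//   // post-RA, once physical registers are known:
//   addPass(createARMLoadStoreOptimizationPass());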