//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");

/// ARMLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
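///
/// For example (a sketch of the intent, not actual compiler output):
///
///   ldr r0, [r4]
///   ldr r1, [r4, #4]    =>    ldmia r4, {r0, r1, r2}
///   ldr r2, [r4, #8]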

namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static int getLoadStoreMultipleOpcode(int Opcode) {
  switch (Opcode) {
  case ARM::LDR:
    ++NumLDMGened;
    return ARM::LDM;
  case ARM::STR:
    ++NumSTMGened;
    return ARM::STM;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    return ARM::t2LDM;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    return ARM::t2STM;
  case ARM::VLDRS:
    ++NumVLDMGened;
    return ARM::VLDMS;
  case ARM::VSTRS:
    ++NumVSTMGened;
    return ARM::VSTMS;
  case ARM::VLDRD:
    ++NumVLDMGened;
    return ARM::VLDMD;
  case ARM::VSTRD:
    ++NumVSTMGened;
    return ARM::VSTMD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDR || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STR || isT2i32Store(Opc);
}

/// MergeOps - Create and insert an LDM or STM with Base as the base register
/// and the registers in Regs as the register operands that would be loaded /
/// stored. It returns true if the transformation is done.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // If the starting offset isn't zero, insert an instruction to materialize
    // a new base. But only do so if it is cost effective, i.e. merging more
    // than two loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, just use one of the destination registers as the
      // new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2
      ? ARM::ADDri
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
    if (Offset < 0) {
      BaseOpc = !isThumb2
        ? ARM::SUBri
        : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
      Offset = - Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false;  // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // The new base is always killed right after its use.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}

// MergeOpsUpdate - Call MergeOps and update MemOps and Merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
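  // For instance (a hypothetical case), if an earlier unmerged
  // "ldr r3, [r5]" carries the kill of r3 and the merged LDM is inserted
  // after it, the kill flag has to move to the LDM and be cleared on the
  // original instruction.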
  const unsigned insertPos = memOps[insertAfter].Position;

  SmallSet<unsigned, 4> UnavailRegs;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0; i < memOpsBegin; ++i) {
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      if (memOps[i].Merged)
        UnavailRegs.insert(Reg);
      else {
        KilledRegs.insert(Reg);
        Killer[Reg] = i;
      }
    }
  }
  for (unsigned i = memOpsEnd, e = memOps.size(); i != e; ++i) {
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    if (UnavailRegs.count(Reg))
      // The register is killed earlier and it's not easy / possible to update
      // the kill marker on already-merged instructions. Abort.
      return;

    // If we are inserting the merged operation after an unmerged operation
    // that uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any unmerged memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        memOps[j].MBBI->getOperand(0).setIsKill(false);
        memOps[j].isKill = false;
      }
    }
    MBB.erase(memOps[i].MBBI);
    memOps[i].Merged = true;
  }
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or
/// more load / store multiple instructions.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                          unsigned Base, int Opcode, unsigned Size,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          unsigned Scratch, MemOpQueue &MemOps,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : getARMRegisterNumbering(PReg);
  unsigned Count = 1;

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : getARMRegisterNumbering(Reg);
    // Register numbers must be in ascending order. For VFP, the registers
    // must also be consecutive and there is a limit of 16 double-word
    // registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum)
         || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try to merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}

static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::t2SUBrSPi &&
      MI->getOpcode() != ARM::t2SUBrSPi12 &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::t2ADDrSPi &&
      MI->getOpcode() != ARM::t2ADDrSPi12 &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDR:
  case ARM::STR:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDM:
  case ARM::STM:
  case ARM::t2LDM:
  case ARM::t2STM:
  case ARM::VLDMS:
  case ARM::VSTMS:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMD:
  case ARM::VSTMD:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDM: return ARM::LDM_UPD;
  case ARM::STM: return ARM::STM_UPD;
  case ARM::t2LDM: return ARM::t2LDM_UPD;
  case ARM::t2STM: return ARM::t2STM_UPD;
  case ARM::VLDMS: return ARM::VLDMS_UPD;
  case ARM::VLDMD: return ARM::VLDMD_UPD;
  case ARM::VSTMS: return ARM::VSTMS_UPD;
  case ARM::VSTMD: return ARM::VSTMD_UPD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

/// MergeBaseUpdateLSMultiple - Fold a preceding/trailing inc/dec of the base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::ia;

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
    if (MI->getOperand(i).getReg() == Base)
      return false;
  }
  Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(ARM_AM::getAM4ModeImm(Mode))
    .addImm(Pred).addReg(PredReg);
  // Transfer the rest of the operands.
  for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));
  // Transfer memoperands.
  (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_PRE;
  case ARM::STR: return ARM::STR_PRE;
  case ARM::VLDRS: return ARM::VLDMS_UPD;
  case ARM::VLDRD: return ARM::VLDMD_UPD;
  case ARM::VSTRS: return ARM::VSTMS_UPD;
  case ARM::VSTRD: return ARM::VSTMD_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_POST;
  case ARM::STR: return ARM::STR_POST;
  case ARM::VLDRS: return ARM::VLDMS_UPD;
  case ARM::VLDRD: return ARM::VLDMD_UPD;
  case ARM::VSTRS: return ARM::VSTMS_UPD;
  case ARM::VSTRD: return ARM::VSTMD_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

/// MergeBaseUpdateLoadStore - Fold a preceding/trailing inc/dec of the base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible.
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDR || Opcode == ARM::STR);
  if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
    return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;
  if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;

  bool isLd = isi32Load(Opcode) ||
              Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the
  // would-be writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, Thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned Offset = 0;
  if (isAM5)
    Offset = ARM_AM::getAM4ModeImm(AddSub == ARM_AM::sub ?
                                   ARM_AM::db : ARM_AM::ia);
  else if (isAM2)
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  else
    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Offset)
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true)
                                 : getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2)
      // LDR_PRE, LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM2)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if the instruction is a memory operation (that
/// this pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise, don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::LDR:
  case ARM::STR:
    return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance the register scavenger to just before the earliest
/// memory op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}

static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
    return OffField;

  int Offset = isAM2
    ? ARM_AM::getAM2Offset(OffField)
    : (isAM3 ? ARM_AM::getAM3Offset(OffField)
             : ARM_AM::getAM5Offset(OffField) * 4);
  if (isAM2) {
    if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int OffImm, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          unsigned OffReg, bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  int Offset = OffImm;
  if (!isT2) {
    if (OffImm < 0)
      Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
    else
      Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
  }
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to
      // an ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDM : ARM::LDM)
        : (isT2 ? ARM::t2STM : ARM::STM);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      assert((!isT2 || !OffReg) &&
             "Thumb2 ldrd / strd does not encode offset register!");
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defed by the load. Make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg) ||
           (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg) &&
               (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops with the same base register and incrementing offsets into LDM / STM
/// ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
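                // (Two accesses at the same base+offset, e.g. a reload of
                // the same stack slot, can never appear in one LDM / STM.)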
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's
        // needed. First advance to the instruction just before the start of
        // the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // instructions that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If the iterator hasn't been advanced and this is not a memory op,
      // skip it. It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM_UPD ||
        PrevMI->getOpcode() == ARM::t2LDM_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(&*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
/// loads / stores from consecutive locations close together to make it more
/// likely that they will be combined later.
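///
/// For example (a sketch of the intent, not actual compiler output):
///
///   ldr r1, [r0]                ldr r1, [r0]
///   add r3, r3, #8        =>    ldr r2, [r0, #4]
///   ldr r2, [r0, #4]            add r3, r3, #8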

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          unsigned &OffReg, int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias
  // information some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    const TargetInstrDesc &TID = I->getDesc();
    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
      return false;
    if (isLd && TID.mayStore())
      return false;
    if (!isLd) {
      if (TID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (TID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // OK if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}

bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          unsigned &OffReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDR)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STR)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the offset registers match.
  if (!isT2 &&
      (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
    return false;

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    if (OffImm < 0) {
      if (OffImm < -255)
        // Can't fall back to t2LDRi8 / t2STRi8.
        return false;
    } else {
      int Limit = (1 << 8) * Scale;
      if (OffImm >= Limit || (OffImm & (Scale-1)))
        return false;
    }
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  if (!isT2)
    OffReg = Op0->getOperand(2).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
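  // For example (hypothetical), offsets 0, 4, 8 form one movable run, while
  // offsets 0 and 8 (a 4-byte gap) or an intervening redefinition of the
  // base register ends the run.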
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative: if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate registers that can form a register pair.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg, OffReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.isCall() || TID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that was seen
        // earlier). Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - returns an instance of the load /
/// store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}