//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");

/// ARMLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
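///
/// For example (register choices illustrative):
///   ldr r4, [r0]
///   ldr r5, [r0, #4]
///   ldr r6, [r0, #8]
/// =>
///   ldmia r0, {r4, r5, r6}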

namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static int getLoadStoreMultipleOpcode(int Opcode) {
  switch (Opcode) {
  case ARM::LDR:
    ++NumLDMGened;
    return ARM::LDM;
  case ARM::STR:
    ++NumSTMGened;
    return ARM::STM;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    return ARM::t2LDM;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    return ARM::t2STM;
  case ARM::VLDRS:
    ++NumVLDMGened;
    return ARM::VLDMS;
  case ARM::VSTRS:
    ++NumVSTMGened;
    return ARM::VSTMS;
  case ARM::VLDRD:
    ++NumVLDMGened;
    return ARM::VLDMD;
  case ARM::VSTRD:
    ++NumVSTMGened;
    return ARM::VSTMD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDR || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STR || isT2i32Store(Opc);
}

/// MergeOps - Create and insert an LDM or STM with Base as the base register
/// and the registers in Regs as the register operands that would be loaded /
/// stored. It returns true if the transformation is done.
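///
/// If the starting offset is not zero and more than two operations are being
/// merged, a new base register is materialized first. For example (register
/// choices illustrative; for loads the last destination register doubles as
/// the new base):
///   ldr r4, [r0, #8]
///   ldr r5, [r0, #12]
///   ldr r6, [r0, #16]
/// =>
///   add r6, r0, #8
///   ldmia r6, {r4, r5, r6}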
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // If the starting offset isn't zero, insert an MI to materialize a new
    // base. But only do so if it is cost effective, i.e. merging more than
    // two loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Otherwise, use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2
      ? ARM::ADDri
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
    if (Offset < 0) {
      BaseOpc = !isThumb2
        ? ARM::SUBri
        : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
      Offset = - Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}

// MergeOpsUpdate - Call MergeOps and update MemOps and Merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;

  SmallSet<unsigned, 4> UnavailRegs;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0; i < memOpsBegin; ++i) {
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      if (memOps[i].Merged)
        UnavailRegs.insert(Reg);
      else {
        KilledRegs.insert(Reg);
        Killer[Reg] = i;
      }
    }
  }
  for (unsigned i = memOpsEnd, e = memOps.size(); i != e; ++i) {
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    if (UnavailRegs.count(Reg))
      // Register is killed before and it's not easy / possible to update the
      // kill marker on already merged instructions. Abort.
      return;

    // If we are inserting the merged operation after an unmerged operation
    // that uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any unmerged memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        memOps[j].MBBI->getOperand(0).setIsKill(false);
        memOps[j].isKill = false;
      }
    }
    MBB.erase(memOps[i].MBBI);
    memOps[i].Merged = true;
  }
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or
/// more load / store multiple instructions.
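///
/// For example (registers illustrative), an offset gap splits the sequence,
/// and a leftover single op is left alone:
///   ldr r4, [r0]
///   ldr r5, [r0, #4]
///   ldr r6, [r0, #12]
/// =>
///   ldmia r0, {r4, r5}
///   ldr r6, [r0, #12]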
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : ARMRegisterInfo::getRegisterNumbering(PReg);
  unsigned Count = 1;

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : ARMRegisterInfo::getRegisterNumbering(Reg);
    // Register numbers must be in ascending order. For VFP, the registers
    // must also be consecutive and there is a limit of 16 double-word
    // registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum)
         || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try to merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}
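
/// isMatchingDecrement - Return true if MI is a SUB that decrements Base by
/// exactly Bytes under the same predicate. For example, with Base = sp and
/// Bytes = 8 this matches
///   sub sp, sp, #8
/// (for tSUBspi the immediate operand is in words, hence the scale of 4).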
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::t2SUBrSPi &&
      MI->getOpcode() != ARM::t2SUBrSPi12 &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::t2ADDrSPi &&
      MI->getOpcode() != ARM::t2ADDrSPi12 &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  if (Bytes == 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDR:
  case ARM::STR:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDM:
  case ARM::STM:
  case ARM::t2LDM:
  case ARM::t2STM:
  case ARM::VLDMS:
  case ARM::VSTMS:
    return (MI->getNumOperands() - 4) * 4;
  case ARM::VLDMD:
  case ARM::VSTMD:
    return (MI->getNumOperands() - 4) * 8;
  }
}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDM: return ARM::LDM_UPD;
  case ARM::STM: return ARM::STM_UPD;
  case ARM::t2LDM: return ARM::t2LDM_UPD;
  case ARM::t2STM: return ARM::t2STM_UPD;
  case ARM::VLDMS: return ARM::VLDMS_UPD;
  case ARM::VLDMD: return ARM::VLDMD_UPD;
  case ARM::VSTMS: return ARM::VSTMS_UPD;
  case ARM::VSTMD: return ARM::VSTMD_UPD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::ia;

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
    if (MI->getOperand(i).getReg() == Base)
      return false;
  }
  Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(ARM_AM::getAM4ModeImm(Mode))
    .addImm(Pred).addReg(PredReg);
  // Transfer the rest of operands.
  for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));
  // Transfer memoperands.
  (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_PRE;
  case ARM::STR: return ARM::STR_PRE;
  case ARM::VLDRS: return ARM::VLDMS_UPD;
  case ARM::VLDRD: return ARM::VLDMD_UPD;
  case ARM::VSTRS: return ARM::VSTMS_UPD;
  case ARM::VSTRD: return ARM::VSTMD_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_POST;
  case ARM::STR: return ARM::STR_POST;
  case ARM::VLDRS: return ARM::VLDMS_UPD;
  case ARM::VLDRD: return ARM::VLDMD_UPD;
  case ARM::VSTRS: return ARM::VSTMS_UPD;
  case ARM::VSTRD: return ARM::VSTMD_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
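///
/// sub rn, rn, #4
/// ldr r1, [rn]
/// =>
/// ldr r1, [rn, #-4]!
///
/// ldr r1, [rn]
/// add rn, rn, #4
/// =>
/// ldr r1, [rn], #4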
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDR || Opcode == ARM::STR);
  if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
    return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;
  if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the
  // would-be writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned Offset = 0;
  if (isAM5)
    Offset = ARM_AM::getAM4ModeImm(AddSub == ARM_AM::sub ?
                                   ARM_AM::db : ARM_AM::ia);
  else if (isAM2)
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  else
    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Offset)
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2)
      // LDR_PRE, LDR_POST,
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM2)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operation (that this
/// pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::LDR:
  case ARM::STR:
    return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}

static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
    return OffField;

  int Offset = isAM2
    ? ARM_AM::getAM2Offset(OffField)
    : (isAM3 ? ARM_AM::getAM3Offset(OffField)
             : ARM_AM::getAM5Offset(OffField) * 4);
  if (isAM2) {
    if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int OffImm, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          unsigned OffReg, bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  int Offset = OffImm;
  if (!isT2) {
    if (OffImm < 0)
      Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
    else
      Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
  }
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
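
/// FixInvalidRegPairOp - An LDRD / STRD whose two register operands do not
/// form a legal even / odd machine register pair cannot be encoded. When the
/// register numbers ascend and there is no offset, rewrite it as an LDM /
/// STM, e.g. (registers illustrative):
///   ldrd r2, r5, [r0]
/// =>
///   ldm r0, {r2, r5}
/// Otherwise split it into two plain LDR / STR instructions.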
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDM : ARM::LDM)
        : (isT2 ? ARM::t2STM : ARM::STM);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      assert((!isT2 || !OffReg) &&
             "Thumb2 ldrd / strd does not encode offset register!");
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defed by the load; make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg) ||
           (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg) &&
               (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's
        // needed. First advance to the instruction just before the start
        // of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // instructions that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM_UPD ||
        PrevMI->getOpcode() == ARM::t2LDM_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
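///
/// For example (a sketch; registers illustrative):
///   ldr r4, [r0]
///   mul r7, r8, r9
///   ldr r5, [r0, #4]
/// =>
///   ldr r4, [r0]
///   ldr r5, [r0, #4]
///   mul r7, r8, r9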

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          unsigned &OffReg, int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias
  // information some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    const TargetInstrDesc &TID = I->getDesc();
    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
      return false;
    if (isLd && TID.mayStore())
      return false;
    if (!isLd) {
      if (TID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (TID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
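
/// CanFormLdStDWord - Return true if the two given loads / stores can be
/// combined into a single LDRD / STRD, e.g. (registers illustrative):
///   ldr r0, [r2]
///   ldr r1, [r2, #4]
/// =>
///   ldrd r0, r1, [r2]
/// subject to target support (v5TE+), matching offset registers, sufficient
/// i64 alignment, distinct registers, and an encodable immediate offset.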
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          unsigned &OffReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDR)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STR)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the offset registers match.
  if (!isT2 &&
      (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
    return false;

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    if (OffImm < 0) {
      if (OffImm < -255)
        // Can't fall back to t2LDRi8 / t2STRi8.
        return false;
    } else {
      int Limit = (1 << 8) * Scale;
      if (OffImm >= Limit || (OffImm & (Scale-1)))
        return false;
    }
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  if (!isT2)
    OffReg = Op0->getOperand(2).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
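
/// RescheduleOps - Given loads / stores that share a base register, move
/// runs with consecutive offsets next to each other (forming an LDRD / STRD
/// for a suitable pair) so later passes have contiguous candidates to merge.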
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register
        // pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg, OffReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
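
/// RescheduleLoadStoreInstrs - Scan a basic block region (stopping at
/// barriers such as calls and terminators), bucket the memory ops by base
/// register, and invoke RescheduleOps on each bucket with more than one
/// entry.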
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.isCall() || TID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - returns an instance of the load /
/// store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}