//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");

/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
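///
/// For illustration only (a hypothetical example, not taken from this file):
/// a run of word loads at consecutive offsets off the same base, e.g.
///   ldr r4, [r0]
///   ldr r5, [r0, #4]
///   ldr r6, [r0, #8]
/// is rewritten into a single load-multiple:
///   ldmia r0, {r4, r5, r6}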
56 57 namespace { 58 struct ARMLoadStoreOpt : public MachineFunctionPass { 59 static char ID; 60 ARMLoadStoreOpt() : MachineFunctionPass(&ID) {} 61 62 const TargetInstrInfo *TII; 63 const TargetRegisterInfo *TRI; 64 ARMFunctionInfo *AFI; 65 RegScavenger *RS; 66 bool isThumb2; 67 68 virtual bool runOnMachineFunction(MachineFunction &Fn); 69 70 virtual const char *getPassName() const { 71 return "ARM load / store optimization pass"; 72 } 73 74 private: 75 struct MemOpQueueEntry { 76 int Offset; 77 unsigned Position; 78 MachineBasicBlock::iterator MBBI; 79 bool Merged; 80 MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i) 81 : Offset(o), Position(p), MBBI(i), Merged(false) {} 82 }; 83 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; 84 typedef MemOpQueue::iterator MemOpQueueIter; 85 86 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 87 int Offset, unsigned Base, bool BaseKill, int Opcode, 88 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, 89 DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs); 90 void MergeOpsUpdate(MachineBasicBlock &MBB, 91 MemOpQueue &MemOps, 92 unsigned memOpsBegin, 93 unsigned memOpsEnd, 94 unsigned insertAfter, 95 int Offset, 96 unsigned Base, 97 bool BaseKill, 98 int Opcode, 99 ARMCC::CondCodes Pred, 100 unsigned PredReg, 101 unsigned Scratch, 102 DebugLoc dl, 103 SmallVector<MachineBasicBlock::iterator, 4> &Merges); 104 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base, 105 int Opcode, unsigned Size, 106 ARMCC::CondCodes Pred, unsigned PredReg, 107 unsigned Scratch, MemOpQueue &MemOps, 108 SmallVector<MachineBasicBlock::iterator, 4> &Merges); 109 110 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps); 111 bool FixInvalidRegPairOp(MachineBasicBlock &MBB, 112 MachineBasicBlock::iterator &MBBI); 113 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, 114 MachineBasicBlock::iterator MBBI, 115 const TargetInstrInfo *TII, 116 bool &Advance, 117 MachineBasicBlock::iterator &I); 118 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, 119 MachineBasicBlock::iterator MBBI, 120 bool &Advance, 121 MachineBasicBlock::iterator &I); 122 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); 123 bool MergeReturnIntoLDM(MachineBasicBlock &MBB); 124 }; 125 char ARMLoadStoreOpt::ID = 0; 126 } 127 128 static int getLoadStoreMultipleOpcode(int Opcode) { 129 switch (Opcode) { 130 case ARM::LDR: 131 NumLDMGened++; 132 return ARM::LDM; 133 case ARM::STR: 134 NumSTMGened++; 135 return ARM::STM; 136 case ARM::t2LDRi8: 137 case ARM::t2LDRi12: 138 NumLDMGened++; 139 return ARM::t2LDM; 140 case ARM::t2STRi8: 141 case ARM::t2STRi12: 142 NumSTMGened++; 143 return ARM::t2STM; 144 case ARM::VLDRS: 145 NumVLDMGened++; 146 return ARM::VLDMS; 147 case ARM::VSTRS: 148 NumVSTMGened++; 149 return ARM::VSTMS; 150 case ARM::VLDRD: 151 NumVLDMGened++; 152 return ARM::VLDMD; 153 case ARM::VSTRD: 154 NumVSTMGened++; 155 return ARM::VSTMD; 156 default: llvm_unreachable("Unhandled opcode!"); 157 } 158 return 0; 159 } 160 161 static bool isT2i32Load(unsigned Opc) { 162 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8; 163 } 164 165 static bool isi32Load(unsigned Opc) { 166 return Opc == ARM::LDR || isT2i32Load(Opc); 167 } 168 169 static bool isT2i32Store(unsigned Opc) { 170 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8; 171 } 172 173 static bool isi32Store(unsigned Opc) { 174 return Opc == ARM::STR || isT2i32Store(Opc); 175 } 176 177 /// MergeOps - Create and insert a LDM or STM with Base as base register and 178 /// 
registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  if (isAM4 && Offset == 4) {
    if (isThumb2)
      // Thumb2 does not support ldmib / stmib.
      return false;
    Mode = ARM_AM::ib;
  } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
    if (isThumb2)
      // Thumb2 does not support ldmda / stmda.
      return false;
    Mode = ARM_AM::da;
  } else if (isAM4 && Offset == -4 * (int)NumRegs) {
    Mode = ARM_AM::db;
  } else if (Offset != 0) {
    // If the starting offset isn't zero, insert an MI to materialize a new
    // base. But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2
      ? ARM::ADDri
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
    if (Offset < 0) {
      BaseOpc = !isThumb2
        ? ARM::SUBri
        : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
      Offset = - Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }

  bool isDPR = (Opcode == ARM::VLDRD || Opcode == ARM::VSTRD);
  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = (isAM4)
    ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
    : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM5Opc(Mode, isDPR ? NumRegs<<1 : NumRegs))
        .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}

// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
267 void ARMLoadStoreOpt:: 268 MergeOpsUpdate(MachineBasicBlock &MBB, 269 MemOpQueue &memOps, 270 unsigned memOpsBegin, 271 unsigned memOpsEnd, 272 unsigned insertAfter, 273 int Offset, 274 unsigned Base, 275 bool BaseKill, 276 int Opcode, 277 ARMCC::CondCodes Pred, 278 unsigned PredReg, 279 unsigned Scratch, 280 DebugLoc dl, 281 SmallVector<MachineBasicBlock::iterator, 4> &Merges) { 282 // First calculate which of the registers should be killed by the merged 283 // instruction. 284 SmallVector<std::pair<unsigned, bool>, 8> Regs; 285 const unsigned insertPos = memOps[insertAfter].Position; 286 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { 287 const MachineOperand &MO = memOps[i].MBBI->getOperand(0); 288 unsigned Reg = MO.getReg(); 289 bool isKill = MO.isKill(); 290 291 // If we are inserting the merged operation after an unmerged operation that 292 // uses the same register, make sure to transfer any kill flag. 293 for (unsigned j = memOpsEnd, e = memOps.size(); !isKill && j != e; ++j) 294 if (memOps[j].Position<insertPos) { 295 const MachineOperand &MOJ = memOps[j].MBBI->getOperand(0); 296 if (MOJ.getReg() == Reg && MOJ.isKill()) 297 isKill = true; 298 } 299 300 Regs.push_back(std::make_pair(Reg, isKill)); 301 } 302 303 // Try to do the merge. 304 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI; 305 Loc++; 306 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode, 307 Pred, PredReg, Scratch, dl, Regs)) 308 return; 309 310 // Merge succeeded, update records. 311 Merges.push_back(prior(Loc)); 312 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { 313 // Remove kill flags from any unmerged memops that come before insertPos. 314 if (Regs[i-memOpsBegin].second) 315 for (unsigned j = memOpsEnd, e = memOps.size(); j != e; ++j) 316 if (memOps[j].Position<insertPos) { 317 MachineOperand &MOJ = memOps[j].MBBI->getOperand(0); 318 if (MOJ.getReg() == Regs[i-memOpsBegin].first && MOJ.isKill()) 319 MOJ.setIsKill(false); 320 } 321 MBB.erase(memOps[i].MBBI); 322 memOps[i].Merged = true; 323 } 324 } 325 326 /// MergeLDR_STR - Merge a number of load / store instructions into one or more 327 /// load / store multiple instructions. 328 void 329 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, 330 unsigned Base, int Opcode, unsigned Size, 331 ARMCC::CondCodes Pred, unsigned PredReg, 332 unsigned Scratch, MemOpQueue &MemOps, 333 SmallVector<MachineBasicBlock::iterator, 4> &Merges) { 334 bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode); 335 int Offset = MemOps[SIndex].Offset; 336 int SOffset = Offset; 337 unsigned insertAfter = SIndex; 338 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI; 339 DebugLoc dl = Loc->getDebugLoc(); 340 const MachineOperand &PMO = Loc->getOperand(0); 341 unsigned PReg = PMO.getReg(); 342 unsigned PRegNum = PMO.isUndef() ? UINT_MAX 343 : ARMRegisterInfo::getRegisterNumbering(PReg); 344 unsigned Count = 1; 345 346 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) { 347 int NewOffset = MemOps[i].Offset; 348 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0); 349 unsigned Reg = MO.getReg(); 350 unsigned RegNum = MO.isUndef() ? UINT_MAX 351 : ARMRegisterInfo::getRegisterNumbering(Reg); 352 // AM4 - register numbers in ascending order. 353 // AM5 - consecutive register numbers in ascending order. 354 // Can only do up to 16 double-word registers per insn. 
355 if (Reg != ARM::SP && 356 NewOffset == Offset + (int)Size && 357 ((isAM4 && RegNum > PRegNum) 358 || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) { 359 Offset += Size; 360 PRegNum = RegNum; 361 ++Count; 362 } else { 363 // Can't merge this in. Try merge the earlier ones first. 364 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, 365 Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges); 366 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch, 367 MemOps, Merges); 368 return; 369 } 370 371 if (MemOps[i].Position > MemOps[insertAfter].Position) 372 insertAfter = i; 373 } 374 375 bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1; 376 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset, 377 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); 378 return; 379 } 380 381 static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base, 382 unsigned Bytes, unsigned Limit, 383 ARMCC::CondCodes Pred, unsigned PredReg){ 384 unsigned MyPredReg = 0; 385 if (!MI) 386 return false; 387 if (MI->getOpcode() != ARM::t2SUBri && 388 MI->getOpcode() != ARM::t2SUBrSPi && 389 MI->getOpcode() != ARM::t2SUBrSPi12 && 390 MI->getOpcode() != ARM::tSUBspi && 391 MI->getOpcode() != ARM::SUBri) 392 return false; 393 394 // Make sure the offset fits in 8 bits. 395 if (Bytes <= 0 || (Limit && Bytes >= Limit)) 396 return false; 397 398 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME 399 return (MI->getOperand(0).getReg() == Base && 400 MI->getOperand(1).getReg() == Base && 401 (MI->getOperand(2).getImm()*Scale) == Bytes && 402 llvm::getInstrPredicate(MI, MyPredReg) == Pred && 403 MyPredReg == PredReg); 404 } 405 406 static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base, 407 unsigned Bytes, unsigned Limit, 408 ARMCC::CondCodes Pred, unsigned PredReg){ 409 unsigned MyPredReg = 0; 410 if (!MI) 411 return false; 412 if (MI->getOpcode() != ARM::t2ADDri && 413 MI->getOpcode() != ARM::t2ADDrSPi && 414 MI->getOpcode() != ARM::t2ADDrSPi12 && 415 MI->getOpcode() != ARM::tADDspi && 416 MI->getOpcode() != ARM::ADDri) 417 return false; 418 419 if (Bytes <= 0 || (Limit && Bytes >= Limit)) 420 // Make sure the offset fits in 8 bits. 421 return false; 422 423 unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 
4 : 1; // FIXME 424 return (MI->getOperand(0).getReg() == Base && 425 MI->getOperand(1).getReg() == Base && 426 (MI->getOperand(2).getImm()*Scale) == Bytes && 427 llvm::getInstrPredicate(MI, MyPredReg) == Pred && 428 MyPredReg == PredReg); 429 } 430 431 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { 432 switch (MI->getOpcode()) { 433 default: return 0; 434 case ARM::LDR: 435 case ARM::STR: 436 case ARM::t2LDRi8: 437 case ARM::t2LDRi12: 438 case ARM::t2STRi8: 439 case ARM::t2STRi12: 440 case ARM::VLDRS: 441 case ARM::VSTRS: 442 return 4; 443 case ARM::VLDRD: 444 case ARM::VSTRD: 445 return 8; 446 case ARM::LDM: 447 case ARM::STM: 448 case ARM::t2LDM: 449 case ARM::t2STM: 450 return (MI->getNumOperands() - 4) * 4; 451 case ARM::VLDMS: 452 case ARM::VSTMS: 453 case ARM::VLDMD: 454 case ARM::VSTMD: 455 return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4; 456 } 457 } 458 459 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc) { 460 switch (Opc) { 461 case ARM::LDM: return ARM::LDM_UPD; 462 case ARM::STM: return ARM::STM_UPD; 463 case ARM::t2LDM: return ARM::t2LDM_UPD; 464 case ARM::t2STM: return ARM::t2STM_UPD; 465 case ARM::VLDMS: return ARM::VLDMS_UPD; 466 case ARM::VLDMD: return ARM::VLDMD_UPD; 467 case ARM::VSTMS: return ARM::VSTMS_UPD; 468 case ARM::VSTMD: return ARM::VSTMD_UPD; 469 default: llvm_unreachable("Unhandled opcode!"); 470 } 471 return 0; 472 } 473 474 /// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base 475 /// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible: 476 /// 477 /// stmia rn, <ra, rb, rc> 478 /// rn := rn + 4 * 3; 479 /// => 480 /// stmia rn!, <ra, rb, rc> 481 /// 482 /// rn := rn - 4 * 3; 483 /// ldmia rn, <ra, rb, rc> 484 /// => 485 /// ldmdb rn!, <ra, rb, rc> 486 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, 487 MachineBasicBlock::iterator MBBI, 488 bool &Advance, 489 MachineBasicBlock::iterator &I) { 490 MachineInstr *MI = MBBI; 491 unsigned Base = MI->getOperand(0).getReg(); 492 bool BaseKill = MI->getOperand(0).isKill(); 493 unsigned Bytes = getLSMultipleTransferSize(MI); 494 unsigned PredReg = 0; 495 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); 496 int Opcode = MI->getOpcode(); 497 DebugLoc dl = MI->getDebugLoc(); 498 bool isAM4 = (Opcode == ARM::LDM || Opcode == ARM::t2LDM || 499 Opcode == ARM::STM || Opcode == ARM::t2STM); 500 501 bool DoMerge = false; 502 ARM_AM::AMSubMode Mode = ARM_AM::ia; 503 unsigned Offset = 0; 504 505 if (isAM4) { 506 // Can't use an updating ld/st if the base register is also a dest 507 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined. 508 for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) { 509 if (MI->getOperand(i).getReg() == Base) 510 return false; 511 } 512 Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm()); 513 } else { 514 // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops. 515 Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm()); 516 Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm()); 517 } 518 519 // Try merging with the previous instruction. 
  if (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    if (isAM4) {
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        DoMerge = true;
        Mode = ARM_AM::db;
      } else if (isAM4 && Mode == ARM_AM::ib &&
                 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        DoMerge = true;
        Mode = ARM_AM::da;
      }
    } else {
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        Mode = ARM_AM::db;
        DoMerge = true;
      }
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  if (!DoMerge && MBBI != MBB.end()) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    if (isAM4) {
      if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        DoMerge = true;
      } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
                 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        DoMerge = true;
      }
    } else {
      if (Mode == ARM_AM::ia &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        DoMerge = true;
      }
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill));
  if (isAM4) {
    // [t2]LDM_UPD, [t2]STM_UPD
    MIB.addImm(ARM_AM::getAM4ModeImm(Mode))
      .addImm(Pred).addReg(PredReg);
  } else {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    MIB.addImm(ARM_AM::getAM5Opc(Mode, Offset))
      .addImm(Pred).addReg(PredReg);
  }
  // Transfer the rest of the operands.
  for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));
  // Transfer memoperands.
589 (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); 590 591 MBB.erase(MBBI); 592 return true; 593 } 594 595 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) { 596 switch (Opc) { 597 case ARM::LDR: return ARM::LDR_PRE; 598 case ARM::STR: return ARM::STR_PRE; 599 case ARM::VLDRS: return ARM::VLDMS_UPD; 600 case ARM::VLDRD: return ARM::VLDMD_UPD; 601 case ARM::VSTRS: return ARM::VSTMS_UPD; 602 case ARM::VSTRD: return ARM::VSTMD_UPD; 603 case ARM::t2LDRi8: 604 case ARM::t2LDRi12: 605 return ARM::t2LDR_PRE; 606 case ARM::t2STRi8: 607 case ARM::t2STRi12: 608 return ARM::t2STR_PRE; 609 default: llvm_unreachable("Unhandled opcode!"); 610 } 611 return 0; 612 } 613 614 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) { 615 switch (Opc) { 616 case ARM::LDR: return ARM::LDR_POST; 617 case ARM::STR: return ARM::STR_POST; 618 case ARM::VLDRS: return ARM::VLDMS_UPD; 619 case ARM::VLDRD: return ARM::VLDMD_UPD; 620 case ARM::VSTRS: return ARM::VSTMS_UPD; 621 case ARM::VSTRD: return ARM::VSTMD_UPD; 622 case ARM::t2LDRi8: 623 case ARM::t2LDRi12: 624 return ARM::t2LDR_POST; 625 case ARM::t2STRi8: 626 case ARM::t2STRi12: 627 return ARM::t2STR_POST; 628 default: llvm_unreachable("Unhandled opcode!"); 629 } 630 return 0; 631 } 632 633 /// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base 634 /// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible: 635 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, 636 MachineBasicBlock::iterator MBBI, 637 const TargetInstrInfo *TII, 638 bool &Advance, 639 MachineBasicBlock::iterator &I) { 640 MachineInstr *MI = MBBI; 641 unsigned Base = MI->getOperand(1).getReg(); 642 bool BaseKill = MI->getOperand(1).isKill(); 643 unsigned Bytes = getLSMultipleTransferSize(MI); 644 int Opcode = MI->getOpcode(); 645 DebugLoc dl = MI->getDebugLoc(); 646 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS || 647 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS); 648 bool isAM2 = (Opcode == ARM::LDR || Opcode == ARM::STR); 649 if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0) 650 return false; 651 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0) 652 return false; 653 if (isT2i32Load(Opcode) || isT2i32Store(Opcode)) 654 if (MI->getOperand(2).getImm() != 0) 655 return false; 656 657 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD; 658 // Can't do the merge if the destination register is the same as the would-be 659 // writeback register. 660 if (isLd && MI->getOperand(0).getReg() == Base) 661 return false; 662 663 unsigned PredReg = 0; 664 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); 665 bool DoMerge = false; 666 ARM_AM::AddrOpc AddSub = ARM_AM::add; 667 unsigned NewOpc = 0; 668 // AM2 - 12 bits, thumb2 - 8 bits. 669 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100); 670 671 // Try merging with the previous instruction. 672 if (MBBI != MBB.begin()) { 673 MachineBasicBlock::iterator PrevMBBI = prior(MBBI); 674 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) { 675 DoMerge = true; 676 AddSub = ARM_AM::sub; 677 } else if (!isAM5 && 678 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) { 679 DoMerge = true; 680 } 681 if (DoMerge) { 682 NewOpc = getPreIndexedLoadStoreOpcode(Opcode); 683 MBB.erase(PrevMBBI); 684 } 685 } 686 687 // Try merging with the next instruction. 
  if (!DoMerge && MBBI != MBB.end()) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD;
  unsigned Offset = 0;
  if (isAM5)
    Offset = ARM_AM::getAM5Opc(AddSub == ARM_AM::sub ? ARM_AM::db : ARM_AM::ia,
                               (isDPR ? 2 : 1));
  else if (isAM2)
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  else
    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Offset)
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2)
      // LDR_PRE, LDR_POST,
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM2)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if the instruction is a memory operation (that
/// this pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
  if (MI->hasOneMemOperand()) {
    const MachineMemOperand *MMO = *MI->memoperands_begin();

    // Don't touch volatile memory accesses - we may be changing their order.
    if (MMO->isVolatile())
      return false;

    // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
    // not.
    if (MMO->getAlignment() < 4)
      return false;
  }

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
783 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() && 784 MI->getOperand(1).isUndef()) 785 return false; 786 787 int Opcode = MI->getOpcode(); 788 switch (Opcode) { 789 default: break; 790 case ARM::LDR: 791 case ARM::STR: 792 return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0; 793 case ARM::VLDRS: 794 case ARM::VSTRS: 795 return MI->getOperand(1).isReg(); 796 case ARM::VLDRD: 797 case ARM::VSTRD: 798 return MI->getOperand(1).isReg(); 799 case ARM::t2LDRi8: 800 case ARM::t2LDRi12: 801 case ARM::t2STRi8: 802 case ARM::t2STRi12: 803 return MI->getOperand(1).isReg(); 804 } 805 return false; 806 } 807 808 /// AdvanceRS - Advance register scavenger to just before the earliest memory 809 /// op that is being merged. 810 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) { 811 MachineBasicBlock::iterator Loc = MemOps[0].MBBI; 812 unsigned Position = MemOps[0].Position; 813 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) { 814 if (MemOps[i].Position < Position) { 815 Position = MemOps[i].Position; 816 Loc = MemOps[i].MBBI; 817 } 818 } 819 820 if (Loc != MBB.begin()) 821 RS->forward(prior(Loc)); 822 } 823 824 static int getMemoryOpOffset(const MachineInstr *MI) { 825 int Opcode = MI->getOpcode(); 826 bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR; 827 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD; 828 unsigned NumOperands = MI->getDesc().getNumOperands(); 829 unsigned OffField = MI->getOperand(NumOperands-3).getImm(); 830 831 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 || 832 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 || 833 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) 834 return OffField; 835 836 int Offset = isAM2 837 ? ARM_AM::getAM2Offset(OffField) 838 : (isAM3 ? 
ARM_AM::getAM3Offset(OffField) 839 : ARM_AM::getAM5Offset(OffField) * 4); 840 if (isAM2) { 841 if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub) 842 Offset = -Offset; 843 } else if (isAM3) { 844 if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub) 845 Offset = -Offset; 846 } else { 847 if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub) 848 Offset = -Offset; 849 } 850 return Offset; 851 } 852 853 static void InsertLDR_STR(MachineBasicBlock &MBB, 854 MachineBasicBlock::iterator &MBBI, 855 int OffImm, bool isDef, 856 DebugLoc dl, unsigned NewOpc, 857 unsigned Reg, bool RegDeadKill, bool RegUndef, 858 unsigned BaseReg, bool BaseKill, bool BaseUndef, 859 unsigned OffReg, bool OffKill, bool OffUndef, 860 ARMCC::CondCodes Pred, unsigned PredReg, 861 const TargetInstrInfo *TII, bool isT2) { 862 int Offset = OffImm; 863 if (!isT2) { 864 if (OffImm < 0) 865 Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift); 866 else 867 Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift); 868 } 869 if (isDef) { 870 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), 871 TII->get(NewOpc)) 872 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill)) 873 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef)); 874 if (!isT2) 875 MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef)); 876 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 877 } else { 878 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), 879 TII->get(NewOpc)) 880 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef)) 881 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef)); 882 if (!isT2) 883 MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef)); 884 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 885 } 886 } 887 888 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, 889 MachineBasicBlock::iterator &MBBI) { 890 MachineInstr *MI = &*MBBI; 891 unsigned Opcode = MI->getOpcode(); 892 if (Opcode == ARM::LDRD || Opcode == ARM::STRD || 893 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) { 894 unsigned EvenReg = MI->getOperand(0).getReg(); 895 unsigned OddReg = MI->getOperand(1).getReg(); 896 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false); 897 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false); 898 if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum) 899 return false; 900 901 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8; 902 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8; 903 bool EvenDeadKill = isLd ? 904 MI->getOperand(0).isDead() : MI->getOperand(0).isKill(); 905 bool EvenUndef = MI->getOperand(0).isUndef(); 906 bool OddDeadKill = isLd ? 907 MI->getOperand(1).isDead() : MI->getOperand(1).isKill(); 908 bool OddUndef = MI->getOperand(1).isUndef(); 909 const MachineOperand &BaseOp = MI->getOperand(2); 910 unsigned BaseReg = BaseOp.getReg(); 911 bool BaseKill = BaseOp.isKill(); 912 bool BaseUndef = BaseOp.isUndef(); 913 unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg(); 914 bool OffKill = isT2 ? false : MI->getOperand(3).isKill(); 915 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef(); 916 int OffImm = getMemoryOpOffset(MI); 917 unsigned PredReg = 0; 918 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); 919 920 if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) { 921 // Ascending register numbers and no offset. It's safe to change it to a 922 // ldm or stm. 923 unsigned NewOpc = (isLd) 924 ? (isT2 ? 
ARM::t2LDM : ARM::LDM) 925 : (isT2 ? ARM::t2STM : ARM::STM); 926 if (isLd) { 927 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) 928 .addReg(BaseReg, getKillRegState(BaseKill)) 929 .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)) 930 .addImm(Pred).addReg(PredReg) 931 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill)) 932 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill)); 933 ++NumLDRD2LDM; 934 } else { 935 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) 936 .addReg(BaseReg, getKillRegState(BaseKill)) 937 .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)) 938 .addImm(Pred).addReg(PredReg) 939 .addReg(EvenReg, 940 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef)) 941 .addReg(OddReg, 942 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef)); 943 ++NumSTRD2STM; 944 } 945 } else { 946 // Split into two instructions. 947 assert((!isT2 || !OffReg) && 948 "Thumb2 ldrd / strd does not encode offset register!"); 949 unsigned NewOpc = (isLd) 950 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR) 951 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR); 952 DebugLoc dl = MBBI->getDebugLoc(); 953 // If this is a load and base register is killed, it may have been 954 // re-defed by the load, make sure the first load does not clobber it. 955 if (isLd && 956 (BaseKill || OffKill) && 957 (TRI->regsOverlap(EvenReg, BaseReg) || 958 (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) { 959 assert(!TRI->regsOverlap(OddReg, BaseReg) && 960 (!OffReg || !TRI->regsOverlap(OddReg, OffReg))); 961 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, 962 OddReg, OddDeadKill, false, 963 BaseReg, false, BaseUndef, OffReg, false, OffUndef, 964 Pred, PredReg, TII, isT2); 965 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, 966 EvenReg, EvenDeadKill, false, 967 BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef, 968 Pred, PredReg, TII, isT2); 969 } else { 970 if (OddReg == EvenReg && EvenDeadKill) { 971 // If the two source operands are the same, the kill marker is probably 972 // on the first one. e.g. 973 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0 974 EvenDeadKill = false; 975 OddDeadKill = true; 976 } 977 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, 978 EvenReg, EvenDeadKill, EvenUndef, 979 BaseReg, false, BaseUndef, OffReg, false, OffUndef, 980 Pred, PredReg, TII, isT2); 981 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, 982 OddReg, OddDeadKill, OddUndef, 983 BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef, 984 Pred, PredReg, TII, isT2); 985 } 986 if (isLd) 987 ++NumLDRD2LDR; 988 else 989 ++NumSTRD2STR; 990 } 991 992 MBBI = prior(MBBI); 993 MBB.erase(MI); 994 } 995 return false; 996 } 997 998 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR 999 /// ops of the same base and incrementing offset into LDM / STM ops. 
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      //   r4 := ldr [r5]
      //   r5 := ldr [r5, #4]
      //   r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
        NumMemOps++;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
            NumMemOps++;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
                NumMemOps++;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those loads/stores
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If the iterator hasn't been advanced and this is not a memory op,
      // skip it. It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM_UPD ||
        PrevMI->getOpcode() == ARM::t2LDM_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
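///
/// For illustration only (a hypothetical example, not taken from this file):
/// two loads of adjacent words separated by unrelated instructions, e.g.
///   ldr r1, [r0]
///   add r3, r3, #1
///   ldr r2, [r0, #4]
/// are rescheduled next to each other and, when legal, replaced by an ldrd
/// (or given even/odd register allocation hints) so the post-RA pass can
/// merge them.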
1220 1221 namespace { 1222 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{ 1223 static char ID; 1224 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {} 1225 1226 const TargetData *TD; 1227 const TargetInstrInfo *TII; 1228 const TargetRegisterInfo *TRI; 1229 const ARMSubtarget *STI; 1230 MachineRegisterInfo *MRI; 1231 MachineFunction *MF; 1232 1233 virtual bool runOnMachineFunction(MachineFunction &Fn); 1234 1235 virtual const char *getPassName() const { 1236 return "ARM pre- register allocation load / store optimization pass"; 1237 } 1238 1239 private: 1240 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, 1241 unsigned &NewOpc, unsigned &EvenReg, 1242 unsigned &OddReg, unsigned &BaseReg, 1243 unsigned &OffReg, int &Offset, 1244 unsigned &PredReg, ARMCC::CondCodes &Pred, 1245 bool &isT2); 1246 bool RescheduleOps(MachineBasicBlock *MBB, 1247 SmallVector<MachineInstr*, 4> &Ops, 1248 unsigned Base, bool isLd, 1249 DenseMap<MachineInstr*, unsigned> &MI2LocMap); 1250 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB); 1251 }; 1252 char ARMPreAllocLoadStoreOpt::ID = 0; 1253 } 1254 1255 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { 1256 TD = Fn.getTarget().getTargetData(); 1257 TII = Fn.getTarget().getInstrInfo(); 1258 TRI = Fn.getTarget().getRegisterInfo(); 1259 STI = &Fn.getTarget().getSubtarget<ARMSubtarget>(); 1260 MRI = &Fn.getRegInfo(); 1261 MF = &Fn; 1262 1263 bool Modified = false; 1264 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; 1265 ++MFI) 1266 Modified |= RescheduleLoadStoreInstrs(MFI); 1267 1268 return Modified; 1269 } 1270 1271 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, 1272 MachineBasicBlock::iterator I, 1273 MachineBasicBlock::iterator E, 1274 SmallPtrSet<MachineInstr*, 4> &MemOps, 1275 SmallSet<unsigned, 4> &MemRegs, 1276 const TargetRegisterInfo *TRI) { 1277 // Are there stores / loads / calls between them? 1278 // FIXME: This is overly conservative. We should make use of alias information 1279 // some day. 1280 SmallSet<unsigned, 4> AddedRegPressure; 1281 while (++I != E) { 1282 if (MemOps.count(&*I)) 1283 continue; 1284 const TargetInstrDesc &TID = I->getDesc(); 1285 if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects()) 1286 return false; 1287 if (isLd && TID.mayStore()) 1288 return false; 1289 if (!isLd) { 1290 if (TID.mayLoad()) 1291 return false; 1292 // It's not safe to move the first 'str' down. 1293 // str r1, [r0] 1294 // strh r5, [r0] 1295 // str r4, [r0, #+4] 1296 if (TID.mayStore()) 1297 return false; 1298 } 1299 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) { 1300 MachineOperand &MO = I->getOperand(j); 1301 if (!MO.isReg()) 1302 continue; 1303 unsigned Reg = MO.getReg(); 1304 if (MO.isDef() && TRI->regsOverlap(Reg, Base)) 1305 return false; 1306 if (Reg != Base && !MemRegs.count(Reg)) 1307 AddedRegPressure.insert(Reg); 1308 } 1309 } 1310 1311 // Estimate register pressure increase due to the transformation. 1312 if (MemRegs.size() <= 4) 1313 // Ok if we are moving small number of instructions. 
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}

bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          unsigned &OffReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDR)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STR)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the offset registers match.
  if (!isT2 &&
      (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
    return false;

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 need 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    if (OffImm < 0) {
      if (OffImm < -255)
        // Can't fall back to t2LDRi8 / t2STRi8.
        return false;
    } else {
      int Limit = (1 << 8) * Scale;
      if (OffImm >= Limit || (OffImm & (Scale-1)))
        return false;
    }
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  if (!isT2)
    OffReg = Op0->getOperand(2).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
1417 while (Ops.size() > 1) { 1418 unsigned FirstLoc = ~0U; 1419 unsigned LastLoc = 0; 1420 MachineInstr *FirstOp = 0; 1421 MachineInstr *LastOp = 0; 1422 int LastOffset = 0; 1423 unsigned LastOpcode = 0; 1424 unsigned LastBytes = 0; 1425 unsigned NumMove = 0; 1426 for (int i = Ops.size() - 1; i >= 0; --i) { 1427 MachineInstr *Op = Ops[i]; 1428 unsigned Loc = MI2LocMap[Op]; 1429 if (Loc <= FirstLoc) { 1430 FirstLoc = Loc; 1431 FirstOp = Op; 1432 } 1433 if (Loc >= LastLoc) { 1434 LastLoc = Loc; 1435 LastOp = Op; 1436 } 1437 1438 unsigned Opcode = Op->getOpcode(); 1439 if (LastOpcode && Opcode != LastOpcode) 1440 break; 1441 1442 int Offset = getMemoryOpOffset(Op); 1443 unsigned Bytes = getLSMultipleTransferSize(Op); 1444 if (LastBytes) { 1445 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes)) 1446 break; 1447 } 1448 LastOffset = Offset; 1449 LastBytes = Bytes; 1450 LastOpcode = Opcode; 1451 if (++NumMove == 8) // FIXME: Tune this limit. 1452 break; 1453 } 1454 1455 if (NumMove <= 1) 1456 Ops.pop_back(); 1457 else { 1458 SmallPtrSet<MachineInstr*, 4> MemOps; 1459 SmallSet<unsigned, 4> MemRegs; 1460 for (int i = NumMove-1; i >= 0; --i) { 1461 MemOps.insert(Ops[i]); 1462 MemRegs.insert(Ops[i]->getOperand(0).getReg()); 1463 } 1464 1465 // Be conservative, if the instructions are too far apart, don't 1466 // move them. We want to limit the increase of register pressure. 1467 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this. 1468 if (DoMove) 1469 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp, 1470 MemOps, MemRegs, TRI); 1471 if (!DoMove) { 1472 for (unsigned i = 0; i != NumMove; ++i) 1473 Ops.pop_back(); 1474 } else { 1475 // This is the new location for the loads / stores. 1476 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp; 1477 while (InsertPos != MBB->end() && MemOps.count(InsertPos)) 1478 ++InsertPos; 1479 1480 // If we are moving a pair of loads / stores, see if it makes sense 1481 // to try to allocate a pair of registers that can form register pairs. 1482 MachineInstr *Op0 = Ops.back(); 1483 MachineInstr *Op1 = Ops[Ops.size()-2]; 1484 unsigned EvenReg = 0, OddReg = 0; 1485 unsigned BaseReg = 0, OffReg = 0, PredReg = 0; 1486 ARMCC::CondCodes Pred = ARMCC::AL; 1487 bool isT2 = false; 1488 unsigned NewOpc = 0; 1489 int Offset = 0; 1490 DebugLoc dl; 1491 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc, 1492 EvenReg, OddReg, BaseReg, OffReg, 1493 Offset, PredReg, Pred, isT2)) { 1494 Ops.pop_back(); 1495 Ops.pop_back(); 1496 1497 // Form the pair instruction. 1498 if (isLd) { 1499 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, 1500 dl, TII->get(NewOpc)) 1501 .addReg(EvenReg, RegState::Define) 1502 .addReg(OddReg, RegState::Define) 1503 .addReg(BaseReg); 1504 if (!isT2) 1505 MIB.addReg(OffReg); 1506 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 1507 ++NumLDRDFormed; 1508 } else { 1509 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, 1510 dl, TII->get(NewOpc)) 1511 .addReg(EvenReg) 1512 .addReg(OddReg) 1513 .addReg(BaseReg); 1514 if (!isT2) 1515 MIB.addReg(OffReg); 1516 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 1517 ++NumSTRDFormed; 1518 } 1519 MBB->erase(Op0); 1520 MBB->erase(Op1); 1521 1522 // Add register allocation hints to form register pairs. 
1523 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg); 1524 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg); 1525 } else { 1526 for (unsigned i = 0; i != NumMove; ++i) { 1527 MachineInstr *Op = Ops.back(); 1528 Ops.pop_back(); 1529 MBB->splice(InsertPos, MBB, Op); 1530 } 1531 } 1532 1533 NumLdStMoved += NumMove; 1534 RetVal = true; 1535 } 1536 } 1537 } 1538 1539 return RetVal; 1540 } 1541 1542 bool 1543 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { 1544 bool RetVal = false; 1545 1546 DenseMap<MachineInstr*, unsigned> MI2LocMap; 1547 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap; 1548 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap; 1549 SmallVector<unsigned, 4> LdBases; 1550 SmallVector<unsigned, 4> StBases; 1551 1552 unsigned Loc = 0; 1553 MachineBasicBlock::iterator MBBI = MBB->begin(); 1554 MachineBasicBlock::iterator E = MBB->end(); 1555 while (MBBI != E) { 1556 for (; MBBI != E; ++MBBI) { 1557 MachineInstr *MI = MBBI; 1558 const TargetInstrDesc &TID = MI->getDesc(); 1559 if (TID.isCall() || TID.isTerminator()) { 1560 // Stop at barriers. 1561 ++MBBI; 1562 break; 1563 } 1564 1565 MI2LocMap[MI] = Loc++; 1566 if (!isMemoryOp(MI)) 1567 continue; 1568 unsigned PredReg = 0; 1569 if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL) 1570 continue; 1571 1572 int Opc = MI->getOpcode(); 1573 bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD; 1574 unsigned Base = MI->getOperand(1).getReg(); 1575 int Offset = getMemoryOpOffset(MI); 1576 1577 bool StopHere = false; 1578 if (isLd) { 1579 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI = 1580 Base2LdsMap.find(Base); 1581 if (BI != Base2LdsMap.end()) { 1582 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { 1583 if (Offset == getMemoryOpOffset(BI->second[i])) { 1584 StopHere = true; 1585 break; 1586 } 1587 } 1588 if (!StopHere) 1589 BI->second.push_back(MI); 1590 } else { 1591 SmallVector<MachineInstr*, 4> MIs; 1592 MIs.push_back(MI); 1593 Base2LdsMap[Base] = MIs; 1594 LdBases.push_back(Base); 1595 } 1596 } else { 1597 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI = 1598 Base2StsMap.find(Base); 1599 if (BI != Base2StsMap.end()) { 1600 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { 1601 if (Offset == getMemoryOpOffset(BI->second[i])) { 1602 StopHere = true; 1603 break; 1604 } 1605 } 1606 if (!StopHere) 1607 BI->second.push_back(MI); 1608 } else { 1609 SmallVector<MachineInstr*, 4> MIs; 1610 MIs.push_back(MI); 1611 Base2StsMap[Base] = MIs; 1612 StBases.push_back(Base); 1613 } 1614 } 1615 1616 if (StopHere) { 1617 // Found a duplicate (a base+offset combination that's seen earlier). 1618 // Backtrack. 1619 --Loc; 1620 break; 1621 } 1622 } 1623 1624 // Re-schedule loads. 1625 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) { 1626 unsigned Base = LdBases[i]; 1627 SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base]; 1628 if (Lds.size() > 1) 1629 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap); 1630 } 1631 1632 // Re-schedule stores. 
1633 for (unsigned i = 0, e = StBases.size(); i != e; ++i) { 1634 unsigned Base = StBases[i]; 1635 SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base]; 1636 if (Sts.size() > 1) 1637 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap); 1638 } 1639 1640 if (MBBI != E) { 1641 Base2LdsMap.clear(); 1642 Base2StsMap.clear(); 1643 LdBases.clear(); 1644 StBases.clear(); 1645 } 1646 } 1647 1648 return RetVal; 1649 } 1650 1651 1652 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store 1653 /// optimization pass. 1654 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) { 1655 if (PreAlloc) 1656 return new ARMPreAllocLoadStoreOpt(); 1657 return new ARMLoadStoreOpt(); 1658 } 1659
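
// Usage note (a sketch under assumed pass-manager wiring, not part of the
// original file): a target would typically add both flavors of this
// optimization from its TargetMachine pass hooks, along the lines of
//   PM.add(createARMLoadStoreOptimizationPass(/*PreAlloc=*/true));   // pre-RA
//   PM.add(createARMLoadStoreOptimizationPass(/*PreAlloc=*/false));  // post-RA
// with the pre-allocation pass scheduled before register allocation and the
// post-allocation pass scheduled after it, as the pass comments above require.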