//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");

/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
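///
/// For example (a sketch; the register assignments are illustrative):
///   ldr r4, [r0]
///   ldr r5, [r0, #4]
///   ldr r6, [r0, #8]
/// =>
///   ldmia r0, {r4, r5, r6}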

namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
        : Offset(o), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static int getLoadStoreMultipleOpcode(int Opcode) {
  switch (Opcode) {
  case ARM::LDR:
    NumLDMGened++;
    return ARM::LDM;
  case ARM::STR:
    NumSTMGened++;
    return ARM::STM;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    NumLDMGened++;
    return ARM::t2LDM;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    NumSTMGened++;
    return ARM::t2STM;
  case ARM::VLDRS:
    NumVLDMGened++;
    return ARM::VLDMS;
  case ARM::VSTRS:
    NumVSTMGened++;
    return ARM::VSTMS;
  case ARM::VLDRD:
    NumVLDMGened++;
    return ARM::VLDMD;
  case ARM::VSTRD:
    NumVSTMGened++;
    return ARM::VSTMD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDR || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STR || isT2i32Store(Opc);
}

/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
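///
/// If the starting offset is not zero and more than two operations are being
/// merged, a new base register is materialized first, e.g. (a sketch; the use
/// of r12 as the scratch register is illustrative):
///   add r12, r0, #8
///   stmia r12, {r4, r5, r6}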
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  if (isAM4 && Offset == 4) {
    if (isThumb2)
      // Thumb2 does not support ldmib / stmib.
      return false;
    Mode = ARM_AM::ib;
  } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
    if (isThumb2)
      // Thumb2 does not support ldmda / stmda.
      return false;
    Mode = ARM_AM::da;
  } else if (isAM4 && Offset == -4 * (int)NumRegs) {
    Mode = ARM_AM::db;
  } else if (Offset != 0) {
    // If the starting offset isn't zero, insert a MI to materialize a new
    // base. But only do so if it is cost effective, i.e. merging more than
    // two loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2
      ? ARM::ADDri
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
    if (Offset < 0) {
      BaseOpc = !isThumb2
        ? ARM::SUBri
        : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
      Offset = - Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }

  bool isDPR = (Opcode == ARM::VLDRD || Opcode == ARM::VSTRD);
  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = (isAM4)
    ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
    : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM5Opc(Mode, isDPR ? NumRegs<<1 : NumRegs))
        .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}

// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::
MergeOpsUpdate(MachineBasicBlock &MBB,
               MemOpQueue &memOps,
               unsigned memOpsBegin,
               unsigned memOpsEnd,
               unsigned insertAfter,
               int Offset,
               unsigned Base,
               bool BaseKill,
               int Opcode,
               ARMCC::CondCodes Pred,
               unsigned PredReg,
               unsigned Scratch,
               DebugLoc dl,
               SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  const unsigned insertPos = memOps[insertAfter].Position;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    const MachineOperand &MO = memOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    bool isKill = MO.isKill();

    // If we are inserting the merged operation after an unmerged operation
    // that uses the same register, make sure to transfer any kill flag.
    for (unsigned j = memOpsEnd, e = memOps.size(); !isKill && j != e; ++j)
      if (memOps[j].Position < insertPos) {
        const MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
        if (MOJ.getReg() == Reg && MOJ.isKill())
          isKill = true;
      }

    Regs.push_back(std::make_pair(Reg, isKill));
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  Loc++;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any unmerged memops that come before insertPos.
    if (Regs[i-memOpsBegin].second)
      for (unsigned j = memOpsEnd, e = memOps.size(); j != e; ++j)
        if (memOps[j].Position < insertPos) {
          MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
          if (MOJ.getReg() == Regs[i-memOpsBegin].first && MOJ.isKill())
            MOJ.setIsKill(false);
        }
    MBB.erase(memOps[i].MBBI);
    memOps[i].Merged = true;
  }
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                       SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : ARMRegisterInfo::getRegisterNumbering(PReg);
  unsigned Count = 1;

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : ARMRegisterInfo::getRegisterNumbering(Reg);
    // AM4 - register numbers in ascending order.
    // AM5 - consecutive register numbers in ascending order.
    //       Can only do up to 16 double-word registers per insn.
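    // e.g. (AM4)  "ldr r4, [r0]; ldr r7, [r0, #4]" can merge (4 < 7), while
    //      (AM5)  "vldr d0, [r0]; vldr d1, [r0, #8]" needs d1 == d0 + 1.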
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isAM4 && RegNum > PRegNum)
         || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try merging the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}

/// isMatchingDecrement - Return true if MI is a 'sub Base, Base, #Bytes'
/// (with a matching predicate) that can be folded as a base update.
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::t2SUBrSPi &&
      MI->getOpcode() != ARM::t2SUBrSPi12 &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes <= 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
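
/// isMatchingIncrement - Return true if MI is an 'add Base, Base, #Bytes'
/// (with a matching predicate) that can be folded as a base update.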
static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::t2ADDrSPi &&
      MI->getOpcode() != ARM::t2ADDrSPi12 &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes <= 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDR:
  case ARM::STR:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDM:
  case ARM::STM:
  case ARM::t2LDM:
  case ARM::t2STM:
    return (MI->getNumOperands() - 4) * 4;
  case ARM::VLDMS:
  case ARM::VSTMS:
  case ARM::VLDMD:
  case ARM::VSTMD:
    return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
  }
}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDM: return ARM::LDM_UPD;
  case ARM::STM: return ARM::STM_UPD;
  case ARM::t2LDM: return ARM::t2LDM_UPD;
  case ARM::t2STM: return ARM::t2STM_UPD;
  case ARM::VLDMS: return ARM::VLDMS_UPD;
  case ARM::VLDMD: return ARM::VLDMD_UPD;
  case ARM::VSTMS: return ARM::VSTMS_UPD;
  case ARM::VSTMD: return ARM::VSTMD_UPD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                                bool &Advance,
                                                MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM4 = (Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
                Opcode == ARM::STM || Opcode == ARM::t2STM);

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  unsigned Offset = 0;

  if (isAM4) {
    // Can't use an updating ld/st if the base register is also a dest
    // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
    for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
      if (MI->getOperand(i).getReg() == Base)
        return false;
    }
    Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
  } else {
    // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
    Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
    Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
  }
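
  // The same folding applies to the VFP forms, e.g. (a sketch):
  //   sub r0, r0, #16
  //   vldmia r0, {d0, d1}
  // =>
  //   vldmdb r0!, {d0, d1}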

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isAM4) {
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        DoMerge = true;
        Mode = ARM_AM::db;
      } else if (Mode == ARM_AM::ib &&
                 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        DoMerge = true;
        Mode = ARM_AM::da;
      }
    } else {
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        Mode = ARM_AM::db;
        DoMerge = true;
      }
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (isAM4) {
      if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        DoMerge = true;
      } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
                 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        DoMerge = true;
      }
    } else {
      if (Mode == ARM_AM::ia &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        DoMerge = true;
      }
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill));
  if (isAM4) {
    // [t2]LDM_UPD, [t2]STM_UPD
    MIB.addImm(ARM_AM::getAM4ModeImm(Mode))
      .addImm(Pred).addReg(PredReg);
  } else {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    MIB.addImm(ARM_AM::getAM5Opc(Mode, Offset))
      .addImm(Pred).addReg(PredReg);
  }
  // Transfer the rest of the operands.
  for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));
  // Transfer memoperands.
  (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_PRE;
  case ARM::STR: return ARM::STR_PRE;
  case ARM::VLDRS: return ARM::VLDMS_UPD;
  case ARM::VLDRD: return ARM::VLDMD_UPD;
  case ARM::VSTRS: return ARM::VSTMS_UPD;
  case ARM::VSTRD: return ARM::VSTMD_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_POST;
  case ARM::STR: return ARM::STR_POST;
  case ARM::VLDRS: return ARM::VLDMS_UPD;
  case ARM::VLDRD: return ARM::VLDMD_UPD;
  case ARM::VSTRS: return ARM::VSTMS_UPD;
  case ARM::VSTRD: return ARM::VSTMD_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/VLDR{D|S}/VSTR{D|S} op when possible:
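///
/// rn := rn + 4;
/// ldr rd, [rn]
/// =>
/// ldr rd, [rn, #4]!
///
/// ldr rd, [rn]
/// rn := rn + 4;
/// =>
/// ldr rd, [rn], #4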
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDR || Opcode == ARM::STR);
  if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
    return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;
  if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  bool isDPR = NewOpc == ARM::VLDMD_UPD || NewOpc == ARM::VSTMD_UPD;
  unsigned Offset = 0;
  if (isAM5)
    Offset = ARM_AM::getAM5Opc(AddSub == ARM_AM::sub ? ARM_AM::db : ARM_AM::ia,
                               (isDPR ? 2 : 1));
  else if (isAM2)
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  else
    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Offset)
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2)
      // LDR_PRE, LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM2)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operation (that this
/// pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
  if (MI->hasOneMemOperand()) {
    const MachineMemOperand *MMO = *MI->memoperands_begin();

    // Don't touch volatile memory accesses - we may be changing their order.
    if (MMO->isVolatile())
      return false;

    // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
    // not.
    if (MMO->getAlignment() < 4)
      return false;
  }

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::LDR:
  case ARM::STR:
    return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}
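
/// getMemoryOpOffset - Return the signed byte offset encoded in a load /
/// store's addressing-mode immediate operand.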
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
    return OffField;

  int Offset = isAM2
    ? ARM_AM::getAM2Offset(OffField)
    : (isAM3 ? ARM_AM::getAM3Offset(OffField)
             : ARM_AM::getAM5Offset(OffField) * 4);
  if (isAM2) {
    if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int OffImm, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          unsigned OffReg, bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  int Offset = OffImm;
  if (!isT2) {
    if (OffImm < 0)
      Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
    else
      Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
  }
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

/// FixInvalidRegPairOp - Expand an LDRD / STRD whose register operands do not
/// form a legal even/odd pair into an equivalent LDM / STM or into a pair of
/// LDR / STR instructions.
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
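      // e.g. an ill-formed "ldrd r1, r2, [r0]" becomes "ldmia r0, {r1, r2}".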
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDM : ARM::LDM)
        : (isT2 ? ARM::t2STM : ARM::STM);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
    } else {
      // Split into two instructions.
      assert((!isT2 || !OffReg) &&
             "Thumb2 ldrd / strd does not encode offset register!");
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defed by the load; make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg) ||
           (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg) &&
               (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          //   t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBBI = prior(MBBI);
    MBB.erase(MI);
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (MBBI->isDebugValue()) {
      ++MBBI;
      continue;
    }
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      //   r4 := ldr [r5]
      //   r5 := ldr [r5, #4]
      //   r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
        NumMemOps++;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
            NumMemOps++;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
                NumMemOps++;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's
        // needed. First advance to the instruction just before the start of
        // the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // ops that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,
                                         Advance, MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM_UPD ||
        PrevMI->getOpcode() == ARM::t2LDM_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
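///
/// e.g. (a sketch; the registers and the intervening add are illustrative)
///   ldr r4, [r0]
///   add r7, r7, #1
///   ldr r5, [r0, #4]
/// =>
///   ldr r4, [r0]
///   ldr r5, [r0, #4]
///   add r7, r7, #1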

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          unsigned &OffReg, int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias
  // information some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    const TargetInstrDesc &TID = I->getDesc();
    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
      return false;
    if (isLd && TID.mayStore())
      return false;
    if (!isLd) {
      if (TID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      //   str r1, [r0]
      //   strh r5, [r0]
      //   str r4, [r0, #+4]
      if (TID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // OK if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}

bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          unsigned &OffReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDR)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STR)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the offset registers match.
  if (!isT2 &&
      (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
    return false;

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    if (OffImm < 0) {
      if (OffImm < -255)
        // Can't fall back to t2LDRi8 / t2STRi8.
        return false;
    } else {
      int Limit = (1 << 8) * Scale;
      if (OffImm >= Limit || (OffImm & (Scale-1)))
        return false;
    }
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  if (!isT2)
    OffReg = Op0->getOperand(2).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
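  //    e.g. loads at [Base] and [Base, #8] with nothing at [Base, #4] do not
  //    form a contiguous run and will not be moved together.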
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative: if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end() && MemOps.count(InsertPos))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg, OffReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.isCall() || TID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}