//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");

/// ARMLoadStoreOpt - Post-register-allocation pass that combines load / store
/// instructions to form ldm / stm instructions.
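///
/// For example (illustrative; registers and offsets are assumptions):
///   ldr r1, [r0]
///   ldr r2, [r0, #4]
///   ldr r3, [r0, #8]
/// =>
///   ldmia r0, {r1, r2, r3}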
namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
        : Offset(o), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static int getLoadStoreMultipleOpcode(int Opcode) {
  switch (Opcode) {
  case ARM::LDR:
    NumLDMGened++;
    return ARM::LDM;
  case ARM::STR:
    NumSTMGened++;
    return ARM::STM;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    NumLDMGened++;
    return ARM::t2LDM;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    NumSTMGened++;
    return ARM::t2STM;
  case ARM::VLDRS:
    NumVLDMGened++;
    return ARM::VLDMS;
  case ARM::VSTRS:
    NumVSTMGened++;
    return ARM::VSTMS;
  case ARM::VLDRD:
    NumVLDMGened++;
    return ARM::VLDMD;
  case ARM::VSTRD:
    NumVSTMGened++;
    return ARM::VSTMD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDR || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STR || isT2i32Store(Opc);
}
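
// Reader's note (illustrative summary, not from the original source):
// MergeOps below derives the LDM/STM addressing sub-mode from the starting
// offset of a run of NumRegs same-size accesses off a common base. For the
// i32 ldr/str (AM4) case:
//
//   Offset == 0                -> ia (increment after)
//   Offset == 4                -> ib (increment before, ARM mode only)
//   Offset == -4 * NumRegs     -> db (decrement before)
//   Offset == -4 * NumRegs + 4 -> da (decrement after, ARM mode only)
//
// Any other non-zero starting offset forces materializing a new base register
// first, which is only considered worthwhile for three or more ops.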

/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// Returns true if the transformation is done.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  if (isAM4 && Offset == 4) {
    if (isThumb2)
      // Thumb2 does not support ldmib / stmib.
      return false;
    Mode = ARM_AM::ib;
  } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
    if (isThumb2)
      // Thumb2 does not support ldmda / stmda.
      return false;
    Mode = ARM_AM::da;
  } else if (isAM4 && Offset == -4 * (int)NumRegs) {
    Mode = ARM_AM::db;
  } else if (Offset != 0) {
    // If the starting offset isn't zero, insert an MI to materialize a new
    // base. But only do so if it is cost effective, i.e. merging more than
    // two loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2
      ? ARM::ADDri
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
    if (Offset < 0) {
      BaseOpc = !isThumb2
        ? ARM::SUBri
        : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
      Offset = - Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // The new base is always killed right after its use.
  }

  bool isDPR = Opcode == ARM::VLDRD || Opcode == ARM::VSTRD;
  bool isDef = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = (isAM4)
    ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
    : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
        .addImm(Pred).addReg(PredReg);
  MIB.addReg(0); // Add optional writeback (0 for now).
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}
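
// An illustrative example (assumed registers; r12 standing in for whatever
// scratch register the scavenger found) of the new-base path above, merging
// four stores that start at offset 16:
//
//   str r1, [r0, #16]           add   r12, r0, #16
//   str r2, [r0, #20]     =>    stmia r12, {r1, r2, r3, r4}
//   str r3, [r0, #24]
//   str r4, [r0, #28]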

// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::
MergeOpsUpdate(MachineBasicBlock &MBB,
               MemOpQueue &memOps,
               unsigned memOpsBegin,
               unsigned memOpsEnd,
               unsigned insertAfter,
               int Offset,
               unsigned Base,
               bool BaseKill,
               int Opcode,
               ARMCC::CondCodes Pred,
               unsigned PredReg,
               unsigned Scratch,
               DebugLoc dl,
               SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  const unsigned insertPos = memOps[insertAfter].Position;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    const MachineOperand &MO = memOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    bool isKill = MO.isKill();

    // If we are inserting the merged operation after an unmerged operation
    // that uses the same register, make sure to transfer any kill flag.
    for (unsigned j = memOpsEnd, e = memOps.size(); !isKill && j != e; ++j)
      if (memOps[j].Position < insertPos) {
        const MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
        if (MOJ.getReg() == Reg && MOJ.isKill())
          isKill = true;
      }

    Regs.push_back(std::make_pair(Reg, isKill));
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any unmerged memops that come before insertPos.
    if (Regs[i-memOpsBegin].second)
      for (unsigned j = memOpsEnd, e = memOps.size(); j != e; ++j)
        if (memOps[j].Position < insertPos) {
          MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
          if (MOJ.getReg() == Regs[i-memOpsBegin].first && MOJ.isKill())
            MOJ.setIsKill(false);
        }
    MBB.erase(memOps[i].MBBI);
    memOps[i].Merged = true;
  }
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or
/// more load / store multiple instructions.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                          unsigned Base, int Opcode, unsigned Size,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          unsigned Scratch, MemOpQueue &MemOps,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : ARMRegisterInfo::getRegisterNumbering(PReg);

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : ARMRegisterInfo::getRegisterNumbering(Reg);
    // AM4 - register numbers in ascending order.
    // AM5 - consecutive register numbers in ascending order.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
      Offset += Size;
      PRegNum = RegNum;
    } else {
      // Can't merge this in. Try merging the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}

static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::t2SUBrSPi &&
      MI->getOpcode() != ARM::t2SUBrSPi12 &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
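
// For illustration (not from the original source): with Base = sp and
// Bytes = 16, isMatchingDecrement accepts e.g.
//   sub sp, sp, #16
// (as SUBri, or as tSUBspi whose machine-level immediate 4 is scaled by 4),
// provided the predicate matches. isMatchingIncrement below is the mirror
// image for add.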

static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::t2ADDrSPi &&
      MI->getOpcode() != ARM::t2ADDrSPi12 &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDR:
  case ARM::STR:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDM:
  case ARM::STM:
  case ARM::t2LDM:
  case ARM::t2STM:
    return (MI->getNumOperands() - 5) * 4;
  case ARM::VLDMS:
  case ARM::VSTMS:
  case ARM::VLDMD:
  case ARM::VSTMD:
    return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
  }
}

/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
    Opcode == ARM::STM || Opcode == ARM::t2STM;

  if (isAM4) {
    if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
      return false;

    // Can't use the updating AM4 sub-mode if the base register is also a dest
    // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
    for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
      if (MI->getOperand(i).getReg() == Base)
        return false;
    }

    ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
        MI->getOperand(4).setReg(Base);
        MI->getOperand(4).setIsDef();
        MBB.erase(PrevMBBI);
        return true;
      } else if (Mode == ARM_AM::ib &&
                 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
        MI->getOperand(4).setReg(Base);  // WB to base
        MI->getOperand(4).setIsDef();
        MBB.erase(PrevMBBI);
        return true;
      }
    }

    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
      if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        MI->getOperand(4).setReg(Base);  // WB to base
        MI->getOperand(4).setIsDef();
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
                 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        MI->getOperand(4).setReg(Base);  // WB to base
        MI->getOperand(4).setIsDef();
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      }
    }
  } else {
    // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
    if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
      return false;

    ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
    unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
        MI->getOperand(4).setReg(Base);  // WB to base
        MI->getOperand(4).setIsDef();
        MBB.erase(PrevMBBI);
        return true;
      }
    }

    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
        MI->getOperand(4).setReg(Base);  // WB to base
        MI->getOperand(4).setIsDef();
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      }
    }
  }

  return false;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_PRE;
  case ARM::STR: return ARM::STR_PRE;
  case ARM::VLDRS: return ARM::VLDMS;
  case ARM::VLDRD: return ARM::VLDMD;
  case ARM::VSTRS: return ARM::VSTMS;
  case ARM::VSTRD: return ARM::VSTMD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_POST;
  case ARM::STR: return ARM::STR_POST;
  case ARM::VLDRS: return ARM::VLDMS;
  case ARM::VLDRD: return ARM::VLDMD;
  case ARM::VSTRS: return ARM::VSTMS;
  case ARM::VSTRD: return ARM::VSTMD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/VLDR{D|S}/VSTR{D|S} op when possible:
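///
/// For example (illustrative):
///   ldr r1, [r0]
///   r0 := r0 + 4
/// =>
///   ldr r1, [r0], #4     (post-indexed)
///
///   r0 := r0 - 4
///   str r1, [r0]
/// =>
///   str r1, [r0, #-4]!   (pre-indexed)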
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
    Opcode == ARM::VSTRD || Opcode == ARM::VSTRS;
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
    return false;
  else if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;
  else if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the
  // would-be writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
  if (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit, Pred,
                                   PredReg)) {
      DoMerge = true;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  if (!DoMerge && MBBI != MBB.end()) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit, Pred,
                                   PredReg)) {
      DoMerge = true;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD;
  unsigned Offset = 0;
  if (isAM5)
    Offset = ARM_AM::getAM5Opc((AddSub == ARM_AM::sub)
                               ? ARM_AM::db
                               : ARM_AM::ia, true, (isDPR ? 2 : 1));
  else if (isAM2)
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  else
    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
  if (isLd) {
    if (isAM5)
      // VLDMS, VLDMD
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(Offset).addImm(Pred).addReg(PredReg)
        .addReg(Base, getDefRegState(true)) // WB base register
        .addReg(MI->getOperand(0).getReg(), RegState::Define);
    else if (isAM2)
      // LDR_PRE, LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM5)
      // VSTMS, VSTMD
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset)
        .addImm(Pred).addReg(PredReg)
        .addReg(Base, getDefRegState(true)) // WB base register
        .addReg(MO.getReg(), getKillRegState(MO.isKill()));
    else if (isAM2)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operation (that this
/// pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
  if (MI->hasOneMemOperand()) {
    const MachineMemOperand *MMO = *MI->memoperands_begin();

    // Don't touch volatile memory accesses - we may be changing their order.
    if (MMO->isVolatile())
      return false;

    // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm
    // is not.
    if (MMO->getAlignment() < 4)
      return false;
  }

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::LDR:
  case ARM::STR:
    return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}

static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
    return OffField;

  int Offset = isAM2
    ? ARM_AM::getAM2Offset(OffField)
    : (isAM3 ? ARM_AM::getAM3Offset(OffField)
             : ARM_AM::getAM5Offset(OffField) * 4);
  if (isAM2) {
    if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int OffImm, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          unsigned OffReg, bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  int Offset = OffImm;
  if (!isT2) {
    if (OffImm < 0)
      Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
    else
      Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
  }
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
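
// A sketch of what FixInvalidRegPairOp below handles (register choices are
// illustrative): an LDRD/STRD whose register pair is not a legal even/odd
// pair is rewritten either as an LDM/STM (ascending registers, no offset)
// or as two plain loads / stores:
//
//   ldrd r2, r5, [r0]      =>  ldmia r0, {r2, r5}
//   ldrd r5, r4, [r0, #8]  =>  ldr r5, [r0, #8]
//                              ldr r4, [r0, #12]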
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg  = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to
      // an ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDM : ARM::LDM)
        : (isT2 ? ARM::t2STM : ARM::STM);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(0)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(0)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
    } else {
      // Split into two instructions.
      assert((!isT2 || !OffReg) &&
             "Thumb2 ldrd / strd does not encode offset register!");
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defed by the load; make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg) ||
           (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg) &&
               (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          //   t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBBI = prior(MBBI);
    MBB.erase(MI);
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
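///
/// The candidates need not appear in offset order; for example
/// (illustrative):
///   ldr r2, [r0, #4]
///   ldr r1, [r0]
/// =>
///   ldmia r0, {r1, r2}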
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      //   r4 := ldr [r5]
      //   r5 := ldr [r5, #4]
      //   r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
        NumMemOps++;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
            NumMemOps++;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
                NumMemOps++;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's
        // needed. First advance to the instruction just before the start of
        // the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // ops that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII, Advance,
                                         MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If the iterator hasn't been advanced and this is not a memory op,
      // skip it. It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
/// (bx lr) into the preceding stack restore so it directly restores the value
/// of LR into the pc.
///   ldmfd sp!, {r7, lr}
///   bx lr
/// =>
///   ldmfd sp!, {r7, pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET)) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM || PrevMI->getOpcode() == ARM::t2LDM) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
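///
/// For example (illustrative):
///   ldr r1, [r0]
///   mul r3, r3, r4
///   ldr r2, [r0, #4]
/// =>
///   ldr r1, [r0]
///   ldr r2, [r0, #4]
///   mul r3, r3, r4
///
/// (or, when the alignment and offsets allow, the two loads may instead be
/// combined directly into an ldrd; see RescheduleOps below).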
namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          unsigned &OffReg, int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias
  // information some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (MemOps.count(&*I))
      continue;
    const TargetInstrDesc &TID = I->getDesc();
    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
      return false;
    if (isLd && TID.mayStore())
      return false;
    if (!isLd) {
      if (TID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      //   str r1, [r0]
      //   strh r5, [r0]
      //   str r4, [r0, #+4]
      if (TID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate the register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}

bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          unsigned &OffReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDR)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STR)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the offset registers match.
  if (!isT2 &&
      (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
    return false;

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    if (OffImm < 0) {
      if (OffImm < -255)
        // Can't fall back to t2LDRi8 / t2STRi8.
        return false;
    } else {
      int Limit = (1 << 8) * Scale;
      if (OffImm >= Limit || (OffImm & (Scale-1)))
        return false;
    }
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  if (!isT2)
    OffReg = Op0->getOperand(2).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
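
// A sketch (illustrative registers) of the pairing RescheduleOps below
// attempts once two adjacent candidates pass CanFormLdStDWord: two word
// loads at consecutive, suitably aligned offsets become one doubleword load,
//
//   ldr r0, [r2]          =>   ldrd r0, r1, [r2]
//   ldr r1, [r2, #4]
//
// with register allocation hints added afterwards so the allocator prefers
// an even/odd pair that keeps the ldrd encodable.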
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative: if the instructions are too far apart, don't move
      // them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end() && MemOps.count(InsertPos))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg, OffReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.isCall() || TID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      MI2LocMap[MI] = Loc++;
      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - returns an instance of the load /
/// store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}