//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");

/// ARMLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
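///
/// A rough illustration of the basic rewrite this pass performs (register
/// numbers here are hypothetical):
///
///   ldr r2, [r0]
///   ldr r3, [r0, #4]        =>      ldmia r0, {r2, r3, r6}
///   ldr r6, [r0, #8]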

namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
        : Offset(o), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static int getLoadStoreMultipleOpcode(int Opcode) {
  switch (Opcode) {
  case ARM::LDR:
    NumLDMGened++;
    return ARM::LDM;
  case ARM::STR:
    NumSTMGened++;
    return ARM::STM;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    NumLDMGened++;
    return ARM::t2LDM;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    NumSTMGened++;
    return ARM::t2STM;
  case ARM::VLDRS:
    NumVLDMGened++;
    return ARM::VLDMS;
  case ARM::VSTRS:
    NumVSTMGened++;
    return ARM::VSTMS;
  case ARM::VLDRD:
    NumVLDMGened++;
    return ARM::VLDMD;
  case ARM::VSTRD:
    NumVSTMGened++;
    return ARM::VSTMD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDR || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STR || isT2i32Store(Opc);
}

/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  if (isAM4 && Offset == 4) {
    if (isThumb2)
      // Thumb2 does not support ldmib / stmib.
      return false;
    Mode = ARM_AM::ib;
  } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
    if (isThumb2)
      // Thumb2 does not support ldmda / stmda.
      return false;
    Mode = ARM_AM::da;
  } else if (isAM4 && Offset == -4 * (int)NumRegs) {
    Mode = ARM_AM::db;
  } else if (Offset != 0) {
    // If the starting offset isn't zero, insert a MI to materialize a new
    // base. But only do so if it is cost effective, i.e. merging more than
    // two loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2
      ? ARM::ADDri
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
    if (Offset < 0) {
      BaseOpc = !isThumb2
        ? ARM::SUBri
        : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
      Offset = - Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }

  bool isDPR = Opcode == ARM::VLDRD || Opcode == ARM::VSTRD;
  bool isDef = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = (isAM4)
    ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
    : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
        .addImm(Pred).addReg(PredReg);
  MIB.addReg(0); // Add optional writeback (0 for now).
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}

// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
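// Kill flags are recomputed here because the merged instruction is inserted
// after `insertAfter`, past unmerged memops that may still use the same
// registers.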
void ARMLoadStoreOpt::
MergeOpsUpdate(MachineBasicBlock &MBB,
               MemOpQueue &memOps,
               unsigned memOpsBegin,
               unsigned memOpsEnd,
               unsigned insertAfter,
               int Offset,
               unsigned Base,
               bool BaseKill,
               int Opcode,
               ARMCC::CondCodes Pred,
               unsigned PredReg,
               unsigned Scratch,
               DebugLoc dl,
               SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  const unsigned insertPos = memOps[insertAfter].Position;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    const MachineOperand &MO = memOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    bool isKill = MO.isKill();

    // If we are inserting the merged operation after an unmerged operation
    // that uses the same register, make sure to transfer any kill flag.
    for (unsigned j = memOpsEnd, e = memOps.size(); !isKill && j != e; ++j)
      if (memOps[j].Position < insertPos) {
        const MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
        if (MOJ.getReg() == Reg && MOJ.isKill())
          isKill = true;
      }

    Regs.push_back(std::make_pair(Reg, isKill));
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  Loc++;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any unmerged memops that come before insertPos.
    if (Regs[i-memOpsBegin].second)
      for (unsigned j = memOpsEnd, e = memOps.size(); j != e; ++j)
        if (memOps[j].Position < insertPos) {
          MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
          if (MOJ.getReg() == Regs[i-memOpsBegin].first && MOJ.isKill())
            MOJ.setIsKill(false);
        }
    MBB.erase(memOps[i].MBBI);
    memOps[i].Merged = true;
  }
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or
/// more load / store multiple instructions.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                              SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : ARMRegisterInfo::getRegisterNumbering(PReg);

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : ARMRegisterInfo::getRegisterNumbering(Reg);
    // AM4 - register numbers in ascending order.
    // AM5 - consecutive register numbers in ascending order.
    if (NewOffset == Offset + (int)Size &&
        ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
      Offset += Size;
      PRegNum = RegNum;
    } else {
      // Can't merge this in. Try merging the earlier ones first.
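      // (Then recursively process the remaining memops starting at index i.)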
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}

static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::t2SUBrSPi &&
      MI->getOpcode() != ARM::t2SUBrSPi12 &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes <= 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::t2ADDrSPi &&
      MI->getOpcode() != ARM::t2ADDrSPi12 &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes <= 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ?
    4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDR:
  case ARM::STR:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDM:
  case ARM::STM:
  case ARM::t2LDM:
  case ARM::t2STM:
    return (MI->getNumOperands() - 5) * 4;
  case ARM::VLDMS:
  case ARM::VSTMS:
  case ARM::VLDMD:
  case ARM::VSTMD:
    return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
  }
}

/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                                MachineBasicBlock::iterator MBBI,
                                                bool &Advance,
                                                MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
    Opcode == ARM::STM || Opcode == ARM::t2STM;

  if (isAM4) {
    if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
      return false;

    // Can't use the updating AM4 sub-mode if the base register is also a dest
    // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
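    // Scan the trailing operands (register list etc.) for the base register.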
    for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
      if (MI->getOperand(i).getReg() == Base)
        return false;
    }

    ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
        MI->getOperand(4).setReg(Base);
        MI->getOperand(4).setIsDef();
        MBB.erase(PrevMBBI);
        return true;
      } else if (Mode == ARM_AM::ib &&
                 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        MBB.erase(PrevMBBI);
        return true;
      }
    }

    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
      if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
                 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      }
    }
  } else {
    // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
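    // AM5 only encodes the ia and db sub-modes, so there are fewer
    // writeback-folding opportunities than in the AM4 case above.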
    if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
      return false;

    ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
    unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        MBB.erase(PrevMBBI);
        return true;
      }
    }

    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      }
    }
  }

  return false;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_PRE;
  case ARM::STR: return ARM::STR_PRE;
  case ARM::VLDRS: return ARM::VLDMS;
  case ARM::VLDRD: return ARM::VLDMD;
  case ARM::VSTRS: return ARM::VSTMS;
  case ARM::VSTRD: return ARM::VSTMD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_POST;
  case ARM::STR: return ARM::STR_POST;
  case ARM::VLDRS: return ARM::VLDMS;
  case ARM::VLDRD: return ARM::VLDMD;
  case ARM::VSTRS: return ARM::VSTMS;
  case ARM::VSTRD: return ARM::VSTMD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
    Opcode == ARM::VSTRD || Opcode == ARM::VSTRS;
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
    return false;
  else if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;
  else if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
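  // (e.g. folding a base update into "ldr r0, [r0]" would make the loaded
  // value and the updated base collide in r0, which is unpredictable.)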
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
  if (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  if (!DoMerge && MBBI != MBB.end()) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD;
  unsigned Offset = 0;
  if (isAM5)
    Offset = ARM_AM::getAM5Opc((AddSub == ARM_AM::sub)
                               ? ARM_AM::db
                               : ARM_AM::ia, true, (isDPR ? 2 : 1));
  else if (isAM2)
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  else
    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
  if (isLd) {
    if (isAM5)
      // VLDMS, VLDMD
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(Offset).addImm(Pred).addReg(PredReg)
        .addReg(Base, getDefRegState(true)) // WB base register
        .addReg(MI->getOperand(0).getReg(), RegState::Define);
    else if (isAM2)
      // LDR_PRE, LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM5)
      // VSTMS, VSTMD
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset)
        .addImm(Pred).addReg(PredReg)
        .addReg(Base, getDefRegState(true)) // WB base register
        .addReg(MO.getReg(), getKillRegState(MO.isKill()));
    else if (isAM2)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operation (that this
/// pass is capable of operating on).
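/// Note that LDR / STR with a register offset (operand 2 != 0) are rejected;
/// only the simple immediate-offset forms can be chained into LDM / STM.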
static bool isMemoryOp(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::LDR:
  case ARM::STR:
    return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}

static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
    return OffField;

  int Offset = isAM2
    ? ARM_AM::getAM2Offset(OffField)
    : (isAM3 ? ARM_AM::getAM3Offset(OffField)
             : ARM_AM::getAM5Offset(OffField) * 4);
  if (isAM2) {
    if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int OffImm, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          unsigned OffReg, bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  int Offset = OffImm;
  if (!isT2) {
    if (OffImm < 0)
      Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
    else
      Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
  }
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

bool
ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDM : ARM::LDM)
        : (isT2 ? ARM::t2STM : ARM::STM);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(0)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(0)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
    } else {
      // Split into two instructions.
      assert((!isT2 || !OffReg) &&
             "Thumb2 ldrd / strd does not encode offset register!");
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defed by the load; make sure the first load does not clobber it.
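      // In that case the odd (higher-addressed) half is loaded first, and the
      // base / offset kill flags are deferred to the second load.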
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg) ||
           (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg) &&
               (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBBI = prior(MBBI);
    MBB.erase(MI);
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
        NumMemOps++;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
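          // The queue is kept sorted by offset; an offset collision means
          // the same slot is accessed twice, which cuts the chain here.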
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
            NumMemOps++;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
                NumMemOps++;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's
        // needed. First advance to the instruction just before the start of
        // the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // ops that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII, Advance, MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If the iterator hasn't been advanced and this is not a memory op,
      // skip it. It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
/// (bx lr) into the preceding stack restore so it directly restores the value
/// of LR into pc.
/// ldmfd sp!, {r7, lr}
/// bx lr
/// =>
/// ldmfd sp!, {r7, pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET)) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM || PrevMI->getOpcode() == ARM::t2LDM) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          unsigned &OffReg, int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias
  // information some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (MemOps.count(&*I))
      continue;
    const TargetInstrDesc &TID = I->getDesc();
    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
      return false;
    if (isLd && TID.mayStore())
      return false;
    if (!isLd) {
      if (TID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (TID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate the register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // OK if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}

bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          unsigned &OffReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDR)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STR)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the offset registers match.
  if (!isT2 &&
      (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
    return false;

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    if (OffImm < 0) {
      if (OffImm < -255)
        // Can't fall back to t2LDRi8 / t2STRi8.
        return false;
    } else {
      int Limit = (1 << 8) * Scale;
      if (OffImm >= Limit || (OffImm & (Scale-1)))
        return false;
    }
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  if (!isT2)
    OffReg = Op0->getOperand(2).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                            SmallVector<MachineInstr*, 4> &Ops,
                                            unsigned Base, bool isLd,
                                            DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative: if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end() && MemOps.count(InsertPos))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
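        // (ARM LDRD / STRD want an even/odd register pair, which is why the
        // register allocator is given pairing hints below.)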
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg, OffReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.isCall() || TID.isTerminator()) {
        // Stop at barriers.
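        // Advance past the barrier so scanning resumes after the ops
        // collected so far have been rescheduled below.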
        ++MBBI;
        break;
      }

      MI2LocMap[MI] = Loc++;
      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}
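
// Usage sketch (an assumption about the surrounding target setup, not part of
// this file): a target machine would typically schedule both variants, e.g.
//
//   // pre-RA: cluster loads / stores so they can pair up later
//   PM.add(createARMLoadStoreOptimizationPass(/*PreAlloc=*/true));
//   // post-RA: form ldm / stm and fold base updates
//   PM.add(createARMLoadStoreOptimizationPass(/*PreAlloc=*/false));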