//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");

/// ARMAllocLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
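///
/// For example (illustrative; registers are placeholders):
///
///   ldr r0, [r4]
///   ldr r1, [r4, #4]      =>     ldmia r4, {r0, r1, r2}
///   ldr r2, [r4, #8]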

namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
    break;
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
    break;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
    break;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
    break;
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return ARM::VLDMSDB;
    }
    break;
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return ARM::VSTMSDB;
    }
    break;
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return ARM::VLDMDDB;
    }
    break;
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return ARM::VSTMDDB;
    }
    break;
  }

  return 0;
}

namespace llvm {
  namespace ARM_AM {

AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }

  return ARM_AM::bad_am_submode;
}

  } // end namespace ARM_AM
} // end namespace llvm

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT2i32Store(Opc);
}
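
// How MergeOps (below) picks an addressing submode from the starting offset
// of a run of NumRegs registers (a summary of the conditions in the code):
//   Offset == 0               -> ia  (increment after)
//   Offset == 4               -> ib  (increment before; ARM i32 ops only)
//   Offset == -4*NumRegs      -> db  (decrement before; not for VFP, since
//                                     VLDM/VSTM db requires a base update)
//   Offset == -4*NumRegs + 4  -> da  (decrement after; ARM i32 ops only)
// Any other starting offset requires materializing a new base register first.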

/// MergeOps - Create and insert an LDM or STM with Base as the base register
/// and registers in Regs as the register operands that would be loaded /
/// stored. It returns true if the transformation is done.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // If the starting offset isn't zero, insert a MI to materialize a new
    // base. But only do so if it is cost effective, i.e. merging more than
    // two loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2
      ? ARM::ADDri
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
    if (Offset < 0) {
      BaseOpc = !isThumb2
        ? ARM::SUBri
        : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
      Offset = - Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}

// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;

  SmallSet<unsigned, 4> UnavailRegs;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0; i < memOpsBegin; ++i) {
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      if (memOps[i].Merged)
        UnavailRegs.insert(Reg);
      else {
        KilledRegs.insert(Reg);
        Killer[Reg] = i;
      }
    }
  }
  for (unsigned i = memOpsEnd, e = memOps.size(); i != e; ++i) {
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    if (UnavailRegs.count(Reg))
      // Register is killed before and it's not easy / possible to update the
      // kill marker on already merged instructions. Abort.
      return;

    // If we are inserting the merged operation after an unmerged operation
    // that uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any unmerged memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        memOps[j].MBBI->getOperand(0).setIsKill(false);
        memOps[j].isKill = false;
      }
    }
    MBB.erase(memOps[i].MBBI);
    memOps[i].Merged = true;
  }
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : getARMRegisterNumbering(PReg);
  unsigned Count = 1;

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : getARMRegisterNumbering(Reg);
    // Register numbers must be in ascending order. For VFP, the registers
    // must also be consecutive and there is a limit of 16 double-word
    // registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum)
         || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try merging the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}

static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::t2SUBrSPi &&
      MI->getOpcode() != ARM::t2SUBrSPi12 &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::t2ADDrSPi &&
      MI->getOpcode() != ARM::t2ADDrSPi12 &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  if (Bytes == 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
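
/// getLSMultipleTransferSize - Return the number of bytes transferred by a
/// load / store (multiple) instruction, or 0 if it is not an opcode this
/// pass handles.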
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VLDMSDB:
  case ARM::VSTMSIA:
  case ARM::VSTMSDB:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VLDMDDB:
  case ARM::VSTMDIA:
  case ARM::VSTMDDB:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
    break;
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
    break;
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
    break;
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
    break;
  case ARM::VLDMSIA:
  case ARM::VLDMSDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
    break;
  case ARM::VLDMDIA:
  case ARM::VLDMDDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
    break;
  case ARM::VSTMSIA:
  case ARM::VSTMSDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
    break;
  case ARM::VSTMDIA:
  case ARM::VSTMDDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
    break;
  }

  return 0;
}

/// MergeBaseUpdateLSMultiple - Fold a preceding/trailing inc/dec of the base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                                bool &Advance,
                                                MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of the operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE;
  case ARM::STRi12:
    return ARM::STR_PRE;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST;
  case ARM::STRi12:
    return ARM::STR_POST;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

/// MergeBaseUpdateLoadStore - Fold a preceding/trailing inc/dec of the base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
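///
/// rn := rn - 4
/// ldr rm, [rn]
/// =>
/// ldr rm, [rn, #-4]!
///
/// ldr rm, [rn]
/// rn := rn + 4
/// =>
/// ldr rm, [rn], #4
///
/// (Illustrative examples; rn / rm are placeholder registers.)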
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned Offset = 0;
  if (isAM2)
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  else if (!isAM5)
    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2)
      // LDR_PRE, LDR_POST,
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM2)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if the instruction is a memory operation (that
/// this pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}
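
/// getMemoryOpOffset - Return the signed byte offset of a load / store,
/// decoding the addrmode3 / addrmode5 immediate encodings where necessary.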
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
    : ARM_AM::getAM5Offset(OffField) * 4;
  if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
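
/// FixInvalidRegPairOp - An LDRD / STRD needs its two register operands to be
/// an ascending even / odd pair (e.g. r0 / r1). If MBBI is an LDRD / STRD
/// that violates this, rewrite it as an LDM / STM (when the register numbers
/// ascend and the offset is zero) or split it into two single loads / stores.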
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defed by the load; make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's
        // needed. First advance to the instruction just before the start of
        // the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // instructions that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
/// ldmfd sp!, {..., lr}
/// bx lr
/// or
/// ldmfd sp!, {..., lr}
/// mov pc, lr
/// =>
/// ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(&*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely that they will be combined later.
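///
/// For example (illustrative; registers and the intervening instruction are
/// placeholders):
///
///   ldr r2, [r0]               ldr r2, [r0]
///   add r7, r3, r4      =>     ldr r5, [r0, #4]
///   ldr r5, [r0, #4]           add r7, r3, r4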

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias
  // information some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    const TargetInstrDesc &TID = I->getDesc();
    if (TID.isCall() || TID.isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && TID.mayStore())
      return false;
    if (!isLd) {
      if (TID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (TID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
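
/// CanFormLdStDWord - Check whether two loads / stores that together access a
/// contiguous double word (e.g. "ldr r0, [rn]" and "ldr r1, [rn, #4]") can be
/// combined into a single LDRD / STRD, and if so compute the opcode, register,
/// base and offset operands for the paired instruction.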
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STRi12)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    if (OffImm < 0) {
      if (OffImm < -255)
        // Can't fall back to t2LDRi8 / t2STRi8.
        return false;
    } else {
      int Limit = (1 << 8) * Scale;
      if (OffImm >= Limit || (OffImm & (Scale-1)))
        return false;
    }
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
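
/// RescheduleOps - Ops is a list of loads / stores (all loads or all stores)
/// with the same base register. Find runs with the same opcode and
/// consecutive, gap-free offsets and move them next to each other; pairs may
/// be combined into an LDRD / STRD immediately, and longer runs are left
/// adjacent so the post-allocation pass can merge them into LDM / STM ops.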
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
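
/// RescheduleLoadStoreInstrs - Scan a basic block and collect the loads and
/// stores between scheduling barriers (calls and terminators), grouped by
/// base register, then try to reschedule each group with RescheduleOps.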
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.isCall() || TID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}