//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");

/// ARMAllocLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}
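/// getLoadStoreMultipleOpcode - Map a single load / store opcode and an
/// addressing submode to the equivalent load / store multiple opcode, and
/// bump the corresponding statistic.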
static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
    break;
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
    break;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
    break;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
    break;
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return ARM::VLDMSDB;
    }
    break;
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return ARM::VSTMSDB;
    }
    break;
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return ARM::VLDMDDB;
    }
    break;
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return ARM::VSTMDDB;
    }
    break;
  }

  return 0;
}

namespace llvm {
  namespace ARM_AM {

/// getLoadStoreMultipleSubMode - Return the addressing submode (ia, ib, da
/// or db) encoded by a load / store multiple opcode.
AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }

  return ARM_AM::bad_am_submode;
}

  } // end namespace ARM_AM
} // end namespace llvm

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT2i32Store(Opc);
}
/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // If the starting offset isn't zero, insert an MI to materialize a new
    // base. But only do so if it is cost effective, i.e. merging more than
    // two loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2
      ? ARM::ADDri
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
    if (Offset < 0) {
      BaseOpc = !isThumb2
        ? ARM::SUBri
        : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
      Offset = - Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}
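// For example, MergeOps can rewrite the following (a sketch: r6 is the last
// destination register in the list and so is reused as the materialized base):
//   ldr r4, [r1, #8]
//   ldr r5, [r1, #12]
//   ldr r6, [r1, #16]
// into:
//   add r6, r1, #8
//   ldmia r6, {r4, r5, r6}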
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }
}
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                          unsigned Base, int Opcode, unsigned Size,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          unsigned Scratch, MemOpQueue &MemOps,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : getARMRegisterNumbering(PReg);
  unsigned Count = 1;

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : getARMRegisterNumbering(Reg);
    // Register numbers must be in ascending order. For VFP, the registers
    // must also be consecutive and there is a limit of 16 double-word
    // registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum)
         || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try merging the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}
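/// isMatchingDecrement - Return true if MI is a SUB that decrements Base by
/// Bytes in place (same source and destination register) under the given
/// predicate. Limit, when non-zero, caps the immediate that can be folded.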
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::t2SUBrSPi &&
      MI->getOpcode() != ARM::t2SUBrSPi12 &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

/// isMatchingIncrement - The mirror image of isMatchingDecrement: return true
/// if MI is an ADD that increments Base by Bytes in place under the given
/// predicate.
static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::t2ADDrSPi &&
      MI->getOpcode() != ARM::t2ADDrSPi12 &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  if (Bytes == 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

/// getLSMultipleTransferSize - Return the number of bytes transferred by a
/// load / store (multiple) instruction handled by this pass.
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VLDMSDB:
  case ARM::VSTMSIA:
  case ARM::VSTMSDB:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VLDMDDB:
  case ARM::VSTMDIA:
  case ARM::VSTMDDB:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

/// getUpdatingLSMultipleOpcode - Return the base-updating (writeback) form of
/// a load / store multiple opcode for the given addressing submode.
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
    break;
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
    break;
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
    break;
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
    break;
  case ARM::VLDMSIA:
  case ARM::VLDMSDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
    break;
  case ARM::VLDMDIA:
  case ARM::VLDMDDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
    break;
  case ARM::VSTMSIA:
  case ARM::VSTMSDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
    break;
  case ARM::VSTMDIA:
  case ARM::VSTMDDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
    break;
  }

  return 0;
}
/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}
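/// getPreIndexedLoadStoreOpcode - Return the pre-indexed (base-writeback)
/// form of a load / store opcode. VLDR / VSTR have no base-updating variants,
/// so the updating single-register VLDM / VSTM opcodes are returned instead.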
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE;
  case ARM::STRi12:
    return ARM::STR_PRE;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

/// getPostIndexedLoadStoreOpcode - Return the post-indexed (base-writeback)
/// form of a load / store opcode, using the same VLDM / VSTM fallback as the
/// pre-indexed variant above.
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST;
  case ARM::STRi12:
    return ARM::STR_POST;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }
  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned Offset = 0;
  if (isAM2)
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  else if (!isAM5)
    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2)
      // LDR_PRE, LDR_POST,
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM2)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  }
  MBB.erase(MBBI);

  return true;
}
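// For reference, the kind of rewrite MergeBaseUpdateLoadStore performs
// (a sketch; the exact opcode depends on ARM vs. Thumb2 and on whether the
// increment precedes or follows the memory op):
//   ldr r1, [r0]
//   add r0, r0, #4
// becomes the post-indexed form:
//   ldr r1, [r0], #4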
/// isMemoryOp - Returns true if instruction is a memory operation (that this
/// pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}

/// getMemoryOpOffset - Return the signed byte offset encoded in a load /
/// store's immediate operand, taking the addressing mode into account.
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
    : ARM_AM::getAM5Offset(OffField) * 4;
  if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

/// InsertLDR_STR - Emit a single LDR or STR with the given operands and
/// flags; used by FixInvalidRegPairOp when splitting an LDRD / STRD in two.
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
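/// FixInvalidRegPairOp - An LDRD / STRD requires an even / odd register pair.
/// If this instruction violates that constraint, rewrite it as an LDM / STM
/// (when the register numbers ascend and there is no offset) or as two
/// separate LDRs / STRs.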
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to an
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's
        // needed. First advance to the instruction just before the start of
        // the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // ops that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
/// ldmfd sp!, {..., lr}
/// bx lr
/// or
/// ldmfd sp!, {..., lr}
/// mov pc, lr
/// =>
/// ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(&*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}
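/// IsSafeAndProfitableToMove - Return true if the loads / stores in MemOps
/// can be moved next to each other (no intervening call, barrier, or
/// conflicting memory access or base-register def) and the estimated
/// register pressure increase is acceptable.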
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    const TargetInstrDesc &TID = I->getDesc();
    if (TID.isCall() || TID.isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && TID.mayStore())
      return false;
    if (!isLd) {
      if (TID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (TID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
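/// CanFormLdStDWord - Return true if Op0 and Op1 can be combined into a
/// single LDRD / STRD, filling in the new opcode, the register pair, the base
/// register, and the encoded offset for the caller.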
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STRi12)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    if (OffImm < 0) {
      if (OffImm < -255)
        // Can't fall back to t2LDRi8 / t2STRi8.
        return false;
    } else {
      int Limit = (1 << 8) * Scale;
      if (OffImm >= Limit || (OffImm & (Scale-1)))
        return false;
    }
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}
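/// RescheduleOps - Move a group of loads / stores that use the same base
/// register and form a contiguous block of memory next to each other, and
/// try to turn adjacent pairs into LDRD / STRD.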
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
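/// RescheduleLoadStoreInstrs - Scan the block between barriers (calls and
/// terminators), bucket unpredicated loads and stores by base register, and
/// invoke RescheduleOps on each bucket that contains more than one operation.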
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.isCall() || TID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}