1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains a pass that performs load / store related peephole 11 // optimizations. This pass should be run after register allocation. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ARM.h" 16 #include "ARMBaseInstrInfo.h" 17 #include "ARMBaseRegisterInfo.h" 18 #include "ARMISelLowering.h" 19 #include "ARMMachineFunctionInfo.h" 20 #include "ARMSubtarget.h" 21 #include "MCTargetDesc/ARMAddressingModes.h" 22 #include "Thumb1RegisterInfo.h" 23 #include "llvm/ADT/DenseMap.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/ADT/SmallPtrSet.h" 26 #include "llvm/ADT/SmallSet.h" 27 #include "llvm/ADT/SmallVector.h" 28 #include "llvm/ADT/Statistic.h" 29 #include "llvm/CodeGen/MachineBasicBlock.h" 30 #include "llvm/CodeGen/MachineFunctionPass.h" 31 #include "llvm/CodeGen/MachineInstr.h" 32 #include "llvm/CodeGen/MachineInstrBuilder.h" 33 #include "llvm/CodeGen/MachineRegisterInfo.h" 34 #include "llvm/CodeGen/RegisterScavenging.h" 35 #include "llvm/CodeGen/SelectionDAGNodes.h" 36 #include "llvm/IR/DataLayout.h" 37 #include "llvm/IR/DerivedTypes.h" 38 #include "llvm/IR/Function.h" 39 #include "llvm/Support/Debug.h" 40 #include "llvm/Support/ErrorHandling.h" 41 #include "llvm/Target/TargetInstrInfo.h" 42 #include "llvm/Target/TargetMachine.h" 43 #include "llvm/Target/TargetRegisterInfo.h" 44 using namespace llvm; 45 46 #define DEBUG_TYPE "arm-ldst-opt" 47 48 STATISTIC(NumLDMGened , "Number of ldm instructions generated"); 49 STATISTIC(NumSTMGened , "Number of stm instructions generated"); 50 STATISTIC(NumVLDMGened, "Number of vldm instructions generated"); 51 STATISTIC(NumVSTMGened, "Number of vstm instructions generated"); 52 STATISTIC(NumLdStMoved, "Number of load / store instructions moved"); 53 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation"); 54 STATISTIC(NumSTRDFormed,"Number of strd created before allocation"); 55 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm"); 56 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm"); 57 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's"); 58 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's"); 59 60 /// ARMAllocLoadStoreOpt - Post- register allocation pass the combine 61 /// load / store instructions to form ldm / stm instructions. 62 63 namespace { 64 struct ARMLoadStoreOpt : public MachineFunctionPass { 65 static char ID; 66 ARMLoadStoreOpt() : MachineFunctionPass(ID) {} 67 68 const TargetInstrInfo *TII; 69 const TargetRegisterInfo *TRI; 70 const ARMSubtarget *STI; 71 const TargetLowering *TL; 72 ARMFunctionInfo *AFI; 73 RegScavenger *RS; 74 bool isThumb1, isThumb2; 75 76 bool runOnMachineFunction(MachineFunction &Fn) override; 77 78 const char *getPassName() const override { 79 return "ARM load / store optimization pass"; 80 } 81 82 private: 83 struct MemOpQueueEntry { 84 int Offset; 85 unsigned Reg; 86 bool isKill; 87 unsigned Position; 88 MachineBasicBlock::iterator MBBI; 89 bool Merged; 90 MemOpQueueEntry(int o, unsigned r, bool k, unsigned p, 91 MachineBasicBlock::iterator i) 92 : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {} 93 }; 94 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; 95 typedef MemOpQueue::iterator MemOpQueueIter; 96 97 void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, 98 const MemOpQueue &MemOps, unsigned DefReg, 99 unsigned RangeBegin, unsigned RangeEnd); 100 void UpdateBaseRegUses(MachineBasicBlock &MBB, 101 MachineBasicBlock::iterator MBBI, 102 DebugLoc dl, unsigned Base, unsigned WordOffset, 103 ARMCC::CondCodes Pred, unsigned PredReg); 104 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 105 int Offset, unsigned Base, bool BaseKill, int Opcode, 106 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, 107 DebugLoc dl, 108 ArrayRef<std::pair<unsigned, bool> > Regs, 109 ArrayRef<unsigned> ImpDefs); 110 void MergeOpsUpdate(MachineBasicBlock &MBB, 111 MemOpQueue &MemOps, 112 unsigned memOpsBegin, 113 unsigned memOpsEnd, 114 unsigned insertAfter, 115 int Offset, 116 unsigned Base, 117 bool BaseKill, 118 int Opcode, 119 ARMCC::CondCodes Pred, 120 unsigned PredReg, 121 unsigned Scratch, 122 DebugLoc dl, 123 SmallVectorImpl<MachineBasicBlock::iterator> &Merges); 124 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base, 125 int Opcode, unsigned Size, 126 ARMCC::CondCodes Pred, unsigned PredReg, 127 unsigned Scratch, MemOpQueue &MemOps, 128 SmallVectorImpl<MachineBasicBlock::iterator> &Merges); 129 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps); 130 bool FixInvalidRegPairOp(MachineBasicBlock &MBB, 131 MachineBasicBlock::iterator &MBBI); 132 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, 133 MachineBasicBlock::iterator MBBI, 134 const TargetInstrInfo *TII, 135 bool &Advance, 136 MachineBasicBlock::iterator &I); 137 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, 138 MachineBasicBlock::iterator MBBI, 139 bool &Advance, 140 MachineBasicBlock::iterator &I); 141 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); 142 bool MergeReturnIntoLDM(MachineBasicBlock &MBB); 143 }; 144 char ARMLoadStoreOpt::ID = 0; 145 } 146 147 static bool definesCPSR(const MachineInstr *MI) { 148 for (const auto &MO : MI->operands()) { 149 if (!MO.isReg()) 150 continue; 151 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead()) 152 // If the instruction has live CPSR def, then it's not safe to fold it 153 // into load / store. 154 return true; 155 } 156 157 return false; 158 } 159 160 static int getMemoryOpOffset(const MachineInstr *MI) { 161 int Opcode = MI->getOpcode(); 162 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD; 163 unsigned NumOperands = MI->getDesc().getNumOperands(); 164 unsigned OffField = MI->getOperand(NumOperands-3).getImm(); 165 166 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 || 167 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 || 168 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 || 169 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12) 170 return OffField; 171 172 // Thumb1 immediate offsets are scaled by 4 173 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi) 174 return OffField * 4; 175 176 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField) 177 : ARM_AM::getAM5Offset(OffField) * 4; 178 ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField) 179 : ARM_AM::getAM5Op(OffField); 180 181 if (Op == ARM_AM::sub) 182 return -Offset; 183 184 return Offset; 185 } 186 187 static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { 188 switch (Opcode) { 189 default: llvm_unreachable("Unhandled opcode!"); 190 case ARM::LDRi12: 191 ++NumLDMGened; 192 switch (Mode) { 193 default: llvm_unreachable("Unhandled submode!"); 194 case ARM_AM::ia: return ARM::LDMIA; 195 case ARM_AM::da: return ARM::LDMDA; 196 case ARM_AM::db: return ARM::LDMDB; 197 case ARM_AM::ib: return ARM::LDMIB; 198 } 199 case ARM::STRi12: 200 ++NumSTMGened; 201 switch (Mode) { 202 default: llvm_unreachable("Unhandled submode!"); 203 case ARM_AM::ia: return ARM::STMIA; 204 case ARM_AM::da: return ARM::STMDA; 205 case ARM_AM::db: return ARM::STMDB; 206 case ARM_AM::ib: return ARM::STMIB; 207 } 208 case ARM::tLDRi: 209 // tLDMIA is writeback-only - unless the base register is in the input 210 // reglist. 211 ++NumLDMGened; 212 switch (Mode) { 213 default: llvm_unreachable("Unhandled submode!"); 214 case ARM_AM::ia: return ARM::tLDMIA; 215 } 216 case ARM::tSTRi: 217 // There is no non-writeback tSTMIA either. 218 ++NumSTMGened; 219 switch (Mode) { 220 default: llvm_unreachable("Unhandled submode!"); 221 case ARM_AM::ia: return ARM::tSTMIA_UPD; 222 } 223 case ARM::t2LDRi8: 224 case ARM::t2LDRi12: 225 ++NumLDMGened; 226 switch (Mode) { 227 default: llvm_unreachable("Unhandled submode!"); 228 case ARM_AM::ia: return ARM::t2LDMIA; 229 case ARM_AM::db: return ARM::t2LDMDB; 230 } 231 case ARM::t2STRi8: 232 case ARM::t2STRi12: 233 ++NumSTMGened; 234 switch (Mode) { 235 default: llvm_unreachable("Unhandled submode!"); 236 case ARM_AM::ia: return ARM::t2STMIA; 237 case ARM_AM::db: return ARM::t2STMDB; 238 } 239 case ARM::VLDRS: 240 ++NumVLDMGened; 241 switch (Mode) { 242 default: llvm_unreachable("Unhandled submode!"); 243 case ARM_AM::ia: return ARM::VLDMSIA; 244 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists. 245 } 246 case ARM::VSTRS: 247 ++NumVSTMGened; 248 switch (Mode) { 249 default: llvm_unreachable("Unhandled submode!"); 250 case ARM_AM::ia: return ARM::VSTMSIA; 251 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists. 252 } 253 case ARM::VLDRD: 254 ++NumVLDMGened; 255 switch (Mode) { 256 default: llvm_unreachable("Unhandled submode!"); 257 case ARM_AM::ia: return ARM::VLDMDIA; 258 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists. 259 } 260 case ARM::VSTRD: 261 ++NumVSTMGened; 262 switch (Mode) { 263 default: llvm_unreachable("Unhandled submode!"); 264 case ARM_AM::ia: return ARM::VSTMDIA; 265 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists. 266 } 267 } 268 } 269 270 namespace llvm { 271 namespace ARM_AM { 272 273 AMSubMode getLoadStoreMultipleSubMode(int Opcode) { 274 switch (Opcode) { 275 default: llvm_unreachable("Unhandled opcode!"); 276 case ARM::LDMIA_RET: 277 case ARM::LDMIA: 278 case ARM::LDMIA_UPD: 279 case ARM::STMIA: 280 case ARM::STMIA_UPD: 281 case ARM::tLDMIA: 282 case ARM::tLDMIA_UPD: 283 case ARM::tSTMIA_UPD: 284 case ARM::t2LDMIA_RET: 285 case ARM::t2LDMIA: 286 case ARM::t2LDMIA_UPD: 287 case ARM::t2STMIA: 288 case ARM::t2STMIA_UPD: 289 case ARM::VLDMSIA: 290 case ARM::VLDMSIA_UPD: 291 case ARM::VSTMSIA: 292 case ARM::VSTMSIA_UPD: 293 case ARM::VLDMDIA: 294 case ARM::VLDMDIA_UPD: 295 case ARM::VSTMDIA: 296 case ARM::VSTMDIA_UPD: 297 return ARM_AM::ia; 298 299 case ARM::LDMDA: 300 case ARM::LDMDA_UPD: 301 case ARM::STMDA: 302 case ARM::STMDA_UPD: 303 return ARM_AM::da; 304 305 case ARM::LDMDB: 306 case ARM::LDMDB_UPD: 307 case ARM::STMDB: 308 case ARM::STMDB_UPD: 309 case ARM::t2LDMDB: 310 case ARM::t2LDMDB_UPD: 311 case ARM::t2STMDB: 312 case ARM::t2STMDB_UPD: 313 case ARM::VLDMSDB_UPD: 314 case ARM::VSTMSDB_UPD: 315 case ARM::VLDMDDB_UPD: 316 case ARM::VSTMDDB_UPD: 317 return ARM_AM::db; 318 319 case ARM::LDMIB: 320 case ARM::LDMIB_UPD: 321 case ARM::STMIB: 322 case ARM::STMIB_UPD: 323 return ARM_AM::ib; 324 } 325 } 326 327 } // end namespace ARM_AM 328 } // end namespace llvm 329 330 static bool isT1i32Load(unsigned Opc) { 331 return Opc == ARM::tLDRi; 332 } 333 334 static bool isT2i32Load(unsigned Opc) { 335 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8; 336 } 337 338 static bool isi32Load(unsigned Opc) { 339 return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ; 340 } 341 342 static bool isT1i32Store(unsigned Opc) { 343 return Opc == ARM::tSTRi; 344 } 345 346 static bool isT2i32Store(unsigned Opc) { 347 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8; 348 } 349 350 static bool isi32Store(unsigned Opc) { 351 return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc); 352 } 353 354 static unsigned getImmScale(unsigned Opc) { 355 switch (Opc) { 356 default: llvm_unreachable("Unhandled opcode!"); 357 case ARM::tLDRi: 358 case ARM::tSTRi: 359 return 1; 360 case ARM::tLDRHi: 361 case ARM::tSTRHi: 362 return 2; 363 case ARM::tLDRBi: 364 case ARM::tSTRBi: 365 return 4; 366 } 367 } 368 369 /// Update future uses of the base register with the offset introduced 370 /// due to writeback. This function only works on Thumb1. 371 void 372 ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, 373 MachineBasicBlock::iterator MBBI, 374 DebugLoc dl, unsigned Base, 375 unsigned WordOffset, 376 ARMCC::CondCodes Pred, unsigned PredReg) { 377 assert(isThumb1 && "Can only update base register uses for Thumb1!"); 378 // Start updating any instructions with immediate offsets. Insert a SUB before 379 // the first non-updateable instruction (if any). 380 for (; MBBI != MBB.end(); ++MBBI) { 381 bool InsertSub = false; 382 unsigned Opc = MBBI->getOpcode(); 383 384 if (MBBI->readsRegister(Base)) { 385 int Offset; 386 bool IsLoad = 387 Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi; 388 bool IsStore = 389 Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi; 390 391 if (IsLoad || IsStore) { 392 // Loads and stores with immediate offsets can be updated, but only if 393 // the new offset isn't negative. 394 // The MachineOperand containing the offset immediate is the last one 395 // before predicates. 396 MachineOperand &MO = 397 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); 398 // The offsets are scaled by 1, 2 or 4 depending on the Opcode. 399 Offset = MO.getImm() - WordOffset * getImmScale(Opc); 400 401 // If storing the base register, it needs to be reset first. 402 unsigned InstrSrcReg = MBBI->getOperand(0).getReg(); 403 404 if (Offset >= 0 && !(IsStore && InstrSrcReg == Base)) 405 MO.setImm(Offset); 406 else 407 InsertSub = true; 408 409 } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) && 410 !definesCPSR(MBBI)) { 411 // SUBS/ADDS using this register, with a dead def of the CPSR. 412 // Merge it with the update; if the merged offset is too large, 413 // insert a new sub instead. 414 MachineOperand &MO = 415 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); 416 Offset = (Opc == ARM::tSUBi8) ? 417 MO.getImm() + WordOffset * 4 : 418 MO.getImm() - WordOffset * 4 ; 419 if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) { 420 // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if 421 // Offset == 0. 422 MO.setImm(Offset); 423 // The base register has now been reset, so exit early. 424 return; 425 } else { 426 InsertSub = true; 427 } 428 429 } else { 430 // Can't update the instruction. 431 InsertSub = true; 432 } 433 434 } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) { 435 // Since SUBS sets the condition flags, we can't place the base reset 436 // after an instruction that has a live CPSR def. 437 // The base register might also contain an argument for a function call. 438 InsertSub = true; 439 } 440 441 if (InsertSub) { 442 // An instruction above couldn't be updated, so insert a sub. 443 AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true) 444 .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4) 445 .addImm(Pred).addReg(PredReg); 446 return; 447 } 448 449 if (MBBI->killsRegister(Base)) 450 // Register got killed. Stop updating. 451 return; 452 } 453 454 // End of block was reached. 455 if (MBB.succ_size() > 0) { 456 // FIXME: Because of a bug, live registers are sometimes missing from 457 // the successor blocks' live-in sets. This means we can't trust that 458 // information and *always* have to reset at the end of a block. 459 // See PR21029. 460 if (MBBI != MBB.end()) --MBBI; 461 AddDefaultT1CC( 462 BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true) 463 .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4) 464 .addImm(Pred).addReg(PredReg); 465 } 466 } 467 468 /// MergeOps - Create and insert a LDM or STM with Base as base register and 469 /// registers in Regs as the register operands that would be loaded / stored. 470 /// It returns true if the transformation is done. 471 bool 472 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, 473 MachineBasicBlock::iterator MBBI, 474 int Offset, unsigned Base, bool BaseKill, 475 int Opcode, ARMCC::CondCodes Pred, 476 unsigned PredReg, unsigned Scratch, DebugLoc dl, 477 ArrayRef<std::pair<unsigned, bool> > Regs, 478 ArrayRef<unsigned> ImpDefs) { 479 // Only a single register to load / store. Don't bother. 480 unsigned NumRegs = Regs.size(); 481 if (NumRegs <= 1) 482 return false; 483 484 // For Thumb1 targets, it might be necessary to clobber the CPSR to merge. 485 // Compute liveness information for that register to make the decision. 486 bool SafeToClobberCPSR = !isThumb1 || 487 (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) == 488 MachineBasicBlock::LQR_Dead); 489 490 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback. 491 492 // Exception: If the base register is in the input reglist, Thumb1 LDM is 493 // non-writeback. 494 // It's also not possible to merge an STR of the base register in Thumb1. 495 if (isThumb1) 496 for (unsigned I = 0; I < NumRegs; ++I) 497 if (Base == Regs[I].first) { 498 if (Opcode == ARM::tLDRi) { 499 Writeback = false; 500 break; 501 } else if (Opcode == ARM::tSTRi) { 502 return false; 503 } 504 } 505 506 ARM_AM::AMSubMode Mode = ARM_AM::ia; 507 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA. 508 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); 509 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1; 510 511 if (Offset == 4 && haveIBAndDA) { 512 Mode = ARM_AM::ib; 513 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) { 514 Mode = ARM_AM::da; 515 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) { 516 // VLDM/VSTM do not support DB mode without also updating the base reg. 517 Mode = ARM_AM::db; 518 } else if (Offset != 0) { 519 // Check if this is a supported opcode before inserting instructions to 520 // calculate a new base register. 521 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false; 522 523 // If starting offset isn't zero, insert a MI to materialize a new base. 524 // But only do so if it is cost effective, i.e. merging more than two 525 // loads / stores. 526 if (NumRegs <= 2) 527 return false; 528 529 // On Thumb1, it's not worth materializing a new base register without 530 // clobbering the CPSR (i.e. not using ADDS/SUBS). 531 if (!SafeToClobberCPSR) 532 return false; 533 534 unsigned NewBase; 535 if (isi32Load(Opcode)) { 536 // If it is a load, then just use one of the destination register to 537 // use as the new base. 538 NewBase = Regs[NumRegs-1].first; 539 } else { 540 // Use the scratch register to use as a new base. 541 NewBase = Scratch; 542 if (NewBase == 0) 543 return false; 544 } 545 546 int BaseOpc = 547 isThumb2 ? ARM::t2ADDri : 548 (isThumb1 && Offset < 8) ? ARM::tADDi3 : 549 isThumb1 ? ARM::tADDi8 : ARM::ADDri; 550 551 if (Offset < 0) { 552 Offset = - Offset; 553 BaseOpc = 554 isThumb2 ? ARM::t2SUBri : 555 (isThumb1 && Offset < 8) ? ARM::tSUBi3 : 556 isThumb1 ? ARM::tSUBi8 : ARM::SUBri; 557 } 558 559 if (!TL->isLegalAddImmediate(Offset)) 560 // FIXME: Try add with register operand? 561 return false; // Probably not worth it then. 562 563 if (isThumb1) { 564 // Thumb1: depending on immediate size, use either 565 // ADDS NewBase, Base, #imm3 566 // or 567 // MOV NewBase, Base 568 // ADDS NewBase, #imm8. 569 if (Base != NewBase && Offset >= 8) { 570 // Need to insert a MOV to the new base first. 571 BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase) 572 .addReg(Base, getKillRegState(BaseKill)) 573 .addImm(Pred).addReg(PredReg); 574 // Set up BaseKill and Base correctly to insert the ADDS/SUBS below. 575 Base = NewBase; 576 BaseKill = false; 577 } 578 AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true) 579 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) 580 .addImm(Pred).addReg(PredReg); 581 } else { 582 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) 583 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) 584 .addImm(Pred).addReg(PredReg).addReg(0); 585 } 586 Base = NewBase; 587 BaseKill = true; // New base is always killed straight away. 588 } 589 590 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS || 591 Opcode == ARM::VLDRD); 592 593 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with 594 // base register writeback. 595 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); 596 if (!Opcode) return false; 597 598 // Check if a Thumb1 LDM/STM merge is safe. This is the case if: 599 // - There is no writeback (LDM of base register), 600 // - the base register is killed by the merged instruction, 601 // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS 602 // to reset the base register. 603 // Otherwise, don't merge. 604 // It's safe to return here since the code to materialize a new base register 605 // above is also conditional on SafeToClobberCPSR. 606 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill) 607 return false; 608 609 MachineInstrBuilder MIB; 610 611 if (Writeback) { 612 if (Opcode == ARM::tLDMIA) 613 // Update tLDMIA with writeback if necessary. 614 Opcode = ARM::tLDMIA_UPD; 615 616 MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); 617 618 // Thumb1: we might need to set base writeback when building the MI. 619 MIB.addReg(Base, getDefRegState(true)) 620 .addReg(Base, getKillRegState(BaseKill)); 621 622 // The base isn't dead after a merged instruction with writeback. 623 // Insert a sub instruction after the newly formed instruction to reset. 624 if (!BaseKill) 625 UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg); 626 627 } else { 628 // No writeback, simply build the MachineInstr. 629 MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); 630 MIB.addReg(Base, getKillRegState(BaseKill)); 631 } 632 633 MIB.addImm(Pred).addReg(PredReg); 634 635 for (unsigned i = 0; i != NumRegs; ++i) 636 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef) 637 | getKillRegState(Regs[i].second)); 638 639 // Add implicit defs for super-registers. 640 for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i) 641 MIB.addReg(ImpDefs[i], RegState::ImplicitDefine); 642 643 return true; 644 } 645 646 /// \brief Find all instructions using a given imp-def within a range. 647 /// 648 /// We are trying to combine a range of instructions, one of which (located at 649 /// position RangeBegin) implicitly defines a register. The final LDM/STM will 650 /// be placed at RangeEnd, and so any uses of this definition between RangeStart 651 /// and RangeEnd must be modified to use an undefined value. 652 /// 653 /// The live range continues until we find a second definition or one of the 654 /// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so 655 /// we must consider all uses and decide which are relevant in a second pass. 656 void ARMLoadStoreOpt::findUsesOfImpDef( 657 SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps, 658 unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) { 659 std::map<unsigned, MachineOperand *> Uses; 660 unsigned LastLivePos = RangeEnd; 661 662 // First we find all uses of this register with Position between RangeBegin 663 // and RangeEnd, any or all of these could be uses of a definition at 664 // RangeBegin. We also record the latest position a definition at RangeBegin 665 // would be considered live. 666 for (unsigned i = 0; i < MemOps.size(); ++i) { 667 MachineInstr &MI = *MemOps[i].MBBI; 668 unsigned MIPosition = MemOps[i].Position; 669 if (MIPosition <= RangeBegin || MIPosition > RangeEnd) 670 continue; 671 672 // If this instruction defines the register, then any later use will be of 673 // that definition rather than ours. 674 if (MI.definesRegister(DefReg)) 675 LastLivePos = std::min(LastLivePos, MIPosition); 676 677 MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg); 678 if (!UseOp) 679 continue; 680 681 // If this instruction kills the register then (assuming liveness is 682 // correct when we start) we don't need to think about anything after here. 683 if (UseOp->isKill()) 684 LastLivePos = std::min(LastLivePos, MIPosition); 685 686 Uses[MIPosition] = UseOp; 687 } 688 689 // Now we traverse the list of all uses, and append the ones that actually use 690 // our definition to the requested list. 691 for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(), 692 E = Uses.end(); 693 I != E; ++I) { 694 // List is sorted by position so once we've found one out of range there 695 // will be no more to consider. 696 if (I->first > LastLivePos) 697 break; 698 UsesOfImpDefs.push_back(I->second); 699 } 700 } 701 702 // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on 703 // success. 704 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, 705 MemOpQueue &memOps, 706 unsigned memOpsBegin, unsigned memOpsEnd, 707 unsigned insertAfter, int Offset, 708 unsigned Base, bool BaseKill, 709 int Opcode, 710 ARMCC::CondCodes Pred, unsigned PredReg, 711 unsigned Scratch, 712 DebugLoc dl, 713 SmallVectorImpl<MachineBasicBlock::iterator> &Merges) { 714 // First calculate which of the registers should be killed by the merged 715 // instruction. 716 const unsigned insertPos = memOps[insertAfter].Position; 717 SmallSet<unsigned, 4> KilledRegs; 718 DenseMap<unsigned, unsigned> Killer; 719 for (unsigned i = 0, e = memOps.size(); i != e; ++i) { 720 if (i == memOpsBegin) { 721 i = memOpsEnd; 722 if (i == e) 723 break; 724 } 725 if (memOps[i].Position < insertPos && memOps[i].isKill) { 726 unsigned Reg = memOps[i].Reg; 727 KilledRegs.insert(Reg); 728 Killer[Reg] = i; 729 } 730 } 731 732 SmallVector<std::pair<unsigned, bool>, 8> Regs; 733 SmallVector<unsigned, 8> ImpDefs; 734 SmallVector<MachineOperand *, 8> UsesOfImpDefs; 735 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { 736 unsigned Reg = memOps[i].Reg; 737 // If we are inserting the merged operation after an operation that 738 // uses the same register, make sure to transfer any kill flag. 739 bool isKill = memOps[i].isKill || KilledRegs.count(Reg); 740 Regs.push_back(std::make_pair(Reg, isKill)); 741 742 // Collect any implicit defs of super-registers. They must be preserved. 743 for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) { 744 if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead()) 745 continue; 746 unsigned DefReg = MO->getReg(); 747 if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end()) 748 ImpDefs.push_back(DefReg); 749 750 // There may be other uses of the definition between this instruction and 751 // the eventual LDM/STM position. These should be marked undef if the 752 // merge takes place. 753 findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position, 754 insertPos); 755 } 756 } 757 758 // Try to do the merge. 759 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI; 760 ++Loc; 761 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode, 762 Pred, PredReg, Scratch, dl, Regs, ImpDefs)) 763 return; 764 765 // Merge succeeded, update records. 766 Merges.push_back(std::prev(Loc)); 767 768 // In gathering loads together, we may have moved the imp-def of a register 769 // past one of its uses. This is OK, since we know better than the rest of 770 // LLVM what's OK with ARM loads and stores; but we still have to adjust the 771 // affected uses. 772 for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(), 773 E = UsesOfImpDefs.end(); 774 I != E; ++I) 775 (*I)->setIsUndef(); 776 777 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { 778 // Remove kill flags from any memops that come before insertPos. 779 if (Regs[i-memOpsBegin].second) { 780 unsigned Reg = Regs[i-memOpsBegin].first; 781 if (KilledRegs.count(Reg)) { 782 unsigned j = Killer[Reg]; 783 int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true); 784 assert(Idx >= 0 && "Cannot find killing operand"); 785 memOps[j].MBBI->getOperand(Idx).setIsKill(false); 786 memOps[j].isKill = false; 787 } 788 memOps[i].isKill = true; 789 } 790 MBB.erase(memOps[i].MBBI); 791 // Update this memop to refer to the merged instruction. 792 // We may need to move kill flags again. 793 memOps[i].Merged = true; 794 memOps[i].MBBI = Merges.back(); 795 memOps[i].Position = insertPos; 796 } 797 798 // Update memOps offsets, since they may have been modified by MergeOps. 799 for (auto &MemOp : memOps) { 800 MemOp.Offset = getMemoryOpOffset(MemOp.MBBI); 801 } 802 } 803 804 /// MergeLDR_STR - Merge a number of load / store instructions into one or more 805 /// load / store multiple instructions. 806 void 807 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, 808 unsigned Base, int Opcode, unsigned Size, 809 ARMCC::CondCodes Pred, unsigned PredReg, 810 unsigned Scratch, MemOpQueue &MemOps, 811 SmallVectorImpl<MachineBasicBlock::iterator> &Merges) { 812 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); 813 int Offset = MemOps[SIndex].Offset; 814 int SOffset = Offset; 815 unsigned insertAfter = SIndex; 816 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI; 817 DebugLoc dl = Loc->getDebugLoc(); 818 const MachineOperand &PMO = Loc->getOperand(0); 819 unsigned PReg = PMO.getReg(); 820 unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg); 821 unsigned Count = 1; 822 unsigned Limit = ~0U; 823 bool BaseKill = false; 824 // vldm / vstm limit are 32 for S variants, 16 for D variants. 825 826 switch (Opcode) { 827 default: break; 828 case ARM::VSTRS: 829 Limit = 32; 830 break; 831 case ARM::VSTRD: 832 Limit = 16; 833 break; 834 case ARM::VLDRD: 835 Limit = 16; 836 break; 837 case ARM::VLDRS: 838 Limit = 32; 839 break; 840 } 841 842 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) { 843 int NewOffset = MemOps[i].Offset; 844 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0); 845 unsigned Reg = MO.getReg(); 846 unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg); 847 // Register numbers must be in ascending order. For VFP / NEON load and 848 // store multiples, the registers must also be consecutive and within the 849 // limit on the number of registers per instruction. 850 if (Reg != ARM::SP && 851 NewOffset == Offset + (int)Size && 852 ((isNotVFP && RegNum > PRegNum) || 853 ((Count < Limit) && RegNum == PRegNum+1)) && 854 // On Swift we don't want vldm/vstm to start with a odd register num 855 // because Q register unaligned vldm/vstm need more uops. 856 (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) { 857 Offset += Size; 858 PRegNum = RegNum; 859 ++Count; 860 } else { 861 // Can't merge this in. Try merge the earlier ones first. 862 // We need to compute BaseKill here because the MemOps may have been 863 // reordered. 864 BaseKill = Loc->killsRegister(Base); 865 866 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base, 867 BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); 868 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch, 869 MemOps, Merges); 870 return; 871 } 872 873 if (MemOps[i].Position > MemOps[insertAfter].Position) { 874 insertAfter = i; 875 Loc = MemOps[i].MBBI; 876 } 877 } 878 879 BaseKill = Loc->killsRegister(Base); 880 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset, 881 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); 882 } 883 884 static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, 885 unsigned Bytes, unsigned Limit, 886 ARMCC::CondCodes Pred, unsigned PredReg) { 887 unsigned MyPredReg = 0; 888 if (!MI) 889 return false; 890 891 bool CheckCPSRDef = false; 892 switch (MI->getOpcode()) { 893 default: return false; 894 case ARM::tSUBi8: 895 case ARM::t2SUBri: 896 case ARM::SUBri: 897 CheckCPSRDef = true; 898 // fallthrough 899 case ARM::tSUBspi: 900 break; 901 } 902 903 // Make sure the offset fits in 8 bits. 904 if (Bytes == 0 || (Limit && Bytes >= Limit)) 905 return false; 906 907 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi || 908 MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME 909 if (!(MI->getOperand(0).getReg() == Base && 910 MI->getOperand(1).getReg() == Base && 911 (MI->getOperand(2).getImm() * Scale) == Bytes && 912 getInstrPredicate(MI, MyPredReg) == Pred && 913 MyPredReg == PredReg)) 914 return false; 915 916 return CheckCPSRDef ? !definesCPSR(MI) : true; 917 } 918 919 static bool isMatchingIncrement(MachineInstr *MI, unsigned Base, 920 unsigned Bytes, unsigned Limit, 921 ARMCC::CondCodes Pred, unsigned PredReg) { 922 unsigned MyPredReg = 0; 923 if (!MI) 924 return false; 925 926 bool CheckCPSRDef = false; 927 switch (MI->getOpcode()) { 928 default: return false; 929 case ARM::tADDi8: 930 case ARM::t2ADDri: 931 case ARM::ADDri: 932 CheckCPSRDef = true; 933 // fallthrough 934 case ARM::tADDspi: 935 break; 936 } 937 938 if (Bytes == 0 || (Limit && Bytes >= Limit)) 939 // Make sure the offset fits in 8 bits. 940 return false; 941 942 unsigned Scale = (MI->getOpcode() == ARM::tADDspi || 943 MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME 944 if (!(MI->getOperand(0).getReg() == Base && 945 MI->getOperand(1).getReg() == Base && 946 (MI->getOperand(2).getImm() * Scale) == Bytes && 947 getInstrPredicate(MI, MyPredReg) == Pred && 948 MyPredReg == PredReg)) 949 return false; 950 951 return CheckCPSRDef ? !definesCPSR(MI) : true; 952 } 953 954 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { 955 switch (MI->getOpcode()) { 956 default: return 0; 957 case ARM::LDRi12: 958 case ARM::STRi12: 959 case ARM::tLDRi: 960 case ARM::tSTRi: 961 case ARM::t2LDRi8: 962 case ARM::t2LDRi12: 963 case ARM::t2STRi8: 964 case ARM::t2STRi12: 965 case ARM::VLDRS: 966 case ARM::VSTRS: 967 return 4; 968 case ARM::VLDRD: 969 case ARM::VSTRD: 970 return 8; 971 case ARM::LDMIA: 972 case ARM::LDMDA: 973 case ARM::LDMDB: 974 case ARM::LDMIB: 975 case ARM::STMIA: 976 case ARM::STMDA: 977 case ARM::STMDB: 978 case ARM::STMIB: 979 case ARM::tLDMIA: 980 case ARM::tLDMIA_UPD: 981 case ARM::tSTMIA_UPD: 982 case ARM::t2LDMIA: 983 case ARM::t2LDMDB: 984 case ARM::t2STMIA: 985 case ARM::t2STMDB: 986 case ARM::VLDMSIA: 987 case ARM::VSTMSIA: 988 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4; 989 case ARM::VLDMDIA: 990 case ARM::VSTMDIA: 991 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8; 992 } 993 } 994 995 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, 996 ARM_AM::AMSubMode Mode) { 997 switch (Opc) { 998 default: llvm_unreachable("Unhandled opcode!"); 999 case ARM::LDMIA: 1000 case ARM::LDMDA: 1001 case ARM::LDMDB: 1002 case ARM::LDMIB: 1003 switch (Mode) { 1004 default: llvm_unreachable("Unhandled submode!"); 1005 case ARM_AM::ia: return ARM::LDMIA_UPD; 1006 case ARM_AM::ib: return ARM::LDMIB_UPD; 1007 case ARM_AM::da: return ARM::LDMDA_UPD; 1008 case ARM_AM::db: return ARM::LDMDB_UPD; 1009 } 1010 case ARM::STMIA: 1011 case ARM::STMDA: 1012 case ARM::STMDB: 1013 case ARM::STMIB: 1014 switch (Mode) { 1015 default: llvm_unreachable("Unhandled submode!"); 1016 case ARM_AM::ia: return ARM::STMIA_UPD; 1017 case ARM_AM::ib: return ARM::STMIB_UPD; 1018 case ARM_AM::da: return ARM::STMDA_UPD; 1019 case ARM_AM::db: return ARM::STMDB_UPD; 1020 } 1021 case ARM::t2LDMIA: 1022 case ARM::t2LDMDB: 1023 switch (Mode) { 1024 default: llvm_unreachable("Unhandled submode!"); 1025 case ARM_AM::ia: return ARM::t2LDMIA_UPD; 1026 case ARM_AM::db: return ARM::t2LDMDB_UPD; 1027 } 1028 case ARM::t2STMIA: 1029 case ARM::t2STMDB: 1030 switch (Mode) { 1031 default: llvm_unreachable("Unhandled submode!"); 1032 case ARM_AM::ia: return ARM::t2STMIA_UPD; 1033 case ARM_AM::db: return ARM::t2STMDB_UPD; 1034 } 1035 case ARM::VLDMSIA: 1036 switch (Mode) { 1037 default: llvm_unreachable("Unhandled submode!"); 1038 case ARM_AM::ia: return ARM::VLDMSIA_UPD; 1039 case ARM_AM::db: return ARM::VLDMSDB_UPD; 1040 } 1041 case ARM::VLDMDIA: 1042 switch (Mode) { 1043 default: llvm_unreachable("Unhandled submode!"); 1044 case ARM_AM::ia: return ARM::VLDMDIA_UPD; 1045 case ARM_AM::db: return ARM::VLDMDDB_UPD; 1046 } 1047 case ARM::VSTMSIA: 1048 switch (Mode) { 1049 default: llvm_unreachable("Unhandled submode!"); 1050 case ARM_AM::ia: return ARM::VSTMSIA_UPD; 1051 case ARM_AM::db: return ARM::VSTMSDB_UPD; 1052 } 1053 case ARM::VSTMDIA: 1054 switch (Mode) { 1055 default: llvm_unreachable("Unhandled submode!"); 1056 case ARM_AM::ia: return ARM::VSTMDIA_UPD; 1057 case ARM_AM::db: return ARM::VSTMDDB_UPD; 1058 } 1059 } 1060 } 1061 1062 /// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base 1063 /// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible: 1064 /// 1065 /// stmia rn, <ra, rb, rc> 1066 /// rn := rn + 4 * 3; 1067 /// => 1068 /// stmia rn!, <ra, rb, rc> 1069 /// 1070 /// rn := rn - 4 * 3; 1071 /// ldmia rn, <ra, rb, rc> 1072 /// => 1073 /// ldmdb rn!, <ra, rb, rc> 1074 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, 1075 MachineBasicBlock::iterator MBBI, 1076 bool &Advance, 1077 MachineBasicBlock::iterator &I) { 1078 // Thumb1 is already using updating loads/stores. 1079 if (isThumb1) return false; 1080 1081 MachineInstr *MI = MBBI; 1082 unsigned Base = MI->getOperand(0).getReg(); 1083 bool BaseKill = MI->getOperand(0).isKill(); 1084 unsigned Bytes = getLSMultipleTransferSize(MI); 1085 unsigned PredReg = 0; 1086 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 1087 int Opcode = MI->getOpcode(); 1088 DebugLoc dl = MI->getDebugLoc(); 1089 1090 // Can't use an updating ld/st if the base register is also a dest 1091 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined. 1092 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i) 1093 if (MI->getOperand(i).getReg() == Base) 1094 return false; 1095 1096 bool DoMerge = false; 1097 ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode); 1098 1099 // Try merging with the previous instruction. 1100 MachineBasicBlock::iterator BeginMBBI = MBB.begin(); 1101 if (MBBI != BeginMBBI) { 1102 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI); 1103 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) 1104 --PrevMBBI; 1105 if (Mode == ARM_AM::ia && 1106 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { 1107 Mode = ARM_AM::db; 1108 DoMerge = true; 1109 } else if (Mode == ARM_AM::ib && 1110 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { 1111 Mode = ARM_AM::da; 1112 DoMerge = true; 1113 } 1114 if (DoMerge) 1115 MBB.erase(PrevMBBI); 1116 } 1117 1118 // Try merging with the next instruction. 1119 MachineBasicBlock::iterator EndMBBI = MBB.end(); 1120 if (!DoMerge && MBBI != EndMBBI) { 1121 MachineBasicBlock::iterator NextMBBI = std::next(MBBI); 1122 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) 1123 ++NextMBBI; 1124 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) && 1125 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { 1126 DoMerge = true; 1127 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) && 1128 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { 1129 DoMerge = true; 1130 } 1131 if (DoMerge) { 1132 if (NextMBBI == I) { 1133 Advance = true; 1134 ++I; 1135 } 1136 MBB.erase(NextMBBI); 1137 } 1138 } 1139 1140 if (!DoMerge) 1141 return false; 1142 1143 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode); 1144 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) 1145 .addReg(Base, getDefRegState(true)) // WB base register 1146 .addReg(Base, getKillRegState(BaseKill)) 1147 .addImm(Pred).addReg(PredReg); 1148 1149 // Transfer the rest of operands. 1150 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum) 1151 MIB.addOperand(MI->getOperand(OpNum)); 1152 1153 // Transfer memoperands. 1154 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); 1155 1156 MBB.erase(MBBI); 1157 return true; 1158 } 1159 1160 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc, 1161 ARM_AM::AddrOpc Mode) { 1162 switch (Opc) { 1163 case ARM::LDRi12: 1164 return ARM::LDR_PRE_IMM; 1165 case ARM::STRi12: 1166 return ARM::STR_PRE_IMM; 1167 case ARM::VLDRS: 1168 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD; 1169 case ARM::VLDRD: 1170 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD; 1171 case ARM::VSTRS: 1172 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD; 1173 case ARM::VSTRD: 1174 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD; 1175 case ARM::t2LDRi8: 1176 case ARM::t2LDRi12: 1177 return ARM::t2LDR_PRE; 1178 case ARM::t2STRi8: 1179 case ARM::t2STRi12: 1180 return ARM::t2STR_PRE; 1181 default: llvm_unreachable("Unhandled opcode!"); 1182 } 1183 } 1184 1185 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, 1186 ARM_AM::AddrOpc Mode) { 1187 switch (Opc) { 1188 case ARM::LDRi12: 1189 return ARM::LDR_POST_IMM; 1190 case ARM::STRi12: 1191 return ARM::STR_POST_IMM; 1192 case ARM::VLDRS: 1193 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD; 1194 case ARM::VLDRD: 1195 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD; 1196 case ARM::VSTRS: 1197 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD; 1198 case ARM::VSTRD: 1199 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD; 1200 case ARM::t2LDRi8: 1201 case ARM::t2LDRi12: 1202 return ARM::t2LDR_POST; 1203 case ARM::t2STRi8: 1204 case ARM::t2STRi12: 1205 return ARM::t2STR_POST; 1206 default: llvm_unreachable("Unhandled opcode!"); 1207 } 1208 } 1209 1210 /// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base 1211 /// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible: 1212 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, 1213 MachineBasicBlock::iterator MBBI, 1214 const TargetInstrInfo *TII, 1215 bool &Advance, 1216 MachineBasicBlock::iterator &I) { 1217 // Thumb1 doesn't have updating LDR/STR. 1218 // FIXME: Use LDM/STM with single register instead. 1219 if (isThumb1) return false; 1220 1221 MachineInstr *MI = MBBI; 1222 unsigned Base = MI->getOperand(1).getReg(); 1223 bool BaseKill = MI->getOperand(1).isKill(); 1224 unsigned Bytes = getLSMultipleTransferSize(MI); 1225 int Opcode = MI->getOpcode(); 1226 DebugLoc dl = MI->getDebugLoc(); 1227 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS || 1228 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS); 1229 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12); 1230 if (isi32Load(Opcode) || isi32Store(Opcode)) 1231 if (MI->getOperand(2).getImm() != 0) 1232 return false; 1233 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0) 1234 return false; 1235 1236 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD; 1237 // Can't do the merge if the destination register is the same as the would-be 1238 // writeback register. 1239 if (MI->getOperand(0).getReg() == Base) 1240 return false; 1241 1242 unsigned PredReg = 0; 1243 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 1244 bool DoMerge = false; 1245 ARM_AM::AddrOpc AddSub = ARM_AM::add; 1246 unsigned NewOpc = 0; 1247 // AM2 - 12 bits, thumb2 - 8 bits. 1248 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100); 1249 1250 // Try merging with the previous instruction. 1251 MachineBasicBlock::iterator BeginMBBI = MBB.begin(); 1252 if (MBBI != BeginMBBI) { 1253 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI); 1254 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) 1255 --PrevMBBI; 1256 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) { 1257 DoMerge = true; 1258 AddSub = ARM_AM::sub; 1259 } else if (!isAM5 && 1260 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) { 1261 DoMerge = true; 1262 } 1263 if (DoMerge) { 1264 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub); 1265 MBB.erase(PrevMBBI); 1266 } 1267 } 1268 1269 // Try merging with the next instruction. 1270 MachineBasicBlock::iterator EndMBBI = MBB.end(); 1271 if (!DoMerge && MBBI != EndMBBI) { 1272 MachineBasicBlock::iterator NextMBBI = std::next(MBBI); 1273 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) 1274 ++NextMBBI; 1275 if (!isAM5 && 1276 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) { 1277 DoMerge = true; 1278 AddSub = ARM_AM::sub; 1279 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) { 1280 DoMerge = true; 1281 } 1282 if (DoMerge) { 1283 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub); 1284 if (NextMBBI == I) { 1285 Advance = true; 1286 ++I; 1287 } 1288 MBB.erase(NextMBBI); 1289 } 1290 } 1291 1292 if (!DoMerge) 1293 return false; 1294 1295 if (isAM5) { 1296 // VLDM[SD]_UPD, VSTM[SD]_UPD 1297 // (There are no base-updating versions of VLDR/VSTR instructions, but the 1298 // updating load/store-multiple instructions can be used with only one 1299 // register.) 1300 MachineOperand &MO = MI->getOperand(0); 1301 BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) 1302 .addReg(Base, getDefRegState(true)) // WB base register 1303 .addReg(Base, getKillRegState(isLd ? BaseKill : false)) 1304 .addImm(Pred).addReg(PredReg) 1305 .addReg(MO.getReg(), (isLd ? getDefRegState(true) : 1306 getKillRegState(MO.isKill()))); 1307 } else if (isLd) { 1308 if (isAM2) { 1309 // LDR_PRE, LDR_POST 1310 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) { 1311 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; 1312 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) 1313 .addReg(Base, RegState::Define) 1314 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 1315 } else { 1316 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); 1317 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) 1318 .addReg(Base, RegState::Define) 1319 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); 1320 } 1321 } else { 1322 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; 1323 // t2LDR_PRE, t2LDR_POST 1324 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) 1325 .addReg(Base, RegState::Define) 1326 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 1327 } 1328 } else { 1329 MachineOperand &MO = MI->getOperand(0); 1330 // FIXME: post-indexed stores use am2offset_imm, which still encodes 1331 // the vestigal zero-reg offset register. When that's fixed, this clause 1332 // can be removed entirely. 1333 if (isAM2 && NewOpc == ARM::STR_POST_IMM) { 1334 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); 1335 // STR_PRE, STR_POST 1336 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base) 1337 .addReg(MO.getReg(), getKillRegState(MO.isKill())) 1338 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); 1339 } else { 1340 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; 1341 // t2STR_PRE, t2STR_POST 1342 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base) 1343 .addReg(MO.getReg(), getKillRegState(MO.isKill())) 1344 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 1345 } 1346 } 1347 MBB.erase(MBBI); 1348 1349 return true; 1350 } 1351 1352 /// isMemoryOp - Returns true if instruction is a memory operation that this 1353 /// pass is capable of operating on. 1354 static bool isMemoryOp(const MachineInstr *MI) { 1355 // When no memory operands are present, conservatively assume unaligned, 1356 // volatile, unfoldable. 1357 if (!MI->hasOneMemOperand()) 1358 return false; 1359 1360 const MachineMemOperand *MMO = *MI->memoperands_begin(); 1361 1362 // Don't touch volatile memory accesses - we may be changing their order. 1363 if (MMO->isVolatile()) 1364 return false; 1365 1366 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is 1367 // not. 1368 if (MMO->getAlignment() < 4) 1369 return false; 1370 1371 // str <undef> could probably be eliminated entirely, but for now we just want 1372 // to avoid making a mess of it. 1373 // FIXME: Use str <undef> as a wildcard to enable better stm folding. 1374 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() && 1375 MI->getOperand(0).isUndef()) 1376 return false; 1377 1378 // Likewise don't mess with references to undefined addresses. 1379 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() && 1380 MI->getOperand(1).isUndef()) 1381 return false; 1382 1383 int Opcode = MI->getOpcode(); 1384 switch (Opcode) { 1385 default: break; 1386 case ARM::VLDRS: 1387 case ARM::VSTRS: 1388 return MI->getOperand(1).isReg(); 1389 case ARM::VLDRD: 1390 case ARM::VSTRD: 1391 return MI->getOperand(1).isReg(); 1392 case ARM::LDRi12: 1393 case ARM::STRi12: 1394 case ARM::tLDRi: 1395 case ARM::tSTRi: 1396 case ARM::t2LDRi8: 1397 case ARM::t2LDRi12: 1398 case ARM::t2STRi8: 1399 case ARM::t2STRi12: 1400 return MI->getOperand(1).isReg(); 1401 } 1402 return false; 1403 } 1404 1405 /// AdvanceRS - Advance register scavenger to just before the earliest memory 1406 /// op that is being merged. 1407 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) { 1408 MachineBasicBlock::iterator Loc = MemOps[0].MBBI; 1409 unsigned Position = MemOps[0].Position; 1410 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) { 1411 if (MemOps[i].Position < Position) { 1412 Position = MemOps[i].Position; 1413 Loc = MemOps[i].MBBI; 1414 } 1415 } 1416 1417 if (Loc != MBB.begin()) 1418 RS->forward(std::prev(Loc)); 1419 } 1420 1421 static void InsertLDR_STR(MachineBasicBlock &MBB, 1422 MachineBasicBlock::iterator &MBBI, 1423 int Offset, bool isDef, 1424 DebugLoc dl, unsigned NewOpc, 1425 unsigned Reg, bool RegDeadKill, bool RegUndef, 1426 unsigned BaseReg, bool BaseKill, bool BaseUndef, 1427 bool OffKill, bool OffUndef, 1428 ARMCC::CondCodes Pred, unsigned PredReg, 1429 const TargetInstrInfo *TII, bool isT2) { 1430 if (isDef) { 1431 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), 1432 TII->get(NewOpc)) 1433 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill)) 1434 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef)); 1435 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 1436 } else { 1437 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), 1438 TII->get(NewOpc)) 1439 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef)) 1440 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef)); 1441 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 1442 } 1443 } 1444 1445 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, 1446 MachineBasicBlock::iterator &MBBI) { 1447 MachineInstr *MI = &*MBBI; 1448 unsigned Opcode = MI->getOpcode(); 1449 if (Opcode == ARM::LDRD || Opcode == ARM::STRD || 1450 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) { 1451 const MachineOperand &BaseOp = MI->getOperand(2); 1452 unsigned BaseReg = BaseOp.getReg(); 1453 unsigned EvenReg = MI->getOperand(0).getReg(); 1454 unsigned OddReg = MI->getOperand(1).getReg(); 1455 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false); 1456 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false); 1457 // ARM errata 602117: LDRD with base in list may result in incorrect base 1458 // register when interrupted or faulted. 1459 bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3(); 1460 if (!Errata602117 && 1461 ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)) 1462 return false; 1463 1464 MachineBasicBlock::iterator NewBBI = MBBI; 1465 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8; 1466 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8; 1467 bool EvenDeadKill = isLd ? 1468 MI->getOperand(0).isDead() : MI->getOperand(0).isKill(); 1469 bool EvenUndef = MI->getOperand(0).isUndef(); 1470 bool OddDeadKill = isLd ? 1471 MI->getOperand(1).isDead() : MI->getOperand(1).isKill(); 1472 bool OddUndef = MI->getOperand(1).isUndef(); 1473 bool BaseKill = BaseOp.isKill(); 1474 bool BaseUndef = BaseOp.isUndef(); 1475 bool OffKill = isT2 ? false : MI->getOperand(3).isKill(); 1476 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef(); 1477 int OffImm = getMemoryOpOffset(MI); 1478 unsigned PredReg = 0; 1479 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 1480 1481 if (OddRegNum > EvenRegNum && OffImm == 0) { 1482 // Ascending register numbers and no offset. It's safe to change it to a 1483 // ldm or stm. 1484 unsigned NewOpc = (isLd) 1485 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA) 1486 : (isT2 ? ARM::t2STMIA : ARM::STMIA); 1487 if (isLd) { 1488 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) 1489 .addReg(BaseReg, getKillRegState(BaseKill)) 1490 .addImm(Pred).addReg(PredReg) 1491 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill)) 1492 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill)); 1493 ++NumLDRD2LDM; 1494 } else { 1495 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) 1496 .addReg(BaseReg, getKillRegState(BaseKill)) 1497 .addImm(Pred).addReg(PredReg) 1498 .addReg(EvenReg, 1499 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef)) 1500 .addReg(OddReg, 1501 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef)); 1502 ++NumSTRD2STM; 1503 } 1504 NewBBI = std::prev(MBBI); 1505 } else { 1506 // Split into two instructions. 1507 unsigned NewOpc = (isLd) 1508 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) 1509 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); 1510 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset, 1511 // so adjust and use t2LDRi12 here for that. 1512 unsigned NewOpc2 = (isLd) 1513 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) 1514 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); 1515 DebugLoc dl = MBBI->getDebugLoc(); 1516 // If this is a load and base register is killed, it may have been 1517 // re-defed by the load, make sure the first load does not clobber it. 1518 if (isLd && 1519 (BaseKill || OffKill) && 1520 (TRI->regsOverlap(EvenReg, BaseReg))) { 1521 assert(!TRI->regsOverlap(OddReg, BaseReg)); 1522 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, 1523 OddReg, OddDeadKill, false, 1524 BaseReg, false, BaseUndef, false, OffUndef, 1525 Pred, PredReg, TII, isT2); 1526 NewBBI = std::prev(MBBI); 1527 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, 1528 EvenReg, EvenDeadKill, false, 1529 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, 1530 Pred, PredReg, TII, isT2); 1531 } else { 1532 if (OddReg == EvenReg && EvenDeadKill) { 1533 // If the two source operands are the same, the kill marker is 1534 // probably on the first one. e.g. 1535 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0 1536 EvenDeadKill = false; 1537 OddDeadKill = true; 1538 } 1539 // Never kill the base register in the first instruction. 1540 if (EvenReg == BaseReg) 1541 EvenDeadKill = false; 1542 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, 1543 EvenReg, EvenDeadKill, EvenUndef, 1544 BaseReg, false, BaseUndef, false, OffUndef, 1545 Pred, PredReg, TII, isT2); 1546 NewBBI = std::prev(MBBI); 1547 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, 1548 OddReg, OddDeadKill, OddUndef, 1549 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, 1550 Pred, PredReg, TII, isT2); 1551 } 1552 if (isLd) 1553 ++NumLDRD2LDR; 1554 else 1555 ++NumSTRD2STR; 1556 } 1557 1558 MBB.erase(MI); 1559 MBBI = NewBBI; 1560 return true; 1561 } 1562 return false; 1563 } 1564 1565 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR 1566 /// ops of the same base and incrementing offset into LDM / STM ops. 1567 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { 1568 unsigned NumMerges = 0; 1569 unsigned NumMemOps = 0; 1570 MemOpQueue MemOps; 1571 unsigned CurrBase = 0; 1572 int CurrOpc = -1; 1573 unsigned CurrSize = 0; 1574 ARMCC::CondCodes CurrPred = ARMCC::AL; 1575 unsigned CurrPredReg = 0; 1576 unsigned Position = 0; 1577 SmallVector<MachineBasicBlock::iterator,4> Merges; 1578 1579 RS->enterBasicBlock(&MBB); 1580 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); 1581 while (MBBI != E) { 1582 if (FixInvalidRegPairOp(MBB, MBBI)) 1583 continue; 1584 1585 bool Advance = false; 1586 bool TryMerge = false; 1587 bool Clobber = false; 1588 1589 bool isMemOp = isMemoryOp(MBBI); 1590 if (isMemOp) { 1591 int Opcode = MBBI->getOpcode(); 1592 unsigned Size = getLSMultipleTransferSize(MBBI); 1593 const MachineOperand &MO = MBBI->getOperand(0); 1594 unsigned Reg = MO.getReg(); 1595 bool isKill = MO.isDef() ? false : MO.isKill(); 1596 unsigned Base = MBBI->getOperand(1).getReg(); 1597 unsigned PredReg = 0; 1598 ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg); 1599 int Offset = getMemoryOpOffset(MBBI); 1600 // Watch out for: 1601 // r4 := ldr [r5] 1602 // r5 := ldr [r5, #4] 1603 // r6 := ldr [r5, #8] 1604 // 1605 // The second ldr has effectively broken the chain even though it 1606 // looks like the later ldr(s) use the same base register. Try to 1607 // merge the ldr's so far, including this one. But don't try to 1608 // combine the following ldr(s). 1609 Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg()); 1610 1611 // Watch out for: 1612 // r4 := ldr [r0, #8] 1613 // r4 := ldr [r0, #4] 1614 // 1615 // The optimization may reorder the second ldr in front of the first 1616 // ldr, which violates write after write(WAW) dependence. The same as 1617 // str. Try to merge inst(s) already in MemOps. 1618 bool Overlap = false; 1619 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) { 1620 if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) { 1621 Overlap = true; 1622 break; 1623 } 1624 } 1625 1626 if (CurrBase == 0 && !Clobber) { 1627 // Start of a new chain. 1628 CurrBase = Base; 1629 CurrOpc = Opcode; 1630 CurrSize = Size; 1631 CurrPred = Pred; 1632 CurrPredReg = PredReg; 1633 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI)); 1634 ++NumMemOps; 1635 Advance = true; 1636 } else if (!Overlap) { 1637 if (Clobber) { 1638 TryMerge = true; 1639 Advance = true; 1640 } 1641 1642 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) { 1643 // No need to match PredReg. 1644 // Continue adding to the queue. 1645 if (Offset > MemOps.back().Offset) { 1646 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, 1647 Position, MBBI)); 1648 ++NumMemOps; 1649 Advance = true; 1650 } else { 1651 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); 1652 I != E; ++I) { 1653 if (Offset < I->Offset) { 1654 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill, 1655 Position, MBBI)); 1656 ++NumMemOps; 1657 Advance = true; 1658 break; 1659 } else if (Offset == I->Offset) { 1660 // Collision! This can't be merged! 1661 break; 1662 } 1663 } 1664 } 1665 } 1666 } 1667 } 1668 1669 if (MBBI->isDebugValue()) { 1670 ++MBBI; 1671 if (MBBI == E) 1672 // Reach the end of the block, try merging the memory instructions. 1673 TryMerge = true; 1674 } else if (Advance) { 1675 ++Position; 1676 ++MBBI; 1677 if (MBBI == E) 1678 // Reach the end of the block, try merging the memory instructions. 1679 TryMerge = true; 1680 } else { 1681 TryMerge = true; 1682 } 1683 1684 if (TryMerge) { 1685 if (NumMemOps > 1) { 1686 // Try to find a free register to use as a new base in case it's needed. 1687 // First advance to the instruction just before the start of the chain. 1688 AdvanceRS(MBB, MemOps); 1689 1690 // Find a scratch register. 1691 unsigned Scratch = 1692 RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass); 1693 1694 // Process the load / store instructions. 1695 RS->forward(std::prev(MBBI)); 1696 1697 // Merge ops. 1698 Merges.clear(); 1699 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize, 1700 CurrPred, CurrPredReg, Scratch, MemOps, Merges); 1701 1702 // Try folding preceding/trailing base inc/dec into the generated 1703 // LDM/STM ops. 1704 for (unsigned i = 0, e = Merges.size(); i < e; ++i) 1705 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI)) 1706 ++NumMerges; 1707 NumMerges += Merges.size(); 1708 1709 // Try folding preceding/trailing base inc/dec into those load/store 1710 // that were not merged to form LDM/STM ops. 1711 for (unsigned i = 0; i != NumMemOps; ++i) 1712 if (!MemOps[i].Merged) 1713 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI)) 1714 ++NumMerges; 1715 1716 // RS may be pointing to an instruction that's deleted. 1717 RS->skipTo(std::prev(MBBI)); 1718 } else if (NumMemOps == 1) { 1719 // Try folding preceding/trailing base inc/dec into the single 1720 // load/store. 1721 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) { 1722 ++NumMerges; 1723 RS->forward(std::prev(MBBI)); 1724 } 1725 } 1726 1727 CurrBase = 0; 1728 CurrOpc = -1; 1729 CurrSize = 0; 1730 CurrPred = ARMCC::AL; 1731 CurrPredReg = 0; 1732 if (NumMemOps) { 1733 MemOps.clear(); 1734 NumMemOps = 0; 1735 } 1736 1737 // If iterator hasn't been advanced and this is not a memory op, skip it. 1738 // It can't start a new chain anyway. 1739 if (!Advance && !isMemOp && MBBI != E) { 1740 ++Position; 1741 ++MBBI; 1742 } 1743 } 1744 } 1745 return NumMerges > 0; 1746 } 1747 1748 /// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops 1749 /// ("bx lr" and "mov pc, lr") into the preceding stack restore so it 1750 /// directly restore the value of LR into pc. 1751 /// ldmfd sp!, {..., lr} 1752 /// bx lr 1753 /// or 1754 /// ldmfd sp!, {..., lr} 1755 /// mov pc, lr 1756 /// => 1757 /// ldmfd sp!, {..., pc} 1758 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { 1759 // Thumb1 LDM doesn't allow high registers. 1760 if (isThumb1) return false; 1761 if (MBB.empty()) return false; 1762 1763 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); 1764 if (MBBI != MBB.begin() && 1765 (MBBI->getOpcode() == ARM::BX_RET || 1766 MBBI->getOpcode() == ARM::tBX_RET || 1767 MBBI->getOpcode() == ARM::MOVPCLR)) { 1768 MachineInstr *PrevMI = std::prev(MBBI); 1769 unsigned Opcode = PrevMI->getOpcode(); 1770 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD || 1771 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD || 1772 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) { 1773 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1); 1774 if (MO.getReg() != ARM::LR) 1775 return false; 1776 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET); 1777 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) || 1778 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!"); 1779 PrevMI->setDesc(TII->get(NewOpc)); 1780 MO.setReg(ARM::PC); 1781 PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI); 1782 MBB.erase(MBBI); 1783 return true; 1784 } 1785 } 1786 return false; 1787 } 1788 1789 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { 1790 const TargetMachine &TM = Fn.getTarget(); 1791 TL = TM.getSubtargetImpl()->getTargetLowering(); 1792 AFI = Fn.getInfo<ARMFunctionInfo>(); 1793 TII = TM.getSubtargetImpl()->getInstrInfo(); 1794 TRI = TM.getSubtargetImpl()->getRegisterInfo(); 1795 STI = &TM.getSubtarget<ARMSubtarget>(); 1796 RS = new RegScavenger(); 1797 isThumb2 = AFI->isThumb2Function(); 1798 isThumb1 = AFI->isThumbFunction() && !isThumb2; 1799 1800 bool Modified = false; 1801 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; 1802 ++MFI) { 1803 MachineBasicBlock &MBB = *MFI; 1804 Modified |= LoadStoreMultipleOpti(MBB); 1805 if (TM.getSubtarget<ARMSubtarget>().hasV5TOps()) 1806 Modified |= MergeReturnIntoLDM(MBB); 1807 } 1808 1809 delete RS; 1810 return Modified; 1811 } 1812 1813 1814 /// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move 1815 /// load / stores from consecutive locations close to make it more 1816 /// likely they will be combined later. 1817 1818 namespace { 1819 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{ 1820 static char ID; 1821 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {} 1822 1823 const DataLayout *TD; 1824 const TargetInstrInfo *TII; 1825 const TargetRegisterInfo *TRI; 1826 const ARMSubtarget *STI; 1827 MachineRegisterInfo *MRI; 1828 MachineFunction *MF; 1829 1830 bool runOnMachineFunction(MachineFunction &Fn) override; 1831 1832 const char *getPassName() const override { 1833 return "ARM pre- register allocation load / store optimization pass"; 1834 } 1835 1836 private: 1837 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, 1838 unsigned &NewOpc, unsigned &EvenReg, 1839 unsigned &OddReg, unsigned &BaseReg, 1840 int &Offset, 1841 unsigned &PredReg, ARMCC::CondCodes &Pred, 1842 bool &isT2); 1843 bool RescheduleOps(MachineBasicBlock *MBB, 1844 SmallVectorImpl<MachineInstr *> &Ops, 1845 unsigned Base, bool isLd, 1846 DenseMap<MachineInstr*, unsigned> &MI2LocMap); 1847 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB); 1848 }; 1849 char ARMPreAllocLoadStoreOpt::ID = 0; 1850 } 1851 1852 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { 1853 TD = Fn.getSubtarget().getDataLayout(); 1854 TII = Fn.getSubtarget().getInstrInfo(); 1855 TRI = Fn.getSubtarget().getRegisterInfo(); 1856 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget()); 1857 MRI = &Fn.getRegInfo(); 1858 MF = &Fn; 1859 1860 bool Modified = false; 1861 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; 1862 ++MFI) 1863 Modified |= RescheduleLoadStoreInstrs(MFI); 1864 1865 return Modified; 1866 } 1867 1868 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, 1869 MachineBasicBlock::iterator I, 1870 MachineBasicBlock::iterator E, 1871 SmallPtrSetImpl<MachineInstr*> &MemOps, 1872 SmallSet<unsigned, 4> &MemRegs, 1873 const TargetRegisterInfo *TRI) { 1874 // Are there stores / loads / calls between them? 1875 // FIXME: This is overly conservative. We should make use of alias information 1876 // some day. 1877 SmallSet<unsigned, 4> AddedRegPressure; 1878 while (++I != E) { 1879 if (I->isDebugValue() || MemOps.count(&*I)) 1880 continue; 1881 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects()) 1882 return false; 1883 if (isLd && I->mayStore()) 1884 return false; 1885 if (!isLd) { 1886 if (I->mayLoad()) 1887 return false; 1888 // It's not safe to move the first 'str' down. 1889 // str r1, [r0] 1890 // strh r5, [r0] 1891 // str r4, [r0, #+4] 1892 if (I->mayStore()) 1893 return false; 1894 } 1895 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) { 1896 MachineOperand &MO = I->getOperand(j); 1897 if (!MO.isReg()) 1898 continue; 1899 unsigned Reg = MO.getReg(); 1900 if (MO.isDef() && TRI->regsOverlap(Reg, Base)) 1901 return false; 1902 if (Reg != Base && !MemRegs.count(Reg)) 1903 AddedRegPressure.insert(Reg); 1904 } 1905 } 1906 1907 // Estimate register pressure increase due to the transformation. 1908 if (MemRegs.size() <= 4) 1909 // Ok if we are moving small number of instructions. 1910 return true; 1911 return AddedRegPressure.size() <= MemRegs.size() * 2; 1912 } 1913 1914 1915 /// Copy Op0 and Op1 operands into a new array assigned to MI. 1916 static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0, 1917 MachineInstr *Op1) { 1918 assert(MI->memoperands_empty() && "expected a new machineinstr"); 1919 size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin()) 1920 + (Op1->memoperands_end() - Op1->memoperands_begin()); 1921 1922 MachineFunction *MF = MI->getParent()->getParent(); 1923 MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs); 1924 MachineSDNode::mmo_iterator MemEnd = 1925 std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin); 1926 MemEnd = 1927 std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd); 1928 MI->setMemRefs(MemBegin, MemEnd); 1929 } 1930 1931 bool 1932 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, 1933 DebugLoc &dl, 1934 unsigned &NewOpc, unsigned &EvenReg, 1935 unsigned &OddReg, unsigned &BaseReg, 1936 int &Offset, unsigned &PredReg, 1937 ARMCC::CondCodes &Pred, 1938 bool &isT2) { 1939 // Make sure we're allowed to generate LDRD/STRD. 1940 if (!STI->hasV5TEOps()) 1941 return false; 1942 1943 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD 1944 unsigned Scale = 1; 1945 unsigned Opcode = Op0->getOpcode(); 1946 if (Opcode == ARM::LDRi12) { 1947 NewOpc = ARM::LDRD; 1948 } else if (Opcode == ARM::STRi12) { 1949 NewOpc = ARM::STRD; 1950 } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) { 1951 NewOpc = ARM::t2LDRDi8; 1952 Scale = 4; 1953 isT2 = true; 1954 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) { 1955 NewOpc = ARM::t2STRDi8; 1956 Scale = 4; 1957 isT2 = true; 1958 } else { 1959 return false; 1960 } 1961 1962 // Make sure the base address satisfies i64 ld / st alignment requirement. 1963 // At the moment, we ignore the memoryoperand's value. 1964 // If we want to use AliasAnalysis, we should check it accordingly. 1965 if (!Op0->hasOneMemOperand() || 1966 (*Op0->memoperands_begin())->isVolatile()) 1967 return false; 1968 1969 unsigned Align = (*Op0->memoperands_begin())->getAlignment(); 1970 const Function *Func = MF->getFunction(); 1971 unsigned ReqAlign = STI->hasV6Ops() 1972 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext())) 1973 : 8; // Pre-v6 need 8-byte align 1974 if (Align < ReqAlign) 1975 return false; 1976 1977 // Then make sure the immediate offset fits. 1978 int OffImm = getMemoryOpOffset(Op0); 1979 if (isT2) { 1980 int Limit = (1 << 8) * Scale; 1981 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1))) 1982 return false; 1983 Offset = OffImm; 1984 } else { 1985 ARM_AM::AddrOpc AddSub = ARM_AM::add; 1986 if (OffImm < 0) { 1987 AddSub = ARM_AM::sub; 1988 OffImm = - OffImm; 1989 } 1990 int Limit = (1 << 8) * Scale; 1991 if (OffImm >= Limit || (OffImm & (Scale-1))) 1992 return false; 1993 Offset = ARM_AM::getAM3Opc(AddSub, OffImm); 1994 } 1995 EvenReg = Op0->getOperand(0).getReg(); 1996 OddReg = Op1->getOperand(0).getReg(); 1997 if (EvenReg == OddReg) 1998 return false; 1999 BaseReg = Op0->getOperand(1).getReg(); 2000 Pred = getInstrPredicate(Op0, PredReg); 2001 dl = Op0->getDebugLoc(); 2002 return true; 2003 } 2004 2005 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, 2006 SmallVectorImpl<MachineInstr *> &Ops, 2007 unsigned Base, bool isLd, 2008 DenseMap<MachineInstr*, unsigned> &MI2LocMap) { 2009 bool RetVal = false; 2010 2011 // Sort by offset (in reverse order). 2012 std::sort(Ops.begin(), Ops.end(), 2013 [](const MachineInstr *LHS, const MachineInstr *RHS) { 2014 int LOffset = getMemoryOpOffset(LHS); 2015 int ROffset = getMemoryOpOffset(RHS); 2016 assert(LHS == RHS || LOffset != ROffset); 2017 return LOffset > ROffset; 2018 }); 2019 2020 // The loads / stores of the same base are in order. Scan them from first to 2021 // last and check for the following: 2022 // 1. Any def of base. 2023 // 2. Any gaps. 2024 while (Ops.size() > 1) { 2025 unsigned FirstLoc = ~0U; 2026 unsigned LastLoc = 0; 2027 MachineInstr *FirstOp = nullptr; 2028 MachineInstr *LastOp = nullptr; 2029 int LastOffset = 0; 2030 unsigned LastOpcode = 0; 2031 unsigned LastBytes = 0; 2032 unsigned NumMove = 0; 2033 for (int i = Ops.size() - 1; i >= 0; --i) { 2034 MachineInstr *Op = Ops[i]; 2035 unsigned Loc = MI2LocMap[Op]; 2036 if (Loc <= FirstLoc) { 2037 FirstLoc = Loc; 2038 FirstOp = Op; 2039 } 2040 if (Loc >= LastLoc) { 2041 LastLoc = Loc; 2042 LastOp = Op; 2043 } 2044 2045 unsigned LSMOpcode 2046 = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia); 2047 if (LastOpcode && LSMOpcode != LastOpcode) 2048 break; 2049 2050 int Offset = getMemoryOpOffset(Op); 2051 unsigned Bytes = getLSMultipleTransferSize(Op); 2052 if (LastBytes) { 2053 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes)) 2054 break; 2055 } 2056 LastOffset = Offset; 2057 LastBytes = Bytes; 2058 LastOpcode = LSMOpcode; 2059 if (++NumMove == 8) // FIXME: Tune this limit. 2060 break; 2061 } 2062 2063 if (NumMove <= 1) 2064 Ops.pop_back(); 2065 else { 2066 SmallPtrSet<MachineInstr*, 4> MemOps; 2067 SmallSet<unsigned, 4> MemRegs; 2068 for (int i = NumMove-1; i >= 0; --i) { 2069 MemOps.insert(Ops[i]); 2070 MemRegs.insert(Ops[i]->getOperand(0).getReg()); 2071 } 2072 2073 // Be conservative, if the instructions are too far apart, don't 2074 // move them. We want to limit the increase of register pressure. 2075 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this. 2076 if (DoMove) 2077 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp, 2078 MemOps, MemRegs, TRI); 2079 if (!DoMove) { 2080 for (unsigned i = 0; i != NumMove; ++i) 2081 Ops.pop_back(); 2082 } else { 2083 // This is the new location for the loads / stores. 2084 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp; 2085 while (InsertPos != MBB->end() 2086 && (MemOps.count(InsertPos) || InsertPos->isDebugValue())) 2087 ++InsertPos; 2088 2089 // If we are moving a pair of loads / stores, see if it makes sense 2090 // to try to allocate a pair of registers that can form register pairs. 2091 MachineInstr *Op0 = Ops.back(); 2092 MachineInstr *Op1 = Ops[Ops.size()-2]; 2093 unsigned EvenReg = 0, OddReg = 0; 2094 unsigned BaseReg = 0, PredReg = 0; 2095 ARMCC::CondCodes Pred = ARMCC::AL; 2096 bool isT2 = false; 2097 unsigned NewOpc = 0; 2098 int Offset = 0; 2099 DebugLoc dl; 2100 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc, 2101 EvenReg, OddReg, BaseReg, 2102 Offset, PredReg, Pred, isT2)) { 2103 Ops.pop_back(); 2104 Ops.pop_back(); 2105 2106 const MCInstrDesc &MCID = TII->get(NewOpc); 2107 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF); 2108 MRI->constrainRegClass(EvenReg, TRC); 2109 MRI->constrainRegClass(OddReg, TRC); 2110 2111 // Form the pair instruction. 2112 if (isLd) { 2113 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) 2114 .addReg(EvenReg, RegState::Define) 2115 .addReg(OddReg, RegState::Define) 2116 .addReg(BaseReg); 2117 // FIXME: We're converting from LDRi12 to an insn that still 2118 // uses addrmode2, so we need an explicit offset reg. It should 2119 // always by reg0 since we're transforming LDRi12s. 2120 if (!isT2) 2121 MIB.addReg(0); 2122 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 2123 concatenateMemOperands(MIB, Op0, Op1); 2124 DEBUG(dbgs() << "Formed " << *MIB << "\n"); 2125 ++NumLDRDFormed; 2126 } else { 2127 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) 2128 .addReg(EvenReg) 2129 .addReg(OddReg) 2130 .addReg(BaseReg); 2131 // FIXME: We're converting from LDRi12 to an insn that still 2132 // uses addrmode2, so we need an explicit offset reg. It should 2133 // always by reg0 since we're transforming STRi12s. 2134 if (!isT2) 2135 MIB.addReg(0); 2136 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 2137 concatenateMemOperands(MIB, Op0, Op1); 2138 DEBUG(dbgs() << "Formed " << *MIB << "\n"); 2139 ++NumSTRDFormed; 2140 } 2141 MBB->erase(Op0); 2142 MBB->erase(Op1); 2143 2144 // Add register allocation hints to form register pairs. 2145 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg); 2146 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg); 2147 } else { 2148 for (unsigned i = 0; i != NumMove; ++i) { 2149 MachineInstr *Op = Ops.back(); 2150 Ops.pop_back(); 2151 MBB->splice(InsertPos, MBB, Op); 2152 } 2153 } 2154 2155 NumLdStMoved += NumMove; 2156 RetVal = true; 2157 } 2158 } 2159 } 2160 2161 return RetVal; 2162 } 2163 2164 bool 2165 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { 2166 bool RetVal = false; 2167 2168 DenseMap<MachineInstr*, unsigned> MI2LocMap; 2169 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap; 2170 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap; 2171 SmallVector<unsigned, 4> LdBases; 2172 SmallVector<unsigned, 4> StBases; 2173 2174 unsigned Loc = 0; 2175 MachineBasicBlock::iterator MBBI = MBB->begin(); 2176 MachineBasicBlock::iterator E = MBB->end(); 2177 while (MBBI != E) { 2178 for (; MBBI != E; ++MBBI) { 2179 MachineInstr *MI = MBBI; 2180 if (MI->isCall() || MI->isTerminator()) { 2181 // Stop at barriers. 2182 ++MBBI; 2183 break; 2184 } 2185 2186 if (!MI->isDebugValue()) 2187 MI2LocMap[MI] = ++Loc; 2188 2189 if (!isMemoryOp(MI)) 2190 continue; 2191 unsigned PredReg = 0; 2192 if (getInstrPredicate(MI, PredReg) != ARMCC::AL) 2193 continue; 2194 2195 int Opc = MI->getOpcode(); 2196 bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD; 2197 unsigned Base = MI->getOperand(1).getReg(); 2198 int Offset = getMemoryOpOffset(MI); 2199 2200 bool StopHere = false; 2201 if (isLd) { 2202 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI = 2203 Base2LdsMap.find(Base); 2204 if (BI != Base2LdsMap.end()) { 2205 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { 2206 if (Offset == getMemoryOpOffset(BI->second[i])) { 2207 StopHere = true; 2208 break; 2209 } 2210 } 2211 if (!StopHere) 2212 BI->second.push_back(MI); 2213 } else { 2214 Base2LdsMap[Base].push_back(MI); 2215 LdBases.push_back(Base); 2216 } 2217 } else { 2218 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI = 2219 Base2StsMap.find(Base); 2220 if (BI != Base2StsMap.end()) { 2221 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { 2222 if (Offset == getMemoryOpOffset(BI->second[i])) { 2223 StopHere = true; 2224 break; 2225 } 2226 } 2227 if (!StopHere) 2228 BI->second.push_back(MI); 2229 } else { 2230 Base2StsMap[Base].push_back(MI); 2231 StBases.push_back(Base); 2232 } 2233 } 2234 2235 if (StopHere) { 2236 // Found a duplicate (a base+offset combination that's seen earlier). 2237 // Backtrack. 2238 --Loc; 2239 break; 2240 } 2241 } 2242 2243 // Re-schedule loads. 2244 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) { 2245 unsigned Base = LdBases[i]; 2246 SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base]; 2247 if (Lds.size() > 1) 2248 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap); 2249 } 2250 2251 // Re-schedule stores. 2252 for (unsigned i = 0, e = StBases.size(); i != e; ++i) { 2253 unsigned Base = StBases[i]; 2254 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base]; 2255 if (Sts.size() > 1) 2256 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap); 2257 } 2258 2259 if (MBBI != E) { 2260 Base2LdsMap.clear(); 2261 Base2StsMap.clear(); 2262 LdBases.clear(); 2263 StBases.clear(); 2264 } 2265 } 2266 2267 return RetVal; 2268 } 2269 2270 2271 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store 2272 /// optimization pass. 2273 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) { 2274 if (PreAlloc) 2275 return new ARMPreAllocLoadStoreOpt(); 2276 return new ARMLoadStoreOpt(); 2277 } 2278