//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This file contains a pass that performs load / store related peephole
/// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "ThumbRegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

#define DEBUG_TYPE "arm-ldst-opt"

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");

namespace {
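  // An illustrative example of the core transformation (added for clarity;
  // not part of the original source): a chain of word loads off one base
  // register, with offsets and register numbers ascending together,
  //
  //   ldr r0, [r4]
  //   ldr r1, [r4, #4]
  //   ldr r2, [r4, #8]
  //
  // can be merged into a single load-multiple instruction:
  //
  //   ldmia r4, {r0, r1, r2}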
  /// Post-register-allocation pass that combines load / store instructions
  /// to form ldm / stm instructions.
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const MachineFunction *MF;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const MachineRegisterInfo *MRI;
    const ARMSubtarget *STI;
    const TargetLowering *TL;
    ARMFunctionInfo *AFI;
    LivePhysRegs LiveRegs;
    RegisterClassInfo RegClassInfo;
    MachineBasicBlock::const_iterator LiveRegPos;
    bool LiveRegsValid;
    bool RegClassInfoValid;
    bool isThumb1, isThumb2;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM load / store optimization pass";
    }

  private:
    /// A set of load/store MachineInstrs with the same base register, sorted
    /// by offset.
    struct MemOpQueueEntry {
      MachineInstr *MI;
      int Offset;        ///< Load/Store offset.
      unsigned Position; ///< Position as counted from end of basic block.
      MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)
          : MI(MI), Offset(Offset), Position(Position) {}
    };
    typedef SmallVector<MemOpQueueEntry, 8> MemOpQueue;
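    // Note (added for clarity; not in the original source): Position counts
    // upward while LoadStoreMultipleOpti() walks the block from bottom to
    // top, so a *smaller* Position denotes an instruction that appears
    // *later* in program order. FormCandidates() relies on this when it
    // tracks the "latest" and "earliest" member of a chain.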
    /// A set of MachineInstrs that fulfill (nearly all) conditions to get
    /// merged into a LDM/STM.
    struct MergeCandidate {
      /// List of instructions ordered by load/store offset.
      SmallVector<MachineInstr*, 4> Instrs;
      /// Index in Instrs of the instruction that comes latest in the
      /// schedule.
      unsigned LatestMIIdx;
      /// Index in Instrs of the instruction that comes earliest in the
      /// schedule.
      unsigned EarliestMIIdx;
      /// Index into the basic block where the merged instruction will be
      /// inserted. (See MemOpQueueEntry.Position)
      unsigned InsertPos;
      /// Whether the instructions can be merged into a ldm/stm instruction.
      bool CanMergeToLSMulti;
      /// Whether the instructions can be merged into a ldrd/strd instruction.
      bool CanMergeToLSDouble;
    };
    SpecificBumpPtrAllocator<MergeCandidate> Allocator;
    SmallVector<const MergeCandidate*, 4> Candidates;

    void moveLiveRegsBefore(const MachineBasicBlock &MBB,
                            MachineBasicBlock::const_iterator Before);
    unsigned findFreeReg(const TargetRegisterClass &RegClass);
    void UpdateBaseRegUses(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           DebugLoc DL, unsigned Base, unsigned WordOffset,
                           ARMCC::CondCodes Pred, unsigned PredReg);
    MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
        MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
        bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred,
        unsigned PredReg, DebugLoc DL,
        ArrayRef<std::pair<unsigned, bool>> Regs);
    MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
        MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
        bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred,
        unsigned PredReg, DebugLoc DL,
        ArrayRef<std::pair<unsigned, bool>> Regs) const;
    void FormCandidates(const MemOpQueue &MemOps);
    MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineInstr *MI);
    bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static bool definesCPSR(const MachineInstr *MI) {
  for (const auto &MO : MI->operands()) {
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has a live CPSR def, then it's not safe to fold it
      // into a load / store.
      return true;
  }

  return false;
}
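// Worked example for the function below (added for illustration; not in the
// original source): for a Thumb1 "tLDRi r0, [r1, #2]" the immediate operand
// is in words, so the result is 2 * 4 = 8 bytes. For an ARM-mode LDRD/STRD
// the addressing-mode-3 encoding packs an add/sub flag with the magnitude,
// so an encoded subtract of 8 comes back as -8.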
static int getMemoryOpOffset(const MachineInstr *MI) {
  unsigned Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands - 3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  // Thumb1 immediate offsets are scaled by 4.
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
      Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
    return OffField * 4;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
                             : ARM_AM::getAM5Op(OffField);

  if (Op == ARM_AM::sub)
    return -Offset;

  return Offset;
}

static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
  return MI.getOperand(1);
}

static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
  return MI.getOperand(0);
}

static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
  case ARM::tLDRspi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
  case ARM::tSTRspi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}
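// Usage note (added for clarity; not in the original source):
// getLoadStoreMultipleOpcode(ARM::LDRi12, ARM_AM::ia) yields ARM::LDMIA,
// whereas the VLDR/VSTR entries return 0 for the db submode because only the
// writeback (_UPD) forms of those instructions exist; callers must treat 0
// as "no suitable opcode".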
static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

static bool isT1i32Load(unsigned Opc) {
  return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc);
}

static bool isT1i32Store(unsigned Opc) {
  return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}

static bool isLoadSingle(unsigned Opc) {
  return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
}

static unsigned getImmScale(unsigned Opc) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
    return 1;
  case ARM::tLDRHi:
  case ARM::tSTRHi:
    return 2;
  case ARM::tLDRBi:
  case ARM::tSTRBi:
    return 4;
  }
}

static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}
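// Worked example for the function below (added for illustration; not in the
// original source): suppose a Thumb1 merge introduced a writeback of 2 words
// on r0. A later "tLDRi r3, [r0, #5]" can be rewritten as
// "tLDRi r3, [r0, #3]" because tLDRi immediates are in words
// (getImmScale() == 1), while a later byte access "tLDRBi" would need
// 2 * 4 = 8 subtracted from its byte-scaled immediate (getImmScale() == 4).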
/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI,
                                        DebugLoc DL, unsigned Base,
                                        unsigned WordOffset,
                                        ARMCC::CondCodes Pred,
                                        unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");
  // Start updating any instructions with immediate offsets. Insert a SUB
  // before the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    bool InsertSub = false;
    unsigned Opc = MBBI->getOpcode();

    if (MBBI->readsRegister(Base)) {
      int Offset;
      bool IsLoad =
          Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
      bool IsStore =
          Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

      if (IsLoad || IsStore) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
            MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);

        // If storing the base register, it needs to be reset first.
        unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();

        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
          MO.setImm(Offset);
        else
          InsertSub = true;

      } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                 !definesCPSR(MBBI)) {
        // SUBS/ADDS using this register, with a dead def of the CPSR.
        // Merge it with the update; if the merged offset is too large,
        // insert a new sub instead.
        MachineOperand &MO =
            MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ? MO.getImm() + WordOffset * 4
                                      : MO.getImm() - WordOffset * 4;
        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
          // Offset == 0.
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }

      } else {
        // Can't update the instruction.
        InsertSub = true;
      }

    } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
      // Since SUBS sets the condition flags, we can't place the base reset
      // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.
      InsertSub = true;
    }

    if (InsertSub) {
      // An instruction above couldn't be updated, so insert a sub.
      AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
          .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
      return;
    }

    if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
      // Register got killed. Stop updating.
      return;
  }

  // End of block was reached.
  if (MBB.succ_size() > 0) {
    // FIXME: Because of a bug, live registers are sometimes missing from
    // the successor blocks' live-in sets. This means we can't trust that
    // information and *always* have to reset at the end of a block.
    // See PR21029.
    if (MBBI != MBB.end()) --MBBI;
    AddDefaultT1CC(
        BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
        .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
  }
}
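// Note on the liveness helpers below (added for clarity; not in the original
// source): findFreeReg() is only meaningful after moveLiveRegsBefore() has
// positioned LiveRegs at the query point, and because liveness is stepped
// backward from the block end, successive queries within one block must
// proceed from later to earlier positions.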
/// Return the first register of class \p RegClass that is not in \p Regs.
unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
  if (!RegClassInfoValid) {
    RegClassInfo.runOnMachineFunction(*MF);
    RegClassInfoValid = true;
  }

  for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
    if (!LiveRegs.contains(Reg))
      return Reg;
  return 0;
}

/// Compute live registers just before instruction \p Before (in normal
/// schedule direction). Computes backwards, so multiple queries in the same
/// block must come in reverse order.
void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
    MachineBasicBlock::const_iterator Before) {
  // Initialize if we never queried in this block.
  if (!LiveRegsValid) {
    LiveRegs.init(TRI);
    LiveRegs.addLiveOuts(&MBB, true);
    LiveRegPos = MBB.end();
    LiveRegsValid = true;
  }
  // Move backward just before the "Before" position.
  while (LiveRegPos != Before) {
    --LiveRegPos;
    LiveRegs.stepBackward(*LiveRegPos);
  }
}

static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
                        unsigned Reg) {
  for (const std::pair<unsigned, bool> &R : Regs)
    if (R.first == Reg)
      return true;
  return false;
}

/// Create and insert a LDM or STM with \p Base as base register and registers
/// in \p Regs as the register operands that would be loaded / stored. Returns
/// the newly created instruction, or nullptr if no merge was performed.
MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
    MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
    bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
    DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) {
  unsigned NumRegs = Regs.size();
  assert(NumRegs > 1);

  // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
  // Compute liveness information for that register to make the decision.
  bool SafeToClobberCPSR = !isThumb1 ||
      (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
       MachineBasicBlock::LQR_Dead);

  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.

  // Exception: If the base register is in the input reglist, Thumb1 LDM is
  // non-writeback.
  // It's also not possible to merge an STR of the base register in Thumb1.
  if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) {
    assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
    if (Opcode == ARM::tLDRi) {
      Writeback = false;
    } else if (Opcode == ARM::tSTRi) {
      return nullptr;
    }
  }

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
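  // Illustration of the submode arithmetic below (added; not in the original
  // source): with NumRegs == 3, IB expects the first offset to be 4 (it
  // transfers at base+4 .. base+12), DA expects -4 * 3 + 4 == -8, and DB
  // expects -4 * 3 == -12; a plain IA merge needs the first offset to be 0.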
  if (Offset == 4 && haveIBAndDA) {
    Mode = ARM_AM::ib;
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
    Mode = ARM_AM::da;
  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
    // Check if this is a supported opcode before inserting instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;

    // If the starting offset isn't zero, insert a MI to materialize a new
    // base. But only do so if it is cost effective, i.e. merging more than
    // two loads / stores.
    if (NumRegs <= 2)
      return nullptr;

    // On Thumb1, it's not worth materializing a new base register without
    // clobbering the CPSR (i.e. not using ADDS/SUBS).
    if (!SafeToClobberCPSR)
      return nullptr;

    unsigned NewBase;
    if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination registers as
      // the new base.
      NewBase = Regs[NumRegs - 1].first;
    } else {
      // Find a free register that we can use as scratch register.
      moveLiveRegsBefore(MBB, InsertBefore);
      // The merged instruction does not exist yet but will use several Regs
      // if it is a store.
      if (!isLoadSingle(Opcode))
        for (const std::pair<unsigned, bool> &R : Regs)
          LiveRegs.addReg(R.first);

      NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
      if (NewBase == 0)
        return nullptr;
    }

    int BaseOpc =
        isThumb2 ? ARM::t2ADDri :
        (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
        (isThumb1 && Offset < 8) ? ARM::tADDi3 :
        isThumb1 ? ARM::tADDi8 : ARM::ADDri;

    if (Offset < 0) {
      Offset = -Offset;
      BaseOpc =
          isThumb2 ? ARM::t2SUBri :
          (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
          isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
    }

    if (!TL->isLegalAddImmediate(Offset))
      // FIXME: Try add with register operand?
      return nullptr; // Probably not worth it then.

    // We can only append a kill flag to the add/sub input if the value is not
    // used in the register list of the stm as well.
    bool KillOldBase = BaseKill &&
        (!isi32Store(Opcode) || !ContainsReg(Regs, Base));

    if (isThumb1) {
      // Thumb1: depending on immediate size, use either
      //   ADDS NewBase, Base, #imm3
      // or
      //   MOV  NewBase, Base
      //   ADDS NewBase, #imm8.
      if (Base != NewBase &&
          (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
        // Need to insert a MOV to the new base first.
        if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
            !STI->hasV6Ops()) {
          // thumbv4t doesn't have lo->lo copies, and we can't predicate
          // tMOVSr.
          if (Pred != ARMCC::AL)
            return nullptr;
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
              .addReg(Base, getKillRegState(KillOldBase));
        } else
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
              .addReg(Base, getKillRegState(KillOldBase))
              .addImm(Pred).addReg(PredReg);

        // The following ADDS/SUBS becomes an update.
        Base = NewBase;
        KillOldBase = true;
      }
      if (BaseOpc == ARM::tADDrSPi) {
        assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
        BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
            .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset / 4)
            .addImm(Pred).addReg(PredReg);
      } else
        AddDefaultT1CC(
            BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true)
            .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
            .addImm(Pred).addReg(PredReg);
    } else {
      BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
          .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
          .addImm(Pred).addReg(PredReg).addReg(0);
    }
    Base = NewBase;
    BaseKill = true; // New base is always killed straight away.
  }

  bool isDef = isLoadSingle(Opcode);

  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  // base register writeback.
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode)
    return nullptr;

  // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
  // - There is no writeback (LDM of base register),
  // - the base register is killed by the merged instruction,
  // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
  //   to reset the base register.
  // Otherwise, don't merge.
  // It's safe to return here since the code to materialize a new base register
  // above is also conditional on SafeToClobberCPSR.
  if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
    return nullptr;

  MachineInstrBuilder MIB;

  if (Writeback) {
    if (Opcode == ARM::tLDMIA)
      // Update tLDMIA with writeback if necessary.
      Opcode = ARM::tLDMIA_UPD;

    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));

    // Thumb1: we might need to set base writeback when building the MI.
    MIB.addReg(Base, getDefRegState(true))
       .addReg(Base, getKillRegState(BaseKill));

    // The base isn't dead after a merged instruction with writeback.
    // Insert a sub instruction after the newly formed instruction to reset.
    if (!BaseKill)
      UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);

  } else {
    // No writeback, simply build the MachineInstr.
    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
    MIB.addReg(Base, getKillRegState(BaseKill));
  }

  MIB.addImm(Pred).addReg(PredReg);

  for (const std::pair<unsigned, bool> &R : Regs)
    MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));

  return MIB.getInstr();
}
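// Note on the helper below (added for clarity; not in the original source):
// t2LDRDi8/t2STRDi8 take a signed 8-bit immediate that is scaled by 4
// internally, so the byte offsets accepted here are multiples of 4 up to
// +/-1020; isValidLSDoubleOffset() enforces the same range when candidates
// are formed.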
MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB,
    MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
    bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
    DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const {
  bool IsLoad = isi32Load(Opcode);
  assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
  unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;

  assert(Regs.size() == 2);
  MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
                                    TII->get(LoadStoreOpcode));
  if (IsLoad) {
    MIB.addReg(Regs[0].first, RegState::Define)
       .addReg(Regs[1].first, RegState::Define);
  } else {
    MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
       .addReg(Regs[1].first, getKillRegState(Regs[1].second));
  }
  MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  return MIB.getInstr();
}

/// Try to merge the instructions in \p Cand into a single LDM/STM or
/// LDRD/STRD and fix up implicit defs and kill flags afterwards. Returns the
/// merged instruction on success, nullptr otherwise.
MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
  const MachineInstr *First = Cand.Instrs.front();
  unsigned Opcode = First->getOpcode();
  bool IsLoad = isLoadSingle(Opcode);
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 4> ImpDefs;
  DenseSet<unsigned> KilledRegs;
  // Determine list of registers and list of implicit super-register defs.
  for (const MachineInstr *MI : Cand.Instrs) {
    const MachineOperand &MO = getLoadStoreRegOp(*MI);
    unsigned Reg = MO.getReg();
    bool IsKill = MO.isKill();
    if (IsKill)
      KilledRegs.insert(Reg);
    Regs.push_back(std::make_pair(Reg, IsKill));

    if (IsLoad) {
      // Collect any implicit defs of super-registers. After merging we can't
      // be sure anymore that we properly preserved these live ranges and must
      // remove these implicit operands.
      for (const MachineOperand &MO : MI->implicit_operands()) {
        if (!MO.isReg() || !MO.isDef() || MO.isDead())
          continue;
        assert(MO.isImplicit());
        unsigned DefReg = MO.getReg();

        if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end())
          continue;
        // We can ignore cases where the super-reg is read and written.
        if (MI->readsRegister(DefReg))
          continue;
        ImpDefs.push_back(DefReg);
      }
    }
  }

  // Attempt the merge.
  typedef MachineBasicBlock::iterator iterator;
  MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
  iterator InsertBefore = std::next(iterator(LatestMI));
  MachineBasicBlock &MBB = *LatestMI->getParent();
  unsigned Offset = getMemoryOpOffset(First);
  unsigned Base = getLoadStoreBaseOp(*First).getReg();
  bool BaseKill = LatestMI->killsRegister(Base);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg);
  DebugLoc DL = First->getDebugLoc();
  MachineInstr *Merged = nullptr;
  if (Cand.CanMergeToLSDouble)
    Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
                                   Opcode, Pred, PredReg, DL, Regs);
  if (!Merged && Cand.CanMergeToLSMulti)
    Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
                                  Opcode, Pred, PredReg, DL, Regs);
  if (!Merged)
    return nullptr;

  // Determine the earliest instruction that will get removed. We then keep an
  // iterator just above it so the following erases don't invalidate it.
  iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
  bool EarliestAtBegin = false;
  if (EarliestI == MBB.begin()) {
    EarliestAtBegin = true;
  } else {
    EarliestI = std::prev(EarliestI);
  }

  // Remove instructions which have been merged.
  for (MachineInstr *MI : Cand.Instrs)
    MBB.erase(MI);

  // Determine range between the earliest removed instruction and the new one.
  if (EarliestAtBegin)
    EarliestI = MBB.begin();
  else
    EarliestI = std::next(EarliestI);
  auto FixupRange = make_range(EarliestI, iterator(Merged));

  if (isLoadSingle(Opcode)) {
    // If the previous loads defined a super-reg, then we have to mark earlier
    // operands undef; replicate the super-reg def on the merged instruction.
    for (MachineInstr &MI : FixupRange) {
      for (unsigned &ImpDefReg : ImpDefs) {
        for (MachineOperand &MO : MI.implicit_operands()) {
          if (!MO.isReg() || MO.getReg() != ImpDefReg)
            continue;
          if (MO.readsReg())
            MO.setIsUndef();
          else if (MO.isDef())
            ImpDefReg = 0;
        }
      }
    }

    MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
    for (unsigned ImpDef : ImpDefs)
      MIB.addReg(ImpDef, RegState::ImplicitDefine);
  } else {
    // Remove kill flags: we are possibly storing the values later now.
    assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
    for (MachineInstr &MI : FixupRange) {
      for (MachineOperand &MO : MI.uses()) {
        if (!MO.isReg() || !MO.isKill())
          continue;
        if (KilledRegs.count(MO.getReg()))
          MO.setIsKill(false);
      }
    }
    assert(ImpDefs.empty());
  }

  return Merged;
}

static bool isValidLSDoubleOffset(int Offset) {
  unsigned Value = abs(Offset);
  // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
  // multiplied by 4.
  return (Value % 4) == 0 && Value < 1024;
}

/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
  const MachineInstr *FirstMI = MemOps[0].MI;
  unsigned Opcode = FirstMI->getOpcode();
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  unsigned Size = getLSMultipleTransferSize(FirstMI);

  unsigned SIndex = 0;
  unsigned EIndex = MemOps.size();
  do {
    // Look at the first instruction.
    const MachineInstr *MI = MemOps[SIndex].MI;
    int Offset = MemOps[SIndex].Offset;
    const MachineOperand &PMO = getLoadStoreRegOp(*MI);
    unsigned PReg = PMO.getReg();
    unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
    unsigned Latest = SIndex;
    unsigned Earliest = SIndex;
    unsigned Count = 1;
    bool CanMergeToLSDouble =
        STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    if (STI->isCortexM3() && isi32Load(Opcode) &&
        PReg == getLoadStoreBaseOp(*MI).getReg())
      CanMergeToLSDouble = false;

    bool CanMergeToLSMulti = true;
    // On Swift, a vldm/vstm starting with an odd register number needs more
    // uops than single vldrs, so don't merge in that case.
    if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
      CanMergeToLSMulti = false;

    // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
    // deprecated; LDM to PC is fine but cannot happen here.
    if (PReg == ARM::SP || PReg == ARM::PC)
      CanMergeToLSMulti = CanMergeToLSDouble = false;
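    // Illustration (added; not in the original source): given
    //   ldr r2, [r0]
    //   ldr r1, [r0, #4]
    // the descending register numbers rule out an LDM (whose reglist encodes
    // ascending registers at ascending addresses), but on Thumb2 the pair may
    // still become a t2LDRDi8, which names its two registers explicitly.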
    // Merge following instructions where possible.
    for (unsigned I = SIndex + 1; I < EIndex; ++I, ++Count) {
      int NewOffset = MemOps[I].Offset;
      if (NewOffset != Offset + (int)Size)
        break;
      const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
      unsigned Reg = MO.getReg();
      if (Reg == ARM::SP || Reg == ARM::PC)
        break;

      // See if the current load/store may be part of a multi load/store.
      unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
      bool PartOfLSMulti = CanMergeToLSMulti;
      if (PartOfLSMulti) {
        // Register numbers must be in ascending order.
        if (RegNum <= PRegNum)
          PartOfLSMulti = false;
        // For VFP / NEON load/store multiples, the registers must be
        // consecutive and within the limit on the number of registers per
        // instruction.
        else if (!isNotVFP && RegNum != PRegNum + 1)
          PartOfLSMulti = false;
      }
      // See if the current load/store may be part of a double load/store.
      bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;

      if (!PartOfLSMulti && !PartOfLSDouble)
        break;
      CanMergeToLSMulti &= PartOfLSMulti;
      CanMergeToLSDouble &= PartOfLSDouble;
      // Track the MemOps with the latest and earliest position (Positions
      // are counted in reverse).
      unsigned Position = MemOps[I].Position;
      if (Position < MemOps[Latest].Position)
        Latest = I;
      else if (Position > MemOps[Earliest].Position)
        Earliest = I;
      // Prepare for next MemOp.
      Offset += Size;
      PRegNum = RegNum;
    }

    // Form a candidate from the Ops collected so far.
    MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
    for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
      Candidate->Instrs.push_back(MemOps[C].MI);
    Candidate->LatestMIIdx = Latest - SIndex;
    Candidate->EarliestMIIdx = Earliest - SIndex;
    Candidate->InsertPos = MemOps[Latest].Position;
    if (Count == 1)
      CanMergeToLSMulti = CanMergeToLSDouble = false;
    Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
    Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
    Candidates.push_back(Candidate);
    // Continue after the chain.
    SIndex += Count;
  } while (SIndex < EIndex);
}

static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tSUBi8:
  case ARM::t2SUBri:
  case ARM::SUBri:
    CheckCPSRDef = true;
    break;
  case ARM::tSUBspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
                    MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}
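// Summary note for isMatchingDecrement above and isMatchingIncrement below
// (added for clarity; not in the original source): a matching
// decrement/increment must write and read the same base register, step it by
// exactly the transfer size under the same predicate, and, for the
// flag-setting forms, must not leave a live CPSR definition behind.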
static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tADDi8:
  case ARM::t2ADDri:
  case ARM::ADDri:
    CheckCPSRDef = true;
    break;
  case ARM::tADDspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
                    MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}

/// Fold preceding/trailing inc/dec of base register into the
/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;

  const MachineOperand &BaseOP = MI->getOperand(0);
  unsigned Base = BaseOP.getReg();
  bool BaseKill = BaseOP.isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  MachineBasicBlock::iterator MBBI(MI);
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(NextMBBI);
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(BaseKill))
      .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}
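// Note (added for clarity; not in the original source): VLDR/VSTR have no
// pre/post-indexed encodings, so the VFP cases above and below map to the
// single-register writeback forms of VLDM/VSTM instead, as the comment in
// MergeBaseUpdateLoadStore() points out.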
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// Fold preceding/trailing inc/dec of base register into the
/// LDR/STR/FLD{D|S}/FST{D|S} op when possible.
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
  if (isThumb1) return false;

  unsigned Base = getLoadStoreBaseOp(*MI).getReg();
  bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isLoadSingle(Opcode);
  // Can't do the merge if the destination register is the same as the
  // would-be writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  MachineBasicBlock::iterator MBBI(MI);
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit, Pred,
                                   PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }
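  // Illustration (added; not in the original source): merging with a
  // preceding decrement yields a pre-indexed form,
  //   sub r1, r1, #4 ; ldr r0, [r1]   =>  ldr r0, [r1, #-4]!
  // while merging with a following increment yields a post-indexed form,
  //   ldr r0, [r1] ; add r1, r1, #4   =>  ldr r0, [r1], #4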
  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit, Pred,
                                   PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
        .addReg(Base, getDefRegState(true)) // WB base register
        .addReg(Base, getKillRegState(isLd ? BaseKill : false))
        .addImm(Pred).addReg(PredReg)
        .addReg(MO.getReg(), (isLd ? getDefRegState(true)
                                   : getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
            .addReg(Base, RegState::Define)
            .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
            .addReg(Base, RegState::Define)
            .addReg(Base).addReg(0).addImm(Offset).addImm(Pred)
            .addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
          .addReg(MO.getReg(), getKillRegState(MO.isKill()))
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
          .addReg(MO.getReg(), getKillRegState(MO.isKill()))
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}

/// Returns true if instruction is a memory operation that this pass is
/// capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  unsigned Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc DL, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
        .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
        .addReg(BaseReg, getKillRegState(BaseKill) |
                         getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
        .addReg(Reg, getKillRegState(RegDeadKill) |
                     getUndefRegState(RegUndef))
        .addReg(BaseReg, getKillRegState(BaseKill) |
                         getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
    return false;

  const MachineOperand &BaseOp = MI->getOperand(2);
  unsigned BaseReg = BaseOp.getReg();
  unsigned EvenReg = MI->getOperand(0).getReg();
  unsigned OddReg = MI->getOperand(1).getReg();
  unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
  unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);

  // ARM errata 602117: LDRD with base in list may result in incorrect base
  // register when interrupted or faulted.
  bool Errata602117 = EvenReg == BaseReg &&
      (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
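  // Illustration (added; not in the original source): ARM-mode LDRD/STRD
  // require an even-numbered first register whose partner is the next
  // consecutive register, so a pseudo such as "LDRD r1, r2, [r0]" is
  // rewritten below: into an LDM when the offset is zero and the registers
  // ascend, or into two single loads otherwise.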
  // ARM LDRD/STRD needs consecutive registers.
  bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
      (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);

  if (!Errata602117 && !NonConsecutiveRegs)
    return false;

  bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
  bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
  bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
  bool EvenUndef = MI->getOperand(0).isUndef();
  bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
  bool OddUndef = MI->getOperand(1).isUndef();
  bool BaseKill = BaseOp.isKill();
  bool BaseUndef = BaseOp.isUndef();
  bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
  bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
  int OffImm = getMemoryOpOffset(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

  if (OddRegNum > EvenRegNum && OffImm == 0) {
    // Ascending register numbers and no offset. It's safe to change it to a
    // ldm or stm.
    unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
    if (isLd) {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) |
                           getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) |
                          getDeadRegState(OddDeadKill));
      ++NumLDRD2LDM;
    } else {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getKillRegState(EvenDeadKill) |
                           getUndefRegState(EvenUndef))
          .addReg(OddReg, getKillRegState(OddDeadKill) |
                          getUndefRegState(OddUndef));
      ++NumSTRD2STM;
    }
  } else {
    // Split into two instructions.
    unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
    // so adjust and use t2LDRi12 here for that.
    unsigned NewOpc2 = (isLd)
        ? (isT2 ? (OffImm + 4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12)
                : ARM::LDRi12)
        : (isT2 ? (OffImm + 4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12)
                : ARM::STRi12);
    DebugLoc dl = MBBI->getDebugLoc();
    // If this is a load and the base register is killed, it may have been
    // re-defed by the load; make sure the first load does not clobber it.
    if (isLd &&
        (BaseKill || OffKill) &&
        (TRI->regsOverlap(EvenReg, BaseReg))) {
      assert(!TRI->regsOverlap(OddReg, BaseReg));
      InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, false,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, false,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    } else {
      if (OddReg == EvenReg && EvenDeadKill) {
        // If the two source operands are the same, the kill marker is
        // probably on the first one. e.g.
        //   t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
        EvenDeadKill = false;
        OddDeadKill = true;
      }
      // Never kill the base register in the first instruction.
      if (EvenReg == BaseReg)
        EvenDeadKill = false;
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, EvenUndef,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, OddUndef,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    }
    if (isLd)
      ++NumLDRD2LDR;
    else
      ++NumSTRD2STR;
  }

  MBBI = MBB.erase(MBBI);
  return true;
}

/// An optimization to turn multiple LDR / STR ops of the same base and
/// incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  unsigned CurrOpc = ~0u;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned Position = 0;
  assert(Candidates.empty());
  LiveRegsValid = false;

  for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
       I = MBBI) {
    // The instruction in front of the iterator is the one we look at.
    MBBI = std::prev(I);
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;
    ++Position;

    if (isMemoryOp(MBBI)) {
      unsigned Opcode = MBBI->getOpcode();
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      if (CurrBase == 0) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrPred = Pred;
        MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
        continue;
      }
      // Note: No need to match PredReg in the next if.
      if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
        // Watch out for:
        //   r4 := ldr [r0, #8]
        //   r4 := ldr [r0, #4]
        // or
        //   r0 := ldr [r0]
        // If a load overrides the base register or a register loaded by
        // another load in our chain, we cannot take this instruction.
        bool Overlap = false;
        if (isLoadSingle(Opcode)) {
          Overlap = (Base == Reg);
          if (!Overlap) {
            for (const MemOpQueueEntry &E : MemOps) {
              if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
                Overlap = true;
                break;
              }
            }
          }
        }

        if (!Overlap) {
          // Check offset and sort memory operation into the current chain.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
            continue;
          } else {
            MemOpQueue::iterator MI, ME;
            for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
              if (Offset < MI->Offset) {
                // Found a place to insert.
                break;
              }
              if (Offset == MI->Offset) {
                // Collision, abort.
                MI = ME;
                break;
              }
            }
            if (MI != MemOps.end()) {
              MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position));
              continue;
            }
          }
        }
      }

      // Don't advance the iterator; the op will start a new chain next.
      MBBI = I;
      --Position;
      // Fall through to process the existing chain.
    } else if (MBBI->isDebugValue())
      continue;

    // If we are here then the chain is broken; extract candidates for a
    // merge.
    if (!MemOps.empty()) {
      FormCandidates(MemOps);
      // Reset for the next chain.
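      // (The op that broke the chain, if any, is revisited on the next loop
      // iteration and may start a chain of its own.)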
      CurrBase = 0;
      CurrOpc = ~0u;
      CurrPred = ARMCC::AL;
      MemOps.clear();
    }
  }
  if (!MemOps.empty())
    FormCandidates(MemOps);

  // Sort candidates so they get processed from the end to the beginning of
  // the basic block later; this is necessary for liveness calculation.
  auto LessThan = [](const MergeCandidate *M0, const MergeCandidate *M1) {
    return M0->InsertPos < M1->InsertPos;
  };
  std::sort(Candidates.begin(), Candidates.end(), LessThan);

  // Go through the list of candidates and merge.
  bool Changed = false;
  for (const MergeCandidate *Candidate : Candidates) {
    if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
      MachineInstr *Merged = MergeOpsUpdate(*Candidate);
      // Merge preceding/trailing base inc/dec into the merged op.
      if (Merged) {
        Changed = true;
        unsigned Opcode = Merged->getOpcode();
        if (Opcode != ARM::t2STRDi8 && Opcode != ARM::t2LDRDi8)
          MergeBaseUpdateLSMultiple(Merged);
      } else {
        for (MachineInstr *MI : Candidate->Instrs) {
          if (MergeBaseUpdateLoadStore(MI))
            Changed = true;
        }
      }
    } else {
      assert(Candidate->Instrs.size() == 1);
      if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
        Changed = true;
    }
  }
  Candidates.clear();

  return Changed;
}

/// If this is an exit BB, try merging the return ops ("bx lr" and
/// "mov pc, lr") into the preceding stack restore so that it directly
/// restores the value of LR into PC.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  // Thumb1 LDM doesn't allow high registers.
  if (isThumb1) return false;
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = std::prev(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ?
                         ARM::t2LDMIA_RET : ARM::LDMIA_RET);
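      // Only the IA form has a *_RET variant (LDMIA_RET / t2LDMIA_RET),
      // hence the assert below only tolerates LDMIA_UPD / t2LDMIA_UPD.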
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) &&
             "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  MF = &Fn;
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TL = STI->getTargetLowering();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &Fn.getRegInfo();
  RegClassInfoValid = false;
  isThumb2 = AFI->isThumb2Function();
  isThumb1 = AFI->isThumbFunction() && !isThumb2;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (STI->hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  Allocator.DestroyAll();
  return Modified;
}

namespace {
  /// Pre-register-allocation pass that moves loads / stores from consecutive
  /// locations closer together to make it more likely that they will be
  /// combined later.
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &FirstReg,
                          unsigned &SecondReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVectorImpl<MachineInstr *> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getDataLayout();
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSetImpl<MachineInstr*> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias
  // information some day.
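  // Registers referenced between the first and last memory op, other than
  // the base and the registers being transferred; used below as a rough
  // estimate of the register pressure the move would add.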
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // OK if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}

/// Copy the memory operands of \p Op0 and \p Op1 into a new array assigned
/// to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}

bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl, unsigned &NewOpc,
                                          unsigned &FirstReg,
                                          unsigned &SecondReg,
                                          unsigned &BaseReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12) {
    NewOpc = ARM::LDRD;
  } else if (Opcode == ARM::STRi12) {
    NewOpc = ARM::STRD;
  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else {
    return false;
  }

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement. At the moment, we ignore the memory operand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
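  // (ARM-mode LDRD/STRD uses addrmode3 with an 8-bit immediate plus an
  // add/sub flag, while the Thumb2 forms take an 8-bit immediate scaled by
  // 4; both limits appear as (1 << 8) * Scale below.)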
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  FirstReg = Op0->getOperand(0).getReg();
  SecondReg = Op1->getOperand(0).getReg();
  if (FirstReg == SecondReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVectorImpl<MachineInstr *> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;
  });

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative: if the instructions are too far apart, don't move
      // them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
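        // Advance past any of the ops being moved (and any debug values) so
        // the new location is just after them.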
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form a register
        // pair.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned FirstReg = 0, SecondReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             FirstReg, SecondReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(FirstReg, TRC);
          MRI->constrainRegClass(SecondReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg, RegState::Define)
              .addReg(SecondReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg)
              .addReg(SecondReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          if (!isT2) {
            // Add register allocation hints to form register pairs.
            MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
            MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
          }
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
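        // (Loads / stores are not rescheduled across calls or terminators;
        // reschedule what has been collected so far and start a fresh
        // region after the barrier.)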
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isLoadSingle(Opc);
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination already seen).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}

/// Returns an instance of the load / store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}