1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file This file contains a pass that performs load / store related peephole 11 /// optimizations. This pass should be run after register allocation. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ARM.h" 16 #include "ARMBaseInstrInfo.h" 17 #include "ARMBaseRegisterInfo.h" 18 #include "ARMISelLowering.h" 19 #include "ARMMachineFunctionInfo.h" 20 #include "ARMSubtarget.h" 21 #include "MCTargetDesc/ARMAddressingModes.h" 22 #include "ThumbRegisterInfo.h" 23 #include "llvm/ADT/DenseMap.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/ADT/SmallPtrSet.h" 26 #include "llvm/ADT/SmallSet.h" 27 #include "llvm/ADT/SmallVector.h" 28 #include "llvm/ADT/Statistic.h" 29 #include "llvm/CodeGen/MachineBasicBlock.h" 30 #include "llvm/CodeGen/MachineFunctionPass.h" 31 #include "llvm/CodeGen/MachineInstr.h" 32 #include "llvm/CodeGen/MachineInstrBuilder.h" 33 #include "llvm/CodeGen/MachineRegisterInfo.h" 34 #include "llvm/CodeGen/RegisterClassInfo.h" 35 #include "llvm/CodeGen/SelectionDAGNodes.h" 36 #include "llvm/CodeGen/LivePhysRegs.h" 37 #include "llvm/IR/DataLayout.h" 38 #include "llvm/IR/DerivedTypes.h" 39 #include "llvm/IR/Function.h" 40 #include "llvm/Support/Allocator.h" 41 #include "llvm/Support/Debug.h" 42 #include "llvm/Support/ErrorHandling.h" 43 #include "llvm/Support/raw_ostream.h" 44 #include "llvm/Target/TargetInstrInfo.h" 45 #include "llvm/Target/TargetMachine.h" 46 #include "llvm/Target/TargetRegisterInfo.h" 47 using namespace llvm; 48 49 #define DEBUG_TYPE "arm-ldst-opt" 50 51 STATISTIC(NumLDMGened , "Number of ldm instructions generated"); 52 STATISTIC(NumSTMGened , "Number of stm instructions generated"); 53 STATISTIC(NumVLDMGened, "Number of vldm instructions generated"); 54 STATISTIC(NumVSTMGened, "Number of vstm instructions generated"); 55 STATISTIC(NumLdStMoved, "Number of load / store instructions moved"); 56 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation"); 57 STATISTIC(NumSTRDFormed,"Number of strd created before allocation"); 58 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm"); 59 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm"); 60 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's"); 61 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's"); 62 63 namespace { 64 /// Post- register allocation pass the combine load / store instructions to 65 /// form ldm / stm instructions. 
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const MachineFunction *MF;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const MachineRegisterInfo *MRI;
    const ARMSubtarget *STI;
    const TargetLowering *TL;
    ARMFunctionInfo *AFI;
    LivePhysRegs LiveRegs;
    RegisterClassInfo RegClassInfo;
    MachineBasicBlock::const_iterator LiveRegPos;
    bool LiveRegsValid;
    bool RegClassInfoValid;
    bool isThumb1, isThumb2;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM load / store optimization pass";
    }

  private:
    /// A set of load/store MachineInstrs with the same base register, sorted
    /// by offset.
    struct MemOpQueueEntry {
      MachineInstr *MI;
      int Offset;        ///< Load/Store offset.
      unsigned Position; ///< Position as counted from end of basic block.
      MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)
        : MI(MI), Offset(Offset), Position(Position) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;

    /// A set of MachineInstrs that fulfill (nearly all) conditions to get
    /// merged into an LDM/STM.
    struct MergeCandidate {
      /// List of instructions ordered by load/store offset.
      SmallVector<MachineInstr*, 4> Instrs;
      /// Index in Instrs of the latest instruction in the schedule.
      unsigned LatestMIIdx;
      /// Index in Instrs of the earliest instruction in the schedule.
      unsigned EarliestMIIdx;
      /// Index into the basic block where the merged instruction will be
      /// inserted. (See MemOpQueueEntry.Position)
      unsigned InsertPos;
      /// Whether the instructions can be merged into an ldm/stm instruction.
      bool CanMergeToLSMulti;
      /// Whether the instructions can be merged into an ldrd/strd instruction.
      bool CanMergeToLSDouble;
    };
    SpecificBumpPtrAllocator<MergeCandidate> Allocator;
    SmallVector<const MergeCandidate*,4> Candidates;
    SmallVector<MachineInstr*,4> MergeBaseCandidates;

    void moveLiveRegsBefore(const MachineBasicBlock &MBB,
                            MachineBasicBlock::const_iterator Before);
    unsigned findFreeReg(const TargetRegisterClass &RegClass);
    void UpdateBaseRegUses(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           DebugLoc DL, unsigned Base, unsigned WordOffset,
                           ARMCC::CondCodes Pred, unsigned PredReg);
    MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
        MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
        bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
        DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs);
    MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
        MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
        bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
        DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const;
    void FormCandidates(const MemOpQueue &MemOps);
    MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineInstr *MI);
    bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
    bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static bool definesCPSR(const MachineInstr *MI) {
  for (const auto &MO : MI->operands()) {
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has live CPSR def, then it's not safe to fold it
      // into load / store.
      return true;
  }

  return false;
}

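// Illustrative decodings of the cases handled below: a t2LDRi8 with immediate
// operand -8 yields -8 (the operand is already a byte offset); a tLDRi with
// immediate operand 3 yields 12 (Thumb1 immediates are in words); a VLDRS
// whose AM5 operand encodes {sub, 2} yields -8.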
static int getMemoryOpOffset(const MachineInstr *MI) {
  unsigned Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  // Thumb1 immediate offsets are scaled by 4.
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
      Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
    return OffField * 4;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
                             : ARM_AM::getAM5Op(OffField);

  if (Op == ARM_AM::sub)
    return -Offset;

  return Offset;
}

static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
  return MI.getOperand(1);
}

static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
  return MI.getOperand(0);
}

static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
  case ARM::tLDRspi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
  case ARM::tSTRspi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}

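// For example, a chain of LDRi12 loads from consecutive word offsets merged
// with submode 'ia' becomes a single LDMIA:
//   ldr r0, [r4]; ldr r1, [r4, #4]; ldr r2, [r4, #8]
//     =>  ldmia r4, {r0, r1, r2}
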
281 } 282 } 283 } 284 285 static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) { 286 switch (Opcode) { 287 default: llvm_unreachable("Unhandled opcode!"); 288 case ARM::LDMIA_RET: 289 case ARM::LDMIA: 290 case ARM::LDMIA_UPD: 291 case ARM::STMIA: 292 case ARM::STMIA_UPD: 293 case ARM::tLDMIA: 294 case ARM::tLDMIA_UPD: 295 case ARM::tSTMIA_UPD: 296 case ARM::t2LDMIA_RET: 297 case ARM::t2LDMIA: 298 case ARM::t2LDMIA_UPD: 299 case ARM::t2STMIA: 300 case ARM::t2STMIA_UPD: 301 case ARM::VLDMSIA: 302 case ARM::VLDMSIA_UPD: 303 case ARM::VSTMSIA: 304 case ARM::VSTMSIA_UPD: 305 case ARM::VLDMDIA: 306 case ARM::VLDMDIA_UPD: 307 case ARM::VSTMDIA: 308 case ARM::VSTMDIA_UPD: 309 return ARM_AM::ia; 310 311 case ARM::LDMDA: 312 case ARM::LDMDA_UPD: 313 case ARM::STMDA: 314 case ARM::STMDA_UPD: 315 return ARM_AM::da; 316 317 case ARM::LDMDB: 318 case ARM::LDMDB_UPD: 319 case ARM::STMDB: 320 case ARM::STMDB_UPD: 321 case ARM::t2LDMDB: 322 case ARM::t2LDMDB_UPD: 323 case ARM::t2STMDB: 324 case ARM::t2STMDB_UPD: 325 case ARM::VLDMSDB_UPD: 326 case ARM::VSTMSDB_UPD: 327 case ARM::VLDMDDB_UPD: 328 case ARM::VSTMDDB_UPD: 329 return ARM_AM::db; 330 331 case ARM::LDMIB: 332 case ARM::LDMIB_UPD: 333 case ARM::STMIB: 334 case ARM::STMIB_UPD: 335 return ARM_AM::ib; 336 } 337 } 338 339 static bool isT1i32Load(unsigned Opc) { 340 return Opc == ARM::tLDRi || Opc == ARM::tLDRspi; 341 } 342 343 static bool isT2i32Load(unsigned Opc) { 344 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8; 345 } 346 347 static bool isi32Load(unsigned Opc) { 348 return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ; 349 } 350 351 static bool isT1i32Store(unsigned Opc) { 352 return Opc == ARM::tSTRi || Opc == ARM::tSTRspi; 353 } 354 355 static bool isT2i32Store(unsigned Opc) { 356 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8; 357 } 358 359 static bool isi32Store(unsigned Opc) { 360 return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc); 361 } 362 363 static bool isLoadSingle(unsigned Opc) { 364 return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD; 365 } 366 367 static unsigned getImmScale(unsigned Opc) { 368 switch (Opc) { 369 default: llvm_unreachable("Unhandled opcode!"); 370 case ARM::tLDRi: 371 case ARM::tSTRi: 372 case ARM::tLDRspi: 373 case ARM::tSTRspi: 374 return 1; 375 case ARM::tLDRHi: 376 case ARM::tSTRHi: 377 return 2; 378 case ARM::tLDRBi: 379 case ARM::tSTRBi: 380 return 4; 381 } 382 } 383 384 static unsigned getLSMultipleTransferSize(const MachineInstr *MI) { 385 switch (MI->getOpcode()) { 386 default: return 0; 387 case ARM::LDRi12: 388 case ARM::STRi12: 389 case ARM::tLDRi: 390 case ARM::tSTRi: 391 case ARM::tLDRspi: 392 case ARM::tSTRspi: 393 case ARM::t2LDRi8: 394 case ARM::t2LDRi12: 395 case ARM::t2STRi8: 396 case ARM::t2STRi12: 397 case ARM::VLDRS: 398 case ARM::VSTRS: 399 return 4; 400 case ARM::VLDRD: 401 case ARM::VSTRD: 402 return 8; 403 case ARM::LDMIA: 404 case ARM::LDMDA: 405 case ARM::LDMDB: 406 case ARM::LDMIB: 407 case ARM::STMIA: 408 case ARM::STMDA: 409 case ARM::STMDB: 410 case ARM::STMIB: 411 case ARM::tLDMIA: 412 case ARM::tLDMIA_UPD: 413 case ARM::tSTMIA_UPD: 414 case ARM::t2LDMIA: 415 case ARM::t2LDMDB: 416 case ARM::t2STMIA: 417 case ARM::t2STMDB: 418 case ARM::VLDMSIA: 419 case ARM::VSTMSIA: 420 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4; 421 case ARM::VLDMDIA: 422 case ARM::VSTMDIA: 423 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8; 424 } 425 } 426 427 /// Update future uses of the base 
/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
void
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   DebugLoc DL, unsigned Base,
                                   unsigned WordOffset,
                                   ARMCC::CondCodes Pred, unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");
  // Start updating any instructions with immediate offsets. Insert a SUB
  // before the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    bool InsertSub = false;
    unsigned Opc = MBBI->getOpcode();

    if (MBBI->readsRegister(Base)) {
      int Offset;
      bool IsLoad =
        Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
      bool IsStore =
        Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

      if (IsLoad || IsStore) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);

        // If storing the base register, it needs to be reset first.
        unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();

        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
          MO.setImm(Offset);
        else
          InsertSub = true;

      } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                 !definesCPSR(MBBI)) {
        // SUBS/ADDS using this register, with a dead def of the CPSR.
        // Merge it with the update; if the merged offset is too large,
        // insert a new sub instead.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4;
        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
          // Offset == 0.
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }

      } else {
        // Can't update the instruction.
        InsertSub = true;
      }

    } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
      // Since SUBS sets the condition flags, we can't place the base reset
      // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.
      InsertSub = true;
    }

    if (InsertSub) {
      // An instruction above couldn't be updated, so insert a sub.
      AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
        .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
      return;
    }

    if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
      // Register got killed. Stop updating.
      return;
  }

  // End of block was reached.
  if (MBB.succ_size() > 0) {
    // FIXME: Because of a bug, live registers are sometimes missing from
    // the successor blocks' live-in sets. This means we can't trust that
    // information and *always* have to reset at the end of a block.
    // See PR21029.
    if (MBBI != MBB.end()) --MBBI;
    AddDefaultT1CC(
      BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
      .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
  }
}

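// For example, after two stores have been merged into "stmia r1!, {r2, r3}"
// the base r1 has advanced by 8 bytes; a following "ldr r0, [r1, #8]" is
// rewritten to "ldr r0, [r1]", and if some use cannot be rewritten, a
// "subs r1, #8" is inserted in front of it to reset the base instead.
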
/// Return the first register of class \p RegClass that is not in \p Regs.
unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
  if (!RegClassInfoValid) {
    RegClassInfo.runOnMachineFunction(*MF);
    RegClassInfoValid = true;
  }

  for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
    if (!LiveRegs.contains(Reg))
      return Reg;
  return 0;
}

/// Compute live registers just before instruction \p Before (in normal
/// schedule direction). Computes backwards so multiple queries in the same
/// block must come in reverse order.
void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
                                         MachineBasicBlock::const_iterator Before) {
  // Initialize if we never queried in this block.
  if (!LiveRegsValid) {
    LiveRegs.init(TRI);
    LiveRegs.addLiveOuts(&MBB, true);
    LiveRegPos = MBB.end();
    LiveRegsValid = true;
  }
  // Move backward just before the "Before" position.
  while (LiveRegPos != Before) {
    --LiveRegPos;
    LiveRegs.stepBackward(*LiveRegPos);
  }
}

static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
                        unsigned Reg) {
  for (const std::pair<unsigned, bool> &R : Regs)
    if (R.first == Reg)
      return true;
  return false;
}

/// Create and insert a LDM or STM with Base as base register and registers in
/// Regs as the register operands that would be loaded / stored. Returns the
/// newly created instruction, or nullptr if no transformation could be done.
MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
    MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
    bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
    DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) {
  unsigned NumRegs = Regs.size();
  assert(NumRegs > 1);

  // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
  // Compute liveness information for that register to make the decision.
  bool SafeToClobberCPSR = !isThumb1 ||
    (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
     MachineBasicBlock::LQR_Dead);

  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.

  // Exception: If the base register is in the input reglist, Thumb1 LDM is
  // non-writeback.
  // It's also not possible to merge an STR of the base register in Thumb1.
  if (isThumb1 && ContainsReg(Regs, Base)) {
    assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
    if (Opcode == ARM::tLDRi) {
      Writeback = false;
    } else if (Opcode == ARM::tSTRi) {
      return nullptr;
    }
  }

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;

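  // The submode is chosen from the offset of the first (lowest-offset)
  // access relative to the base register:
  //   Offset == 0              -> ia (first word at [Base])
  //   Offset == 4              -> ib (first word at [Base, #4])
  //   Offset == -4*NumRegs     -> db (last word at [Base, #-4])
  //   Offset == -4*NumRegs + 4 -> da (last word at [Base])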
  if (Offset == 4 && haveIBAndDA) {
    Mode = ARM_AM::ib;
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
    Mode = ARM_AM::da;
  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
    // Check if this is a supported opcode before inserting instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return nullptr;

    // On Thumb1, it's not worth materializing a new base register without
    // clobbering the CPSR (i.e. not using ADDS/SUBS).
    if (!SafeToClobberCPSR)
      return nullptr;

    unsigned NewBase;
    if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    } else {
      // Find a free register that we can use as scratch register.
      moveLiveRegsBefore(MBB, InsertBefore);
      // The merged instruction does not exist yet but will use several Regs if
      // it is a Store.
      if (!isLoadSingle(Opcode))
        for (const std::pair<unsigned, bool> &R : Regs)
          LiveRegs.addReg(R.first);

      NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
      if (NewBase == 0)
        return nullptr;
    }

    int BaseOpc =
      isThumb2 ? ARM::t2ADDri :
      (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
      (isThumb1 && Offset < 8) ? ARM::tADDi3 :
      isThumb1 ? ARM::tADDi8 : ARM::ADDri;

    if (Offset < 0) {
      Offset = -Offset;
      BaseOpc =
        isThumb2 ? ARM::t2SUBri :
        (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
        isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
    }

    if (!TL->isLegalAddImmediate(Offset))
      // FIXME: Try add with register operand?
      return nullptr; // Probably not worth it then.

    // We can only append a kill flag to the add/sub input if the value is not
    // used in the register list of the stm as well.
    bool KillOldBase = BaseKill &&
      (!isi32Store(Opcode) || !ContainsReg(Regs, Base));

    if (isThumb1) {
      // Thumb1: depending on immediate size, use either
      //   ADDS NewBase, Base, #imm3
      // or
      //   MOV  NewBase, Base
      //   ADDS NewBase, #imm8.
      if (Base != NewBase &&
          (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
        // Need to insert a MOV to the new base first.
        if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
            !STI->hasV6Ops()) {
          // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
          if (Pred != ARMCC::AL)
            return nullptr;
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase));
        } else
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase))
            .addImm(Pred).addReg(PredReg);

        // The following ADDS/SUBS becomes an update.
        Base = NewBase;
        KillOldBase = true;
      }
      if (BaseOpc == ARM::tADDrSPi) {
        assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
        BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
          .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4)
          .addImm(Pred).addReg(PredReg);
      } else
        AddDefaultT1CC(
          BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true)
          .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
          .addImm(Pred).addReg(PredReg);
    } else {
      BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
        .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
        .addImm(Pred).addReg(PredReg).addReg(0);
    }
    Base = NewBase;
    BaseKill = true; // New base is always killed straight away.
  }

  bool isDef = isLoadSingle(Opcode);

  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  // base register writeback.
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode)
    return nullptr;

  // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
  // - There is no writeback (LDM of base register),
  // - the base register is killed by the merged instruction,
  // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
  //   to reset the base register.
  // Otherwise, don't merge.
  // It's safe to return here since the code to materialize a new base register
  // above is also conditional on SafeToClobberCPSR.
  if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
    return nullptr;

  MachineInstrBuilder MIB;

  if (Writeback) {
    if (Opcode == ARM::tLDMIA)
      // Update tLDMIA with writeback if necessary.
      Opcode = ARM::tLDMIA_UPD;

    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));

    // Thumb1: we might need to set base writeback when building the MI.
    MIB.addReg(Base, getDefRegState(true))
       .addReg(Base, getKillRegState(BaseKill));

    // The base isn't dead after a merged instruction with writeback.
    // Insert a sub instruction after the newly formed instruction to reset.
    if (!BaseKill)
      UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);

  } else {
    // No writeback, simply build the MachineInstr.
    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
    MIB.addReg(Base, getKillRegState(BaseKill));
  }

  MIB.addImm(Pred).addReg(PredReg);

  for (const std::pair<unsigned, bool> &R : Regs)
    MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));

  return MIB.getInstr();
}

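// For example, a pair of word loads from consecutive offsets
//   ldr r0, [r2, #8]; ldr r1, [r2, #12]
// can be emitted as the single t2LDRDi8 "ldrd r0, r1, [r2, #8]" built below.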
MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB,
    MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
    bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
    DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const {
  bool IsLoad = isi32Load(Opcode);
  assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
  unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;

  assert(Regs.size() == 2);
  MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
                                    TII->get(LoadStoreOpcode));
  if (IsLoad) {
    MIB.addReg(Regs[0].first, RegState::Define)
       .addReg(Regs[1].first, RegState::Define);
  } else {
    MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
       .addReg(Regs[1].first, getKillRegState(Regs[1].second));
  }
  MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  return MIB.getInstr();
}

/// Merge the instructions in \p Cand into a single load/store-multiple or
/// -double instruction and fix up implicit defs and kill flags afterwards.
/// Returns the merged instruction, or nullptr if merging was not possible.
MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
  const MachineInstr *First = Cand.Instrs.front();
  unsigned Opcode = First->getOpcode();
  bool IsLoad = isLoadSingle(Opcode);
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 4> ImpDefs;
  DenseSet<unsigned> KilledRegs;
  // Determine list of registers and list of implicit super-register defs.
  for (const MachineInstr *MI : Cand.Instrs) {
    const MachineOperand &MO = getLoadStoreRegOp(*MI);
    unsigned Reg = MO.getReg();
    bool IsKill = MO.isKill();
    if (IsKill)
      KilledRegs.insert(Reg);
    Regs.push_back(std::make_pair(Reg, IsKill));

    if (IsLoad) {
      // Collect any implicit defs of super-registers: after merging we can't
      // be sure anymore that we properly preserved these live ranges and must
      // remove these implicit operands.
      for (const MachineOperand &MO : MI->implicit_operands()) {
        if (!MO.isReg() || !MO.isDef() || MO.isDead())
          continue;
        assert(MO.isImplicit());
        unsigned DefReg = MO.getReg();

        if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end())
          continue;
        // We can ignore cases where the super-reg is read and written.
        if (MI->readsRegister(DefReg))
          continue;
        ImpDefs.push_back(DefReg);
      }
    }
  }

  // Attempt the merge.
  typedef MachineBasicBlock::iterator iterator;
  MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
  iterator InsertBefore = std::next(iterator(LatestMI));
  MachineBasicBlock &MBB = *LatestMI->getParent();
  unsigned Offset = getMemoryOpOffset(First);
  unsigned Base = getLoadStoreBaseOp(*First).getReg();
  bool BaseKill = LatestMI->killsRegister(Base);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg);
  DebugLoc DL = First->getDebugLoc();
  MachineInstr *Merged = nullptr;
  if (Cand.CanMergeToLSDouble)
    Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
                                   Opcode, Pred, PredReg, DL, Regs);
  if (!Merged && Cand.CanMergeToLSMulti)
    Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
                                  Opcode, Pred, PredReg, DL, Regs);
  if (!Merged)
    return nullptr;

  // Determine earliest instruction that will get removed. We then keep an
  // iterator just above it so the following erases don't invalidate it.
  iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
  bool EarliestAtBegin = false;
  if (EarliestI == MBB.begin()) {
    EarliestAtBegin = true;
  } else {
    EarliestI = std::prev(EarliestI);
  }

  // Remove instructions which have been merged.
  for (MachineInstr *MI : Cand.Instrs)
    MBB.erase(MI);

  // Determine range between the earliest removed instruction and the new one.
  if (EarliestAtBegin)
    EarliestI = MBB.begin();
  else
    EarliestI = std::next(EarliestI);
  auto FixupRange = make_range(EarliestI, iterator(Merged));

  if (isLoadSingle(Opcode)) {
    // If the previous loads defined a super-reg, then we have to mark earlier
    // operands undef; Replicate the super-reg def on the merged instruction.
    for (MachineInstr &MI : FixupRange) {
      for (unsigned &ImpDefReg : ImpDefs) {
        for (MachineOperand &MO : MI.implicit_operands()) {
          if (!MO.isReg() || MO.getReg() != ImpDefReg)
            continue;
          if (MO.readsReg())
            MO.setIsUndef();
          else if (MO.isDef())
            ImpDefReg = 0;
        }
      }
    }

    MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
    for (unsigned ImpDef : ImpDefs)
      MIB.addReg(ImpDef, RegState::ImplicitDefine);
  } else {
    // Remove kill flags: We are possibly storing the values later now.
    assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
    for (MachineInstr &MI : FixupRange) {
      for (MachineOperand &MO : MI.uses()) {
        if (!MO.isReg() || !MO.isKill())
          continue;
        if (KilledRegs.count(MO.getReg()))
          MO.setIsKill(false);
      }
    }
    assert(ImpDefs.empty());
  }

  return Merged;
}

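// For example, offsets #8 and #-1020 are encodable below, while #6
// (misaligned) and #1024 (out of range) are not.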
static bool isValidLSDoubleOffset(int Offset) {
  unsigned Value = abs(Offset);
  // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
  // multiplied by 4.
  return (Value % 4) == 0 && Value < 1024;
}

/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
  const MachineInstr *FirstMI = MemOps[0].MI;
  unsigned Opcode = FirstMI->getOpcode();
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  unsigned Size = getLSMultipleTransferSize(FirstMI);

  unsigned SIndex = 0;
  unsigned EIndex = MemOps.size();
  do {
    // Look at the first instruction.
    const MachineInstr *MI = MemOps[SIndex].MI;
    int Offset = MemOps[SIndex].Offset;
    const MachineOperand &PMO = getLoadStoreRegOp(*MI);
    unsigned PReg = PMO.getReg();
    unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
    unsigned Latest = SIndex;
    unsigned Earliest = SIndex;
    unsigned Count = 1;
    bool CanMergeToLSDouble =
      STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    if (STI->isCortexM3() && isi32Load(Opcode) &&
        PReg == getLoadStoreBaseOp(*MI).getReg())
      CanMergeToLSDouble = false;

    bool CanMergeToLSMulti = true;
    // On Swift, a vldm/vstm starting with an odd register number needs more
    // uops than single vldrs.
    if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
      CanMergeToLSMulti = false;

    // Merge following instructions where possible.
    for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
      int NewOffset = MemOps[I].Offset;
      if (NewOffset != Offset + (int)Size)
        break;
      const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
      unsigned Reg = MO.getReg();
      unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);

      // See if the current load/store may be part of a multi load/store.
      bool PartOfLSMulti = CanMergeToLSMulti;
      if (PartOfLSMulti) {
        // Cannot load from SP
        if (Reg == ARM::SP)
          PartOfLSMulti = false;
        // Register numbers must be in ascending order.
        else if (RegNum <= PRegNum)
          PartOfLSMulti = false;
        // For VFP / NEON load/store multiples, the registers must be
        // consecutive and within the limit on the number of registers per
        // instruction.
        else if (!isNotVFP && RegNum != PRegNum+1)
          PartOfLSMulti = false;
      }
      // See if the current load/store may be part of a double load/store.
      bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;

      if (!PartOfLSMulti && !PartOfLSDouble)
        break;
      CanMergeToLSMulti &= PartOfLSMulti;
      CanMergeToLSDouble &= PartOfLSDouble;
      // Track MemOp with latest and earliest position (Positions are
      // counted in reverse).
      unsigned Position = MemOps[I].Position;
      if (Position < MemOps[Latest].Position)
        Latest = I;
      else if (Position > MemOps[Earliest].Position)
        Earliest = I;
      // Prepare for next MemOp.
      Offset += Size;
      PRegNum = RegNum;
    }

    // Form a candidate from the Ops collected so far.
    MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
    for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
      Candidate->Instrs.push_back(MemOps[C].MI);
    Candidate->LatestMIIdx = Latest - SIndex;
    Candidate->EarliestMIIdx = Earliest - SIndex;
    Candidate->InsertPos = MemOps[Latest].Position;
    if (Count == 1)
      CanMergeToLSMulti = CanMergeToLSDouble = false;
    Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
    Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
    Candidates.push_back(Candidate);
    // Continue after the chain.
    SIndex += Count;
  } while (SIndex < EIndex);
}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}

/// Check if the given instruction increments or decrements a register and
/// return the amount it is incremented/decremented. Returns 0 if the CPSR
/// flags generated by the instruction are possibly read as well.
static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
                                  ARMCC::CondCodes Pred, unsigned PredReg) {
  bool CheckCPSRDef;
  int Scale;
  switch (MI.getOpcode()) {
  case ARM::tADDi8:  Scale =  4; CheckCPSRDef = true; break;
  case ARM::tSUBi8:  Scale = -4; CheckCPSRDef = true; break;
  case ARM::t2SUBri:
  case ARM::SUBri:   Scale = -1; CheckCPSRDef = true; break;
  case ARM::t2ADDri:
  case ARM::ADDri:   Scale =  1; CheckCPSRDef = true; break;
  case ARM::tADDspi: Scale =  4; CheckCPSRDef = false; break;
  case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
  default: return 0;
  }

  unsigned MIPredReg;
  if (MI.getOperand(0).getReg() != Reg ||
      MI.getOperand(1).getReg() != Reg ||
      getInstrPredicate(&MI, MIPredReg) != Pred ||
      MIPredReg != PredReg)
    return 0;

  if (CheckCPSRDef && definesCPSR(&MI))
    return 0;
  return MI.getOperand(2).getImm() * Scale;
}

/// Searches for an increment or decrement of \p Reg before \p MBBI.
static MachineBasicBlock::iterator
findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
                 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
  Offset = 0;
  MachineBasicBlock &MBB = *MBBI->getParent();
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (MBBI == BeginMBBI)
    return EndMBBI;

  // Skip debug values.
  MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
  while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI)
    --PrevMBBI;

  Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
  return Offset == 0 ? EndMBBI : PrevMBBI;
}

/// Searches for an increment or decrement of \p Reg after \p MBBI.
static MachineBasicBlock::iterator
findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
                ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
  Offset = 0;
  MachineBasicBlock &MBB = *MBBI->getParent();
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
  // Skip debug values.
  while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
    ++NextMBBI;
  if (NextMBBI == EndMBBI)
    return EndMBBI;

  Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
  return Offset == 0 ? EndMBBI : NextMBBI;
}

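// For example, given
//   sub r1, r1, #12
//   ldmia r1, {r0, r2, r3}
// findIncDecBefore on the LDMIA returns the SUB and sets Offset = -12, which
// lets the pair be folded into "ldmdb r1!, {r0, r2, r3}" below.
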
/// Fold preceding/trailing inc/dec of base register into the
/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;

  const MachineOperand &BaseOP = MI->getOperand(0);
  unsigned Base = BaseOP.getReg();
  bool BaseKill = BaseOP.isKill();
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  int Bytes = getLSMultipleTransferSize(MI);
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator MBBI(MI);
  int Offset;
  MachineBasicBlock::iterator MergeInstr
    = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
  if (Mode == ARM_AM::ia && Offset == -Bytes) {
    Mode = ARM_AM::db;
  } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
    Mode = ARM_AM::da;
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
        ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes))
      return false;
  }
  MBB.erase(MergeInstr);

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// Fold preceding/trailing inc/dec of base register into the
/// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
  if (isThumb1) return false;

  unsigned Base = getLoadStoreBaseOp(*MI).getReg();
  bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Bytes = getLSMultipleTransferSize(MI);
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator MBBI(MI);
  int Offset;
  MachineBasicBlock::iterator MergeInstr
    = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  unsigned NewOpc;
  if (!isAM5 && Offset == Bytes) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
  } else if (Offset == -Bytes) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (Offset == Bytes) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
    } else if (!isAM5 && Offset == -Bytes) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
    } else
      return false;
  }
  MBB.erase(MergeInstr);

  ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;

  bool isLd = isLoadSingle(Opcode);
  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
      }
    } else {
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
    } else {
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}

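// Same idea for the LDRD/STRD formed before register allocation, e.g.
//   ldrd r0, r1, [r2]; add r2, r2, #8
// folds to the post-indexed "ldrd r0, r1, [r2], #8" (t2LDRD_POST).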
bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
  unsigned Opcode = MI.getOpcode();
  assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
         "Must have t2STRDi8 or t2LDRDi8");
  if (MI.getOperand(3).getImm() != 0)
    return false;

  // Behaviour for writeback is undefined if the base register is the same as
  // one of the others.
  const MachineOperand &BaseOp = MI.getOperand(2);
  unsigned Base = BaseOp.getReg();
  const MachineOperand &Reg0Op = MI.getOperand(0);
  const MachineOperand &Reg1Op = MI.getOperand(1);
  if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
    return false;

  unsigned PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);
  MachineBasicBlock::iterator MBBI(MI);
  MachineBasicBlock &MBB = *MI.getParent();
  int Offset;
  MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
                                                            PredReg, Offset);
  unsigned NewOpc;
  if (Offset == 8 || Offset == -8) {
    NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (Offset == 8 || Offset == -8) {
      NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
    } else
      return false;
  }
  MBB.erase(MergeInstr);

  DebugLoc DL = MI.getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
    MIB.addOperand(Reg0Op).addOperand(Reg1Op)
       .addReg(BaseOp.getReg(), RegState::Define);
  } else {
    assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
    MIB.addReg(BaseOp.getReg(), RegState::Define)
       .addOperand(Reg0Op).addOperand(Reg1Op);
  }
  MIB.addReg(BaseOp.getReg(), RegState::Kill)
     .addImm(Offset).addImm(Pred).addReg(PredReg);
  assert(TII->get(Opcode).getNumOperands() == 6 &&
         TII->get(NewOpc).getNumOperands() == 7 &&
         "Unexpected number of operands in Opcode specification.");

  // Transfer implicit operands.
  for (const MachineOperand &MO : MI.implicit_operands())
    MIB.addOperand(MO);
  MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());

  MBB.erase(MBBI);
  return true;
}

/// Returns true if the instruction is a memory operation that this pass is
/// capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  unsigned Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
  case ARM::VLDRD:
  case ARM::VSTRD:
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc DL, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
    return false;

  const MachineOperand &BaseOp = MI->getOperand(2);
  unsigned BaseReg = BaseOp.getReg();
  unsigned EvenReg = MI->getOperand(0).getReg();
  unsigned OddReg = MI->getOperand(1).getReg();
  unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
  unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);

  // ARM errata 602117: LDRD with base in list may result in incorrect base
  // register when interrupted or faulted.
  bool Errata602117 = EvenReg == BaseReg &&
    (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
  // ARM LDRD/STRD needs consecutive registers.
  bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
    (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);

  if (!Errata602117 && !NonConsecutiveRegs)
    return false;

  bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
  bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
  bool EvenDeadKill = isLd ?
    MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
  bool EvenUndef = MI->getOperand(0).isUndef();
  bool OddDeadKill = isLd ?
    MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
  bool OddUndef = MI->getOperand(1).isUndef();
  bool BaseKill = BaseOp.isKill();
  bool BaseUndef = BaseOp.isUndef();
  bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
  bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
  int OffImm = getMemoryOpOffset(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

  if (OddRegNum > EvenRegNum && OffImm == 0) {
    // Ascending register numbers and no offset. It's safe to change it to a
    // ldm or stm.
    unsigned NewOpc = (isLd)
      ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
      : (isT2 ? ARM::t2STMIA : ARM::STMIA);
    if (isLd) {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
        .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
      ++NumLDRD2LDM;
    } else {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg,
                getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
        .addReg(OddReg,
                getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
      ++NumSTRD2STM;
    }
  } else {
    // Split into two instructions.
    unsigned NewOpc = (isLd)
      ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
    // so adjust and use t2LDRi12 here for that.
    unsigned NewOpc2 = (isLd)
      ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    DebugLoc dl = MBBI->getDebugLoc();
    // If this is a load and base register is killed, it may have been
    // re-defed by the load, make sure the first load does not clobber it.
    if (isLd &&
        (BaseKill || OffKill) &&
        (TRI->regsOverlap(EvenReg, BaseReg))) {
      assert(!TRI->regsOverlap(OddReg, BaseReg));
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, false,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, false,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    } else {
      if (OddReg == EvenReg && EvenDeadKill) {
        // If the two source operands are the same, the kill marker is
        // probably on the first one. e.g.
        //   t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
        EvenDeadKill = false;
        OddDeadKill = true;
      }
      // Never kill the base register in the first instruction.
      if (EvenReg == BaseReg)
        EvenDeadKill = false;
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, EvenUndef,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, OddUndef,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    }
    if (isLd)
      ++NumLDRD2LDR;
    else
      ++NumSTRD2STR;
  }

  MBBI = MBB.erase(MBBI);
  return true;
}

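// For example, "ldrd r1, r2, [r0]" is not encodable because the first
// register must be even; with ascending registers and a zero offset it is
// rewritten as "ldmia r0, {r1, r2}", otherwise it is split into two LDRs.
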
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  unsigned CurrOpc = ~0u;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned Position = 0;
  assert(Candidates.size() == 0);
  assert(MergeBaseCandidates.size() == 0);
  LiveRegsValid = false;

  for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
       I = MBBI) {
    // The instruction in front of the iterator is the one we look at.
    MBBI = std::prev(I);
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;
    ++Position;

    if (isMemoryOp(MBBI)) {
      unsigned Opcode = MBBI->getOpcode();
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      if (CurrBase == 0) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrPred = Pred;
        MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
        continue;
      }
      // Note: No need to match PredReg in the next if.
      if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
        // Watch out for:
        //   r4 := ldr [r0, #8]
        //   r4 := ldr [r0, #4]
        // or
        //   r0 := ldr [r0]
        // If a load overrides the base register or a register loaded by
        // another load in our chain, we cannot take this instruction.
        bool Overlap = false;
        if (isLoadSingle(Opcode)) {
          Overlap = (Base == Reg);
          if (!Overlap) {
            for (const MemOpQueueEntry &E : MemOps) {
              if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
                Overlap = true;
                break;
              }
            }
          }
        }

        if (!Overlap) {
          // Check the offset and sort the memory operation into the current
          // chain.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
            continue;
          } else {
            MemOpQueue::iterator MI, ME;
            for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
              if (Offset < MI->Offset) {
                // Found a place to insert.
                break;
              }
              if (Offset == MI->Offset) {
                // Collision, abort.
                MI = ME;
                break;
              }
            }
            if (MI != MemOps.end()) {
              MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position));
              continue;
            }
          }
        }
      }

      // Don't advance the iterator; the op will start a new chain next.
      MBBI = I;
      --Position;
      // Fallthrough to look into the existing chain.
    } else if (MBBI->isDebugValue()) {
      continue;
    } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
               MBBI->getOpcode() == ARM::t2STRDi8) {
      // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD
      // instructions; remember them because we may still be able to merge
      // add/sub into them.
      MergeBaseCandidates.push_back(MBBI);
    }

    // If we are here then the chain is broken; extract candidates for a
    // merge.
    if (MemOps.size() > 0) {
      FormCandidates(MemOps);
      // Reset for the next chain.
      CurrBase = 0;
      CurrOpc = ~0u;
      CurrPred = ARMCC::AL;
      MemOps.clear();
    }
  }
  if (MemOps.size() > 0)
    FormCandidates(MemOps);

  // Sort candidates so they get processed from the end to the beginning of
  // the basic block later; this is necessary for the liveness calculation.
  auto LessThan = [](const MergeCandidate *M0, const MergeCandidate *M1) {
    return M0->InsertPos < M1->InsertPos;
  };
  std::sort(Candidates.begin(), Candidates.end(), LessThan);

  // Go through the list of candidates and merge.
  bool Changed = false;
  for (const MergeCandidate *Candidate : Candidates) {
    if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
      MachineInstr *Merged = MergeOpsUpdate(*Candidate);
      // Merge preceding/trailing base inc/dec into the merged op.
      if (Merged) {
        Changed = true;
        unsigned Opcode = Merged->getOpcode();
        if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
          MergeBaseUpdateLSDouble(*Merged);
        else
          MergeBaseUpdateLSMultiple(Merged);
      } else {
        for (MachineInstr *MI : Candidate->Instrs) {
          if (MergeBaseUpdateLoadStore(MI))
            Changed = true;
        }
      }
    } else {
      assert(Candidate->Instrs.size() == 1);
      if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
        Changed = true;
    }
  }
  Candidates.clear();
  // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
  for (MachineInstr *MI : MergeBaseCandidates)
    MergeBaseUpdateLSDouble(*MI);
  MergeBaseCandidates.clear();

  return Changed;
}

/// If this is an exit BB, try merging the return ops ("bx lr" and
/// "mov pc, lr") into the preceding stack restore so it directly restores
/// the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  // Thumb1 LDM doesn't allow high registers.
  if (isThumb1) return false;
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = std::prev(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) &&
             "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  MF = &Fn;
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TL = STI->getTargetLowering();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &Fn.getRegInfo();
  RegClassInfoValid = false;
  isThumb2 = AFI->isThumb2Function();
  isThumb1 = AFI->isThumbFunction() && !isThumb2;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (STI->hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  Allocator.DestroyAll();
  return Modified;
}

namespace {
  /// Pre-register-allocation pass that moves loads / stores from consecutive
  /// memory locations close to each other so that it is more likely they
  /// will be combined later.
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &FirstReg,
                          unsigned &SecondReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVectorImpl<MachineInstr *> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getDataLayout();
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}
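/// Return true if the loads / stores in \p MemOps can safely be moved next
/// to each other: nothing between \p I and \p E may redefine \p Base or act
/// as a barrier (a call, terminator, or conflicting memory access), and the
/// estimated register pressure increase must stay acceptable.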
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSetImpl<MachineInstr*> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias
  // information some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      //   str r1, [r0]
      //   strh r5, [r0]
      //   str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate the register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}

/// Copy the memory operands of \p Op0 and \p Op1 into a new array assigned
/// to \p MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}

bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl, unsigned &NewOpc,
                                          unsigned &FirstReg,
                                          unsigned &SecondReg,
                                          unsigned &BaseReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12) {
    NewOpc = ARM::LDRD;
  } else if (Opcode == ARM::STRi12) {
    NewOpc = ARM::STRD;
  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else {
    return false;
  }

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement. At the moment, we ignore the memory operand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8; // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
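  // e.g. for t2LDRDi8 / t2STRDi8, Scale above is 4, so the limit below is
  // (1 << 8) * 4 == 1024: only multiples of 4 in [-1020, 1020] are accepted.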
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  FirstReg = Op0->getOperand(0).getReg();
  SecondReg = Op1->getOperand(0).getReg();
  if (FirstReg == SecondReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVectorImpl<MachineInstr *> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;
  });

  // The loads / stores of the same base are in order. Scan them from first
  // to last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative: if the instructions are too far apart, don't move
      // them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
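        // Skip ahead over the ops being merged (and any debug values) so the
        // new instructions are inserted after all of them.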
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register
        // pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned FirstReg = 0, SecondReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             FirstReg, SecondReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(FirstReg, TRC);
          MRI->constrainRegClass(SecondReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg, RegState::Define)
              .addReg(SecondReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg)
              .addReg(SecondReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          if (!isT2) {
            // Add register allocation hints to form register pairs.
            MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
            MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
          }
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
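/// Scan a basic block and collect loads / stores with a common base register
/// into per-base groups, stopping at calls, terminators, and repeated
/// base+offset pairs; each group with more than one member is then handed to
/// RescheduleOps.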
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isLoadSingle(Opc);
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}

/// Returns an instance of the load / store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}