1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file This file contains a pass that performs load / store related peephole 11 /// optimizations. This pass should be run after register allocation. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ARM.h" 16 #include "ARMBaseInstrInfo.h" 17 #include "ARMBaseRegisterInfo.h" 18 #include "ARMISelLowering.h" 19 #include "ARMMachineFunctionInfo.h" 20 #include "ARMSubtarget.h" 21 #include "MCTargetDesc/ARMAddressingModes.h" 22 #include "ThumbRegisterInfo.h" 23 #include "llvm/ADT/DenseMap.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/ADT/SmallPtrSet.h" 26 #include "llvm/ADT/SmallSet.h" 27 #include "llvm/ADT/SmallVector.h" 28 #include "llvm/ADT/Statistic.h" 29 #include "llvm/CodeGen/MachineBasicBlock.h" 30 #include "llvm/CodeGen/MachineFunctionPass.h" 31 #include "llvm/CodeGen/MachineInstr.h" 32 #include "llvm/CodeGen/MachineInstrBuilder.h" 33 #include "llvm/CodeGen/MachineRegisterInfo.h" 34 #include "llvm/CodeGen/RegisterClassInfo.h" 35 #include "llvm/CodeGen/SelectionDAGNodes.h" 36 #include "llvm/CodeGen/LivePhysRegs.h" 37 #include "llvm/IR/DataLayout.h" 38 #include "llvm/IR/DerivedTypes.h" 39 #include "llvm/IR/Function.h" 40 #include "llvm/Support/Allocator.h" 41 #include "llvm/Support/Debug.h" 42 #include "llvm/Support/ErrorHandling.h" 43 #include "llvm/Support/raw_ostream.h" 44 #include "llvm/Target/TargetInstrInfo.h" 45 #include "llvm/Target/TargetMachine.h" 46 #include "llvm/Target/TargetRegisterInfo.h" 47 using namespace llvm; 48 49 #define DEBUG_TYPE "arm-ldst-opt" 50 51 STATISTIC(NumLDMGened , "Number of ldm instructions generated"); 52 STATISTIC(NumSTMGened , "Number of stm instructions generated"); 53 STATISTIC(NumVLDMGened, "Number of vldm instructions generated"); 54 STATISTIC(NumVSTMGened, "Number of vstm instructions generated"); 55 STATISTIC(NumLdStMoved, "Number of load / store instructions moved"); 56 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation"); 57 STATISTIC(NumSTRDFormed,"Number of strd created before allocation"); 58 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm"); 59 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm"); 60 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's"); 61 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's"); 62 63 namespace { 64 /// Post- register allocation pass the combine load / store instructions to 65 /// form ldm / stm instructions. 66 struct ARMLoadStoreOpt : public MachineFunctionPass { 67 static char ID; 68 ARMLoadStoreOpt() : MachineFunctionPass(ID) {} 69 70 const MachineFunction *MF; 71 const TargetInstrInfo *TII; 72 const TargetRegisterInfo *TRI; 73 const MachineRegisterInfo *MRI; 74 const ARMSubtarget *STI; 75 const TargetLowering *TL; 76 ARMFunctionInfo *AFI; 77 LivePhysRegs LiveRegs; 78 RegisterClassInfo RegClassInfo; 79 MachineBasicBlock::const_iterator LiveRegPos; 80 bool LiveRegsValid; 81 bool RegClassInfoValid; 82 bool isThumb1, isThumb2; 83 84 bool runOnMachineFunction(MachineFunction &Fn) override; 85 86 const char *getPassName() const override { 87 return "ARM load / store optimization pass"; 88 } 89 90 private: 91 /// A set of load/store MachineInstrs with same base register sorted by 92 /// offset. 93 struct MemOpQueueEntry { 94 MachineInstr *MI; 95 int Offset; ///< Load/Store offset. 96 unsigned Position; ///< Position as counted from end of basic block. 97 MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position) 98 : MI(MI), Offset(Offset), Position(Position) {} 99 }; 100 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; 101 102 /// A set of MachineInstrs that fulfill (nearly all) conditions to get 103 /// merged into a LDM/STM. 104 struct MergeCandidate { 105 /// List of instructions ordered by load/store offset. 106 SmallVector<MachineInstr*, 4> Instrs; 107 /// Index in Instrs of the instruction being latest in the schedule. 108 unsigned LatestMIIdx; 109 /// Index in Instrs of the instruction being earliest in the schedule. 110 unsigned EarliestMIIdx; 111 /// Index into the basic block where the merged instruction will be 112 /// inserted. (See MemOpQueueEntry.Position) 113 unsigned InsertPos; 114 /// Whether the instructions can be merged into a ldm/stm instruction. 115 bool CanMergeToLSMulti; 116 /// Whether the instructions can be merged into a ldrd/strd instruction. 117 bool CanMergeToLSDouble; 118 }; 119 SpecificBumpPtrAllocator<MergeCandidate> Allocator; 120 SmallVector<const MergeCandidate*,4> Candidates; 121 SmallVector<MachineInstr*,4> MergeBaseCandidates; 122 123 void moveLiveRegsBefore(const MachineBasicBlock &MBB, 124 MachineBasicBlock::const_iterator Before); 125 unsigned findFreeReg(const TargetRegisterClass &RegClass); 126 void UpdateBaseRegUses(MachineBasicBlock &MBB, 127 MachineBasicBlock::iterator MBBI, 128 DebugLoc DL, unsigned Base, unsigned WordOffset, 129 ARMCC::CondCodes Pred, unsigned PredReg); 130 MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB, 131 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, 132 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, 133 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs); 134 MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB, 135 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, 136 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, 137 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const; 138 void FormCandidates(const MemOpQueue &MemOps); 139 MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand); 140 bool FixInvalidRegPairOp(MachineBasicBlock &MBB, 141 MachineBasicBlock::iterator &MBBI); 142 bool MergeBaseUpdateLoadStore(MachineInstr *MI); 143 bool MergeBaseUpdateLSMultiple(MachineInstr *MI); 144 bool MergeBaseUpdateLSDouble(MachineInstr &MI) const; 145 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); 146 bool MergeReturnIntoLDM(MachineBasicBlock &MBB); 147 }; 148 char ARMLoadStoreOpt::ID = 0; 149 } 150 151 static bool definesCPSR(const MachineInstr *MI) { 152 for (const auto &MO : MI->operands()) { 153 if (!MO.isReg()) 154 continue; 155 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead()) 156 // If the instruction has live CPSR def, then it's not safe to fold it 157 // into load / store. 158 return true; 159 } 160 161 return false; 162 } 163 164 static int getMemoryOpOffset(const MachineInstr *MI) { 165 unsigned Opcode = MI->getOpcode(); 166 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD; 167 unsigned NumOperands = MI->getDesc().getNumOperands(); 168 unsigned OffField = MI->getOperand(NumOperands-3).getImm(); 169 170 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 || 171 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 || 172 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 || 173 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12) 174 return OffField; 175 176 // Thumb1 immediate offsets are scaled by 4 177 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi || 178 Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) 179 return OffField * 4; 180 181 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField) 182 : ARM_AM::getAM5Offset(OffField) * 4; 183 ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField) 184 : ARM_AM::getAM5Op(OffField); 185 186 if (Op == ARM_AM::sub) 187 return -Offset; 188 189 return Offset; 190 } 191 192 static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) { 193 return MI.getOperand(1); 194 } 195 196 static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) { 197 return MI.getOperand(0); 198 } 199 200 static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) { 201 switch (Opcode) { 202 default: llvm_unreachable("Unhandled opcode!"); 203 case ARM::LDRi12: 204 ++NumLDMGened; 205 switch (Mode) { 206 default: llvm_unreachable("Unhandled submode!"); 207 case ARM_AM::ia: return ARM::LDMIA; 208 case ARM_AM::da: return ARM::LDMDA; 209 case ARM_AM::db: return ARM::LDMDB; 210 case ARM_AM::ib: return ARM::LDMIB; 211 } 212 case ARM::STRi12: 213 ++NumSTMGened; 214 switch (Mode) { 215 default: llvm_unreachable("Unhandled submode!"); 216 case ARM_AM::ia: return ARM::STMIA; 217 case ARM_AM::da: return ARM::STMDA; 218 case ARM_AM::db: return ARM::STMDB; 219 case ARM_AM::ib: return ARM::STMIB; 220 } 221 case ARM::tLDRi: 222 case ARM::tLDRspi: 223 // tLDMIA is writeback-only - unless the base register is in the input 224 // reglist. 225 ++NumLDMGened; 226 switch (Mode) { 227 default: llvm_unreachable("Unhandled submode!"); 228 case ARM_AM::ia: return ARM::tLDMIA; 229 } 230 case ARM::tSTRi: 231 case ARM::tSTRspi: 232 // There is no non-writeback tSTMIA either. 233 ++NumSTMGened; 234 switch (Mode) { 235 default: llvm_unreachable("Unhandled submode!"); 236 case ARM_AM::ia: return ARM::tSTMIA_UPD; 237 } 238 case ARM::t2LDRi8: 239 case ARM::t2LDRi12: 240 ++NumLDMGened; 241 switch (Mode) { 242 default: llvm_unreachable("Unhandled submode!"); 243 case ARM_AM::ia: return ARM::t2LDMIA; 244 case ARM_AM::db: return ARM::t2LDMDB; 245 } 246 case ARM::t2STRi8: 247 case ARM::t2STRi12: 248 ++NumSTMGened; 249 switch (Mode) { 250 default: llvm_unreachable("Unhandled submode!"); 251 case ARM_AM::ia: return ARM::t2STMIA; 252 case ARM_AM::db: return ARM::t2STMDB; 253 } 254 case ARM::VLDRS: 255 ++NumVLDMGened; 256 switch (Mode) { 257 default: llvm_unreachable("Unhandled submode!"); 258 case ARM_AM::ia: return ARM::VLDMSIA; 259 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists. 260 } 261 case ARM::VSTRS: 262 ++NumVSTMGened; 263 switch (Mode) { 264 default: llvm_unreachable("Unhandled submode!"); 265 case ARM_AM::ia: return ARM::VSTMSIA; 266 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists. 267 } 268 case ARM::VLDRD: 269 ++NumVLDMGened; 270 switch (Mode) { 271 default: llvm_unreachable("Unhandled submode!"); 272 case ARM_AM::ia: return ARM::VLDMDIA; 273 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists. 274 } 275 case ARM::VSTRD: 276 ++NumVSTMGened; 277 switch (Mode) { 278 default: llvm_unreachable("Unhandled submode!"); 279 case ARM_AM::ia: return ARM::VSTMDIA; 280 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists. 281 } 282 } 283 } 284 285 static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) { 286 switch (Opcode) { 287 default: llvm_unreachable("Unhandled opcode!"); 288 case ARM::LDMIA_RET: 289 case ARM::LDMIA: 290 case ARM::LDMIA_UPD: 291 case ARM::STMIA: 292 case ARM::STMIA_UPD: 293 case ARM::tLDMIA: 294 case ARM::tLDMIA_UPD: 295 case ARM::tSTMIA_UPD: 296 case ARM::t2LDMIA_RET: 297 case ARM::t2LDMIA: 298 case ARM::t2LDMIA_UPD: 299 case ARM::t2STMIA: 300 case ARM::t2STMIA_UPD: 301 case ARM::VLDMSIA: 302 case ARM::VLDMSIA_UPD: 303 case ARM::VSTMSIA: 304 case ARM::VSTMSIA_UPD: 305 case ARM::VLDMDIA: 306 case ARM::VLDMDIA_UPD: 307 case ARM::VSTMDIA: 308 case ARM::VSTMDIA_UPD: 309 return ARM_AM::ia; 310 311 case ARM::LDMDA: 312 case ARM::LDMDA_UPD: 313 case ARM::STMDA: 314 case ARM::STMDA_UPD: 315 return ARM_AM::da; 316 317 case ARM::LDMDB: 318 case ARM::LDMDB_UPD: 319 case ARM::STMDB: 320 case ARM::STMDB_UPD: 321 case ARM::t2LDMDB: 322 case ARM::t2LDMDB_UPD: 323 case ARM::t2STMDB: 324 case ARM::t2STMDB_UPD: 325 case ARM::VLDMSDB_UPD: 326 case ARM::VSTMSDB_UPD: 327 case ARM::VLDMDDB_UPD: 328 case ARM::VSTMDDB_UPD: 329 return ARM_AM::db; 330 331 case ARM::LDMIB: 332 case ARM::LDMIB_UPD: 333 case ARM::STMIB: 334 case ARM::STMIB_UPD: 335 return ARM_AM::ib; 336 } 337 } 338 339 static bool isT1i32Load(unsigned Opc) { 340 return Opc == ARM::tLDRi || Opc == ARM::tLDRspi; 341 } 342 343 static bool isT2i32Load(unsigned Opc) { 344 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8; 345 } 346 347 static bool isi32Load(unsigned Opc) { 348 return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ; 349 } 350 351 static bool isT1i32Store(unsigned Opc) { 352 return Opc == ARM::tSTRi || Opc == ARM::tSTRspi; 353 } 354 355 static bool isT2i32Store(unsigned Opc) { 356 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8; 357 } 358 359 static bool isi32Store(unsigned Opc) { 360 return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc); 361 } 362 363 static bool isLoadSingle(unsigned Opc) { 364 return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD; 365 } 366 367 static unsigned getImmScale(unsigned Opc) { 368 switch (Opc) { 369 default: llvm_unreachable("Unhandled opcode!"); 370 case ARM::tLDRi: 371 case ARM::tSTRi: 372 case ARM::tLDRspi: 373 case ARM::tSTRspi: 374 return 1; 375 case ARM::tLDRHi: 376 case ARM::tSTRHi: 377 return 2; 378 case ARM::tLDRBi: 379 case ARM::tSTRBi: 380 return 4; 381 } 382 } 383 384 static unsigned getLSMultipleTransferSize(const MachineInstr *MI) { 385 switch (MI->getOpcode()) { 386 default: return 0; 387 case ARM::LDRi12: 388 case ARM::STRi12: 389 case ARM::tLDRi: 390 case ARM::tSTRi: 391 case ARM::tLDRspi: 392 case ARM::tSTRspi: 393 case ARM::t2LDRi8: 394 case ARM::t2LDRi12: 395 case ARM::t2STRi8: 396 case ARM::t2STRi12: 397 case ARM::VLDRS: 398 case ARM::VSTRS: 399 return 4; 400 case ARM::VLDRD: 401 case ARM::VSTRD: 402 return 8; 403 case ARM::LDMIA: 404 case ARM::LDMDA: 405 case ARM::LDMDB: 406 case ARM::LDMIB: 407 case ARM::STMIA: 408 case ARM::STMDA: 409 case ARM::STMDB: 410 case ARM::STMIB: 411 case ARM::tLDMIA: 412 case ARM::tLDMIA_UPD: 413 case ARM::tSTMIA_UPD: 414 case ARM::t2LDMIA: 415 case ARM::t2LDMDB: 416 case ARM::t2STMIA: 417 case ARM::t2STMDB: 418 case ARM::VLDMSIA: 419 case ARM::VSTMSIA: 420 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4; 421 case ARM::VLDMDIA: 422 case ARM::VSTMDIA: 423 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8; 424 } 425 } 426 427 /// Update future uses of the base register with the offset introduced 428 /// due to writeback. This function only works on Thumb1. 429 void 430 ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, 431 MachineBasicBlock::iterator MBBI, 432 DebugLoc DL, unsigned Base, 433 unsigned WordOffset, 434 ARMCC::CondCodes Pred, unsigned PredReg) { 435 assert(isThumb1 && "Can only update base register uses for Thumb1!"); 436 // Start updating any instructions with immediate offsets. Insert a SUB before 437 // the first non-updateable instruction (if any). 438 for (; MBBI != MBB.end(); ++MBBI) { 439 bool InsertSub = false; 440 unsigned Opc = MBBI->getOpcode(); 441 442 if (MBBI->readsRegister(Base)) { 443 int Offset; 444 bool IsLoad = 445 Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi; 446 bool IsStore = 447 Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi; 448 449 if (IsLoad || IsStore) { 450 // Loads and stores with immediate offsets can be updated, but only if 451 // the new offset isn't negative. 452 // The MachineOperand containing the offset immediate is the last one 453 // before predicates. 454 MachineOperand &MO = 455 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); 456 // The offsets are scaled by 1, 2 or 4 depending on the Opcode. 457 Offset = MO.getImm() - WordOffset * getImmScale(Opc); 458 459 // If storing the base register, it needs to be reset first. 460 unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg(); 461 462 if (Offset >= 0 && !(IsStore && InstrSrcReg == Base)) 463 MO.setImm(Offset); 464 else 465 InsertSub = true; 466 467 } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) && 468 !definesCPSR(MBBI)) { 469 // SUBS/ADDS using this register, with a dead def of the CPSR. 470 // Merge it with the update; if the merged offset is too large, 471 // insert a new sub instead. 472 MachineOperand &MO = 473 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); 474 Offset = (Opc == ARM::tSUBi8) ? 475 MO.getImm() + WordOffset * 4 : 476 MO.getImm() - WordOffset * 4 ; 477 if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) { 478 // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if 479 // Offset == 0. 480 MO.setImm(Offset); 481 // The base register has now been reset, so exit early. 482 return; 483 } else { 484 InsertSub = true; 485 } 486 487 } else { 488 // Can't update the instruction. 489 InsertSub = true; 490 } 491 492 } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) { 493 // Since SUBS sets the condition flags, we can't place the base reset 494 // after an instruction that has a live CPSR def. 495 // The base register might also contain an argument for a function call. 496 InsertSub = true; 497 } 498 499 if (InsertSub) { 500 // An instruction above couldn't be updated, so insert a sub. 501 AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true) 502 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg); 503 return; 504 } 505 506 if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base)) 507 // Register got killed. Stop updating. 508 return; 509 } 510 511 // End of block was reached. 512 if (MBB.succ_size() > 0) { 513 // FIXME: Because of a bug, live registers are sometimes missing from 514 // the successor blocks' live-in sets. This means we can't trust that 515 // information and *always* have to reset at the end of a block. 516 // See PR21029. 517 if (MBBI != MBB.end()) --MBBI; 518 AddDefaultT1CC( 519 BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true) 520 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg); 521 } 522 } 523 524 /// Return the first register of class \p RegClass that is not in \p Regs. 525 unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) { 526 if (!RegClassInfoValid) { 527 RegClassInfo.runOnMachineFunction(*MF); 528 RegClassInfoValid = true; 529 } 530 531 for (unsigned Reg : RegClassInfo.getOrder(&RegClass)) 532 if (!LiveRegs.contains(Reg)) 533 return Reg; 534 return 0; 535 } 536 537 /// Compute live registers just before instruction \p Before (in normal schedule 538 /// direction). Computes backwards so multiple queries in the same block must 539 /// come in reverse order. 540 void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB, 541 MachineBasicBlock::const_iterator Before) { 542 // Initialize if we never queried in this block. 543 if (!LiveRegsValid) { 544 LiveRegs.init(TRI); 545 LiveRegs.addLiveOuts(&MBB, true); 546 LiveRegPos = MBB.end(); 547 LiveRegsValid = true; 548 } 549 // Move backward just before the "Before" position. 550 while (LiveRegPos != Before) { 551 --LiveRegPos; 552 LiveRegs.stepBackward(*LiveRegPos); 553 } 554 } 555 556 static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs, 557 unsigned Reg) { 558 for (const std::pair<unsigned, bool> &R : Regs) 559 if (R.first == Reg) 560 return true; 561 return false; 562 } 563 564 /// Create and insert a LDM or STM with Base as base register and registers in 565 /// Regs as the register operands that would be loaded / stored. It returns 566 /// true if the transformation is done. 567 MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB, 568 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, 569 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, 570 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) { 571 unsigned NumRegs = Regs.size(); 572 assert(NumRegs > 1); 573 574 // For Thumb1 targets, it might be necessary to clobber the CPSR to merge. 575 // Compute liveness information for that register to make the decision. 576 bool SafeToClobberCPSR = !isThumb1 || 577 (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) == 578 MachineBasicBlock::LQR_Dead); 579 580 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback. 581 582 // Exception: If the base register is in the input reglist, Thumb1 LDM is 583 // non-writeback. 584 // It's also not possible to merge an STR of the base register in Thumb1. 585 if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) { 586 assert(Base != ARM::SP && "Thumb1 does not allow SP in register list"); 587 if (Opcode == ARM::tLDRi) { 588 Writeback = false; 589 } else if (Opcode == ARM::tSTRi) { 590 return nullptr; 591 } 592 } 593 594 ARM_AM::AMSubMode Mode = ARM_AM::ia; 595 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA. 596 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); 597 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1; 598 599 if (Offset == 4 && haveIBAndDA) { 600 Mode = ARM_AM::ib; 601 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) { 602 Mode = ARM_AM::da; 603 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) { 604 // VLDM/VSTM do not support DB mode without also updating the base reg. 605 Mode = ARM_AM::db; 606 } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) { 607 // Check if this is a supported opcode before inserting instructions to 608 // calculate a new base register. 609 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr; 610 611 // If starting offset isn't zero, insert a MI to materialize a new base. 612 // But only do so if it is cost effective, i.e. merging more than two 613 // loads / stores. 614 if (NumRegs <= 2) 615 return nullptr; 616 617 // On Thumb1, it's not worth materializing a new base register without 618 // clobbering the CPSR (i.e. not using ADDS/SUBS). 619 if (!SafeToClobberCPSR) 620 return nullptr; 621 622 unsigned NewBase; 623 if (isi32Load(Opcode)) { 624 // If it is a load, then just use one of the destination register to 625 // use as the new base. 626 NewBase = Regs[NumRegs-1].first; 627 } else { 628 // Find a free register that we can use as scratch register. 629 moveLiveRegsBefore(MBB, InsertBefore); 630 // The merged instruction does not exist yet but will use several Regs if 631 // it is a Store. 632 if (!isLoadSingle(Opcode)) 633 for (const std::pair<unsigned, bool> &R : Regs) 634 LiveRegs.addReg(R.first); 635 636 NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass); 637 if (NewBase == 0) 638 return nullptr; 639 } 640 641 int BaseOpc = 642 isThumb2 ? ARM::t2ADDri : 643 (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi : 644 (isThumb1 && Offset < 8) ? ARM::tADDi3 : 645 isThumb1 ? ARM::tADDi8 : ARM::ADDri; 646 647 if (Offset < 0) { 648 Offset = - Offset; 649 BaseOpc = 650 isThumb2 ? ARM::t2SUBri : 651 (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 : 652 isThumb1 ? ARM::tSUBi8 : ARM::SUBri; 653 } 654 655 if (!TL->isLegalAddImmediate(Offset)) 656 // FIXME: Try add with register operand? 657 return nullptr; // Probably not worth it then. 658 659 // We can only append a kill flag to the add/sub input if the value is not 660 // used in the register list of the stm as well. 661 bool KillOldBase = BaseKill && 662 (!isi32Store(Opcode) || !ContainsReg(Regs, Base)); 663 664 if (isThumb1) { 665 // Thumb1: depending on immediate size, use either 666 // ADDS NewBase, Base, #imm3 667 // or 668 // MOV NewBase, Base 669 // ADDS NewBase, #imm8. 670 if (Base != NewBase && 671 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) { 672 // Need to insert a MOV to the new base first. 673 if (isARMLowRegister(NewBase) && isARMLowRegister(Base) && 674 !STI->hasV6Ops()) { 675 // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr 676 if (Pred != ARMCC::AL) 677 return nullptr; 678 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase) 679 .addReg(Base, getKillRegState(KillOldBase)); 680 } else 681 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase) 682 .addReg(Base, getKillRegState(KillOldBase)) 683 .addImm(Pred).addReg(PredReg); 684 685 // The following ADDS/SUBS becomes an update. 686 Base = NewBase; 687 KillOldBase = true; 688 } 689 if (BaseOpc == ARM::tADDrSPi) { 690 assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4"); 691 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase) 692 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4) 693 .addImm(Pred).addReg(PredReg); 694 } else 695 AddDefaultT1CC( 696 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true) 697 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset) 698 .addImm(Pred).addReg(PredReg); 699 } else { 700 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase) 701 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset) 702 .addImm(Pred).addReg(PredReg).addReg(0); 703 } 704 Base = NewBase; 705 BaseKill = true; // New base is always killed straight away. 706 } 707 708 bool isDef = isLoadSingle(Opcode); 709 710 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with 711 // base register writeback. 712 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); 713 if (!Opcode) 714 return nullptr; 715 716 // Check if a Thumb1 LDM/STM merge is safe. This is the case if: 717 // - There is no writeback (LDM of base register), 718 // - the base register is killed by the merged instruction, 719 // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS 720 // to reset the base register. 721 // Otherwise, don't merge. 722 // It's safe to return here since the code to materialize a new base register 723 // above is also conditional on SafeToClobberCPSR. 724 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill) 725 return nullptr; 726 727 MachineInstrBuilder MIB; 728 729 if (Writeback) { 730 if (Opcode == ARM::tLDMIA) 731 // Update tLDMIA with writeback if necessary. 732 Opcode = ARM::tLDMIA_UPD; 733 734 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode)); 735 736 // Thumb1: we might need to set base writeback when building the MI. 737 MIB.addReg(Base, getDefRegState(true)) 738 .addReg(Base, getKillRegState(BaseKill)); 739 740 // The base isn't dead after a merged instruction with writeback. 741 // Insert a sub instruction after the newly formed instruction to reset. 742 if (!BaseKill) 743 UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg); 744 745 } else { 746 // No writeback, simply build the MachineInstr. 747 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode)); 748 MIB.addReg(Base, getKillRegState(BaseKill)); 749 } 750 751 MIB.addImm(Pred).addReg(PredReg); 752 753 for (const std::pair<unsigned, bool> &R : Regs) 754 MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second)); 755 756 return MIB.getInstr(); 757 } 758 759 MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB, 760 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, 761 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, 762 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const { 763 bool IsLoad = isi32Load(Opcode); 764 assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store"); 765 unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8; 766 767 assert(Regs.size() == 2); 768 MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL, 769 TII->get(LoadStoreOpcode)); 770 if (IsLoad) { 771 MIB.addReg(Regs[0].first, RegState::Define) 772 .addReg(Regs[1].first, RegState::Define); 773 } else { 774 MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second)) 775 .addReg(Regs[1].first, getKillRegState(Regs[1].second)); 776 } 777 MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 778 return MIB.getInstr(); 779 } 780 781 /// Call MergeOps and update MemOps and merges accordingly on success. 782 MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) { 783 const MachineInstr *First = Cand.Instrs.front(); 784 unsigned Opcode = First->getOpcode(); 785 bool IsLoad = isLoadSingle(Opcode); 786 SmallVector<std::pair<unsigned, bool>, 8> Regs; 787 SmallVector<unsigned, 4> ImpDefs; 788 DenseSet<unsigned> KilledRegs; 789 DenseSet<unsigned> UsedRegs; 790 // Determine list of registers and list of implicit super-register defs. 791 for (const MachineInstr *MI : Cand.Instrs) { 792 const MachineOperand &MO = getLoadStoreRegOp(*MI); 793 unsigned Reg = MO.getReg(); 794 bool IsKill = MO.isKill(); 795 if (IsKill) 796 KilledRegs.insert(Reg); 797 Regs.push_back(std::make_pair(Reg, IsKill)); 798 UsedRegs.insert(Reg); 799 800 if (IsLoad) { 801 // Collect any implicit defs of super-registers, after merging we can't 802 // be sure anymore that we properly preserved these live ranges and must 803 // removed these implicit operands. 804 for (const MachineOperand &MO : MI->implicit_operands()) { 805 if (!MO.isReg() || !MO.isDef() || MO.isDead()) 806 continue; 807 assert(MO.isImplicit()); 808 unsigned DefReg = MO.getReg(); 809 810 if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end()) 811 continue; 812 // We can ignore cases where the super-reg is read and written. 813 if (MI->readsRegister(DefReg)) 814 continue; 815 ImpDefs.push_back(DefReg); 816 } 817 } 818 } 819 820 // Attempt the merge. 821 typedef MachineBasicBlock::iterator iterator; 822 MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx]; 823 iterator InsertBefore = std::next(iterator(LatestMI)); 824 MachineBasicBlock &MBB = *LatestMI->getParent(); 825 unsigned Offset = getMemoryOpOffset(First); 826 unsigned Base = getLoadStoreBaseOp(*First).getReg(); 827 bool BaseKill = LatestMI->killsRegister(Base); 828 unsigned PredReg = 0; 829 ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg); 830 DebugLoc DL = First->getDebugLoc(); 831 MachineInstr *Merged = nullptr; 832 if (Cand.CanMergeToLSDouble) 833 Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill, 834 Opcode, Pred, PredReg, DL, Regs); 835 if (!Merged && Cand.CanMergeToLSMulti) 836 Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill, 837 Opcode, Pred, PredReg, DL, Regs); 838 if (!Merged) 839 return nullptr; 840 841 // Determine earliest instruction that will get removed. We then keep an 842 // iterator just above it so the following erases don't invalidated it. 843 iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]); 844 bool EarliestAtBegin = false; 845 if (EarliestI == MBB.begin()) { 846 EarliestAtBegin = true; 847 } else { 848 EarliestI = std::prev(EarliestI); 849 } 850 851 // Remove instructions which have been merged. 852 for (MachineInstr *MI : Cand.Instrs) 853 MBB.erase(MI); 854 855 // Determine range between the earliest removed instruction and the new one. 856 if (EarliestAtBegin) 857 EarliestI = MBB.begin(); 858 else 859 EarliestI = std::next(EarliestI); 860 auto FixupRange = make_range(EarliestI, iterator(Merged)); 861 862 if (isLoadSingle(Opcode)) { 863 // If the previous loads defined a super-reg, then we have to mark earlier 864 // operands undef; Replicate the super-reg def on the merged instruction. 865 for (MachineInstr &MI : FixupRange) { 866 for (unsigned &ImpDefReg : ImpDefs) { 867 for (MachineOperand &MO : MI.implicit_operands()) { 868 if (!MO.isReg() || MO.getReg() != ImpDefReg) 869 continue; 870 if (MO.readsReg()) 871 MO.setIsUndef(); 872 else if (MO.isDef()) 873 ImpDefReg = 0; 874 } 875 } 876 } 877 878 MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged); 879 for (unsigned ImpDef : ImpDefs) 880 MIB.addReg(ImpDef, RegState::ImplicitDefine); 881 } else { 882 // Remove kill flags: We are possibly storing the values later now. 883 assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD); 884 for (MachineInstr &MI : FixupRange) { 885 for (MachineOperand &MO : MI.uses()) { 886 if (!MO.isReg() || !MO.isKill()) 887 continue; 888 if (UsedRegs.count(MO.getReg())) 889 MO.setIsKill(false); 890 } 891 } 892 assert(ImpDefs.empty()); 893 } 894 895 return Merged; 896 } 897 898 static bool isValidLSDoubleOffset(int Offset) { 899 unsigned Value = abs(Offset); 900 // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally 901 // multiplied by 4. 902 return (Value % 4) == 0 && Value < 1024; 903 } 904 905 /// Find candidates for load/store multiple merge in list of MemOpQueueEntries. 906 void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) { 907 const MachineInstr *FirstMI = MemOps[0].MI; 908 unsigned Opcode = FirstMI->getOpcode(); 909 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); 910 unsigned Size = getLSMultipleTransferSize(FirstMI); 911 912 unsigned SIndex = 0; 913 unsigned EIndex = MemOps.size(); 914 do { 915 // Look at the first instruction. 916 const MachineInstr *MI = MemOps[SIndex].MI; 917 int Offset = MemOps[SIndex].Offset; 918 const MachineOperand &PMO = getLoadStoreRegOp(*MI); 919 unsigned PReg = PMO.getReg(); 920 unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg); 921 unsigned Latest = SIndex; 922 unsigned Earliest = SIndex; 923 unsigned Count = 1; 924 bool CanMergeToLSDouble = 925 STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset); 926 // ARM errata 602117: LDRD with base in list may result in incorrect base 927 // register when interrupted or faulted. 928 if (STI->isCortexM3() && isi32Load(Opcode) && 929 PReg == getLoadStoreBaseOp(*MI).getReg()) 930 CanMergeToLSDouble = false; 931 932 bool CanMergeToLSMulti = true; 933 // On swift vldm/vstm starting with an odd register number as that needs 934 // more uops than single vldrs. 935 if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1) 936 CanMergeToLSMulti = false; 937 938 // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it 939 // deprecated; LDM to PC is fine but cannot happen here. 940 if (PReg == ARM::SP || PReg == ARM::PC) 941 CanMergeToLSMulti = CanMergeToLSDouble = false; 942 943 // Merge following instructions where possible. 944 for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) { 945 int NewOffset = MemOps[I].Offset; 946 if (NewOffset != Offset + (int)Size) 947 break; 948 const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI); 949 unsigned Reg = MO.getReg(); 950 if (Reg == ARM::SP || Reg == ARM::PC) 951 break; 952 953 // See if the current load/store may be part of a multi load/store. 954 unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg); 955 bool PartOfLSMulti = CanMergeToLSMulti; 956 if (PartOfLSMulti) { 957 // Register numbers must be in ascending order. 958 if (RegNum <= PRegNum) 959 PartOfLSMulti = false; 960 // For VFP / NEON load/store multiples, the registers must be 961 // consecutive and within the limit on the number of registers per 962 // instruction. 963 else if (!isNotVFP && RegNum != PRegNum+1) 964 PartOfLSMulti = false; 965 } 966 // See if the current load/store may be part of a double load/store. 967 bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1; 968 969 if (!PartOfLSMulti && !PartOfLSDouble) 970 break; 971 CanMergeToLSMulti &= PartOfLSMulti; 972 CanMergeToLSDouble &= PartOfLSDouble; 973 // Track MemOp with latest and earliest position (Positions are 974 // counted in reverse). 975 unsigned Position = MemOps[I].Position; 976 if (Position < MemOps[Latest].Position) 977 Latest = I; 978 else if (Position > MemOps[Earliest].Position) 979 Earliest = I; 980 // Prepare for next MemOp. 981 Offset += Size; 982 PRegNum = RegNum; 983 } 984 985 // Form a candidate from the Ops collected so far. 986 MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate; 987 for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C) 988 Candidate->Instrs.push_back(MemOps[C].MI); 989 Candidate->LatestMIIdx = Latest - SIndex; 990 Candidate->EarliestMIIdx = Earliest - SIndex; 991 Candidate->InsertPos = MemOps[Latest].Position; 992 if (Count == 1) 993 CanMergeToLSMulti = CanMergeToLSDouble = false; 994 Candidate->CanMergeToLSMulti = CanMergeToLSMulti; 995 Candidate->CanMergeToLSDouble = CanMergeToLSDouble; 996 Candidates.push_back(Candidate); 997 // Continue after the chain. 998 SIndex += Count; 999 } while (SIndex < EIndex); 1000 } 1001 1002 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, 1003 ARM_AM::AMSubMode Mode) { 1004 switch (Opc) { 1005 default: llvm_unreachable("Unhandled opcode!"); 1006 case ARM::LDMIA: 1007 case ARM::LDMDA: 1008 case ARM::LDMDB: 1009 case ARM::LDMIB: 1010 switch (Mode) { 1011 default: llvm_unreachable("Unhandled submode!"); 1012 case ARM_AM::ia: return ARM::LDMIA_UPD; 1013 case ARM_AM::ib: return ARM::LDMIB_UPD; 1014 case ARM_AM::da: return ARM::LDMDA_UPD; 1015 case ARM_AM::db: return ARM::LDMDB_UPD; 1016 } 1017 case ARM::STMIA: 1018 case ARM::STMDA: 1019 case ARM::STMDB: 1020 case ARM::STMIB: 1021 switch (Mode) { 1022 default: llvm_unreachable("Unhandled submode!"); 1023 case ARM_AM::ia: return ARM::STMIA_UPD; 1024 case ARM_AM::ib: return ARM::STMIB_UPD; 1025 case ARM_AM::da: return ARM::STMDA_UPD; 1026 case ARM_AM::db: return ARM::STMDB_UPD; 1027 } 1028 case ARM::t2LDMIA: 1029 case ARM::t2LDMDB: 1030 switch (Mode) { 1031 default: llvm_unreachable("Unhandled submode!"); 1032 case ARM_AM::ia: return ARM::t2LDMIA_UPD; 1033 case ARM_AM::db: return ARM::t2LDMDB_UPD; 1034 } 1035 case ARM::t2STMIA: 1036 case ARM::t2STMDB: 1037 switch (Mode) { 1038 default: llvm_unreachable("Unhandled submode!"); 1039 case ARM_AM::ia: return ARM::t2STMIA_UPD; 1040 case ARM_AM::db: return ARM::t2STMDB_UPD; 1041 } 1042 case ARM::VLDMSIA: 1043 switch (Mode) { 1044 default: llvm_unreachable("Unhandled submode!"); 1045 case ARM_AM::ia: return ARM::VLDMSIA_UPD; 1046 case ARM_AM::db: return ARM::VLDMSDB_UPD; 1047 } 1048 case ARM::VLDMDIA: 1049 switch (Mode) { 1050 default: llvm_unreachable("Unhandled submode!"); 1051 case ARM_AM::ia: return ARM::VLDMDIA_UPD; 1052 case ARM_AM::db: return ARM::VLDMDDB_UPD; 1053 } 1054 case ARM::VSTMSIA: 1055 switch (Mode) { 1056 default: llvm_unreachable("Unhandled submode!"); 1057 case ARM_AM::ia: return ARM::VSTMSIA_UPD; 1058 case ARM_AM::db: return ARM::VSTMSDB_UPD; 1059 } 1060 case ARM::VSTMDIA: 1061 switch (Mode) { 1062 default: llvm_unreachable("Unhandled submode!"); 1063 case ARM_AM::ia: return ARM::VSTMDIA_UPD; 1064 case ARM_AM::db: return ARM::VSTMDDB_UPD; 1065 } 1066 } 1067 } 1068 1069 /// Check if the given instruction increments or decrements a register and 1070 /// return the amount it is incremented/decremented. Returns 0 if the CPSR flags 1071 /// generated by the instruction are possibly read as well. 1072 static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg, 1073 ARMCC::CondCodes Pred, unsigned PredReg) { 1074 bool CheckCPSRDef; 1075 int Scale; 1076 switch (MI.getOpcode()) { 1077 case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break; 1078 case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break; 1079 case ARM::t2SUBri: 1080 case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break; 1081 case ARM::t2ADDri: 1082 case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break; 1083 case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break; 1084 case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break; 1085 default: return 0; 1086 } 1087 1088 unsigned MIPredReg; 1089 if (MI.getOperand(0).getReg() != Reg || 1090 MI.getOperand(1).getReg() != Reg || 1091 getInstrPredicate(&MI, MIPredReg) != Pred || 1092 MIPredReg != PredReg) 1093 return 0; 1094 1095 if (CheckCPSRDef && definesCPSR(&MI)) 1096 return 0; 1097 return MI.getOperand(2).getImm() * Scale; 1098 } 1099 1100 /// Searches for an increment or decrement of \p Reg before \p MBBI. 1101 static MachineBasicBlock::iterator 1102 findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg, 1103 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) { 1104 Offset = 0; 1105 MachineBasicBlock &MBB = *MBBI->getParent(); 1106 MachineBasicBlock::iterator BeginMBBI = MBB.begin(); 1107 MachineBasicBlock::iterator EndMBBI = MBB.end(); 1108 if (MBBI == BeginMBBI) 1109 return EndMBBI; 1110 1111 // Skip debug values. 1112 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI); 1113 while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI) 1114 --PrevMBBI; 1115 1116 Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg); 1117 return Offset == 0 ? EndMBBI : PrevMBBI; 1118 } 1119 1120 /// Searches for a increment or decrement of \p Reg after \p MBBI. 1121 static MachineBasicBlock::iterator 1122 findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg, 1123 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) { 1124 Offset = 0; 1125 MachineBasicBlock &MBB = *MBBI->getParent(); 1126 MachineBasicBlock::iterator EndMBBI = MBB.end(); 1127 MachineBasicBlock::iterator NextMBBI = std::next(MBBI); 1128 // Skip debug values. 1129 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) 1130 ++NextMBBI; 1131 if (NextMBBI == EndMBBI) 1132 return EndMBBI; 1133 1134 Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg); 1135 return Offset == 0 ? EndMBBI : NextMBBI; 1136 } 1137 1138 /// Fold proceeding/trailing inc/dec of base register into the 1139 /// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible: 1140 /// 1141 /// stmia rn, <ra, rb, rc> 1142 /// rn := rn + 4 * 3; 1143 /// => 1144 /// stmia rn!, <ra, rb, rc> 1145 /// 1146 /// rn := rn - 4 * 3; 1147 /// ldmia rn, <ra, rb, rc> 1148 /// => 1149 /// ldmdb rn!, <ra, rb, rc> 1150 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) { 1151 // Thumb1 is already using updating loads/stores. 1152 if (isThumb1) return false; 1153 1154 const MachineOperand &BaseOP = MI->getOperand(0); 1155 unsigned Base = BaseOP.getReg(); 1156 bool BaseKill = BaseOP.isKill(); 1157 unsigned PredReg = 0; 1158 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 1159 unsigned Opcode = MI->getOpcode(); 1160 DebugLoc DL = MI->getDebugLoc(); 1161 1162 // Can't use an updating ld/st if the base register is also a dest 1163 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined. 1164 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i) 1165 if (MI->getOperand(i).getReg() == Base) 1166 return false; 1167 1168 int Bytes = getLSMultipleTransferSize(MI); 1169 MachineBasicBlock &MBB = *MI->getParent(); 1170 MachineBasicBlock::iterator MBBI(MI); 1171 int Offset; 1172 MachineBasicBlock::iterator MergeInstr 1173 = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset); 1174 ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode); 1175 if (Mode == ARM_AM::ia && Offset == -Bytes) { 1176 Mode = ARM_AM::db; 1177 } else if (Mode == ARM_AM::ib && Offset == -Bytes) { 1178 Mode = ARM_AM::da; 1179 } else { 1180 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset); 1181 if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) && 1182 ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) 1183 return false; 1184 } 1185 MBB.erase(MergeInstr); 1186 1187 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode); 1188 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc)) 1189 .addReg(Base, getDefRegState(true)) // WB base register 1190 .addReg(Base, getKillRegState(BaseKill)) 1191 .addImm(Pred).addReg(PredReg); 1192 1193 // Transfer the rest of operands. 1194 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum) 1195 MIB.addOperand(MI->getOperand(OpNum)); 1196 1197 // Transfer memoperands. 1198 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); 1199 1200 MBB.erase(MBBI); 1201 return true; 1202 } 1203 1204 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc, 1205 ARM_AM::AddrOpc Mode) { 1206 switch (Opc) { 1207 case ARM::LDRi12: 1208 return ARM::LDR_PRE_IMM; 1209 case ARM::STRi12: 1210 return ARM::STR_PRE_IMM; 1211 case ARM::VLDRS: 1212 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD; 1213 case ARM::VLDRD: 1214 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD; 1215 case ARM::VSTRS: 1216 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD; 1217 case ARM::VSTRD: 1218 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD; 1219 case ARM::t2LDRi8: 1220 case ARM::t2LDRi12: 1221 return ARM::t2LDR_PRE; 1222 case ARM::t2STRi8: 1223 case ARM::t2STRi12: 1224 return ARM::t2STR_PRE; 1225 default: llvm_unreachable("Unhandled opcode!"); 1226 } 1227 } 1228 1229 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, 1230 ARM_AM::AddrOpc Mode) { 1231 switch (Opc) { 1232 case ARM::LDRi12: 1233 return ARM::LDR_POST_IMM; 1234 case ARM::STRi12: 1235 return ARM::STR_POST_IMM; 1236 case ARM::VLDRS: 1237 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD; 1238 case ARM::VLDRD: 1239 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD; 1240 case ARM::VSTRS: 1241 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD; 1242 case ARM::VSTRD: 1243 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD; 1244 case ARM::t2LDRi8: 1245 case ARM::t2LDRi12: 1246 return ARM::t2LDR_POST; 1247 case ARM::t2STRi8: 1248 case ARM::t2STRi12: 1249 return ARM::t2STR_POST; 1250 default: llvm_unreachable("Unhandled opcode!"); 1251 } 1252 } 1253 1254 /// Fold proceeding/trailing inc/dec of base register into the 1255 /// LDR/STR/FLD{D|S}/FST{D|S} op when possible: 1256 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { 1257 // Thumb1 doesn't have updating LDR/STR. 1258 // FIXME: Use LDM/STM with single register instead. 1259 if (isThumb1) return false; 1260 1261 unsigned Base = getLoadStoreBaseOp(*MI).getReg(); 1262 bool BaseKill = getLoadStoreBaseOp(*MI).isKill(); 1263 unsigned Opcode = MI->getOpcode(); 1264 DebugLoc DL = MI->getDebugLoc(); 1265 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS || 1266 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS); 1267 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12); 1268 if (isi32Load(Opcode) || isi32Store(Opcode)) 1269 if (MI->getOperand(2).getImm() != 0) 1270 return false; 1271 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0) 1272 return false; 1273 1274 // Can't do the merge if the destination register is the same as the would-be 1275 // writeback register. 1276 if (MI->getOperand(0).getReg() == Base) 1277 return false; 1278 1279 unsigned PredReg = 0; 1280 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 1281 int Bytes = getLSMultipleTransferSize(MI); 1282 MachineBasicBlock &MBB = *MI->getParent(); 1283 MachineBasicBlock::iterator MBBI(MI); 1284 int Offset; 1285 MachineBasicBlock::iterator MergeInstr 1286 = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset); 1287 unsigned NewOpc; 1288 if (!isAM5 && Offset == Bytes) { 1289 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add); 1290 } else if (Offset == -Bytes) { 1291 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub); 1292 } else { 1293 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset); 1294 if (Offset == Bytes) { 1295 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add); 1296 } else if (!isAM5 && Offset == -Bytes) { 1297 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub); 1298 } else 1299 return false; 1300 } 1301 MBB.erase(MergeInstr); 1302 1303 ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add; 1304 1305 bool isLd = isLoadSingle(Opcode); 1306 if (isAM5) { 1307 // VLDM[SD]_UPD, VSTM[SD]_UPD 1308 // (There are no base-updating versions of VLDR/VSTR instructions, but the 1309 // updating load/store-multiple instructions can be used with only one 1310 // register.) 1311 MachineOperand &MO = MI->getOperand(0); 1312 BuildMI(MBB, MBBI, DL, TII->get(NewOpc)) 1313 .addReg(Base, getDefRegState(true)) // WB base register 1314 .addReg(Base, getKillRegState(isLd ? BaseKill : false)) 1315 .addImm(Pred).addReg(PredReg) 1316 .addReg(MO.getReg(), (isLd ? getDefRegState(true) : 1317 getKillRegState(MO.isKill()))); 1318 } else if (isLd) { 1319 if (isAM2) { 1320 // LDR_PRE, LDR_POST 1321 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) { 1322 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) 1323 .addReg(Base, RegState::Define) 1324 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 1325 } else { 1326 int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); 1327 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) 1328 .addReg(Base, RegState::Define) 1329 .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg); 1330 } 1331 } else { 1332 // t2LDR_PRE, t2LDR_POST 1333 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) 1334 .addReg(Base, RegState::Define) 1335 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 1336 } 1337 } else { 1338 MachineOperand &MO = MI->getOperand(0); 1339 // FIXME: post-indexed stores use am2offset_imm, which still encodes 1340 // the vestigal zero-reg offset register. When that's fixed, this clause 1341 // can be removed entirely. 1342 if (isAM2 && NewOpc == ARM::STR_POST_IMM) { 1343 int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); 1344 // STR_PRE, STR_POST 1345 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base) 1346 .addReg(MO.getReg(), getKillRegState(MO.isKill())) 1347 .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg); 1348 } else { 1349 // t2STR_PRE, t2STR_POST 1350 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base) 1351 .addReg(MO.getReg(), getKillRegState(MO.isKill())) 1352 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 1353 } 1354 } 1355 MBB.erase(MBBI); 1356 1357 return true; 1358 } 1359 1360 bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const { 1361 unsigned Opcode = MI.getOpcode(); 1362 assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) && 1363 "Must have t2STRDi8 or t2LDRDi8"); 1364 if (MI.getOperand(3).getImm() != 0) 1365 return false; 1366 1367 // Behaviour for writeback is undefined if base register is the same as one 1368 // of the others. 1369 const MachineOperand &BaseOp = MI.getOperand(2); 1370 unsigned Base = BaseOp.getReg(); 1371 const MachineOperand &Reg0Op = MI.getOperand(0); 1372 const MachineOperand &Reg1Op = MI.getOperand(1); 1373 if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base) 1374 return false; 1375 1376 unsigned PredReg; 1377 ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg); 1378 MachineBasicBlock::iterator MBBI(MI); 1379 MachineBasicBlock &MBB = *MI.getParent(); 1380 int Offset; 1381 MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred, 1382 PredReg, Offset); 1383 unsigned NewOpc; 1384 if (Offset == 8 || Offset == -8) { 1385 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE; 1386 } else { 1387 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset); 1388 if (Offset == 8 || Offset == -8) { 1389 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST; 1390 } else 1391 return false; 1392 } 1393 MBB.erase(MergeInstr); 1394 1395 DebugLoc DL = MI.getDebugLoc(); 1396 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc)); 1397 if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) { 1398 MIB.addOperand(Reg0Op).addOperand(Reg1Op) 1399 .addReg(BaseOp.getReg(), RegState::Define); 1400 } else { 1401 assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST); 1402 MIB.addReg(BaseOp.getReg(), RegState::Define) 1403 .addOperand(Reg0Op).addOperand(Reg1Op); 1404 } 1405 MIB.addReg(BaseOp.getReg(), RegState::Kill) 1406 .addImm(Offset).addImm(Pred).addReg(PredReg); 1407 assert(TII->get(Opcode).getNumOperands() == 6 && 1408 TII->get(NewOpc).getNumOperands() == 7 && 1409 "Unexpected number of operands in Opcode specification."); 1410 1411 // Transfer implicit operands. 1412 for (const MachineOperand &MO : MI.implicit_operands()) 1413 MIB.addOperand(MO); 1414 MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); 1415 1416 MBB.erase(MBBI); 1417 return true; 1418 } 1419 1420 /// Returns true if instruction is a memory operation that this pass is capable 1421 /// of operating on. 1422 static bool isMemoryOp(const MachineInstr *MI) { 1423 // When no memory operands are present, conservatively assume unaligned, 1424 // volatile, unfoldable. 1425 if (!MI->hasOneMemOperand()) 1426 return false; 1427 1428 const MachineMemOperand *MMO = *MI->memoperands_begin(); 1429 1430 // Don't touch volatile memory accesses - we may be changing their order. 1431 if (MMO->isVolatile()) 1432 return false; 1433 1434 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is 1435 // not. 1436 if (MMO->getAlignment() < 4) 1437 return false; 1438 1439 // str <undef> could probably be eliminated entirely, but for now we just want 1440 // to avoid making a mess of it. 1441 // FIXME: Use str <undef> as a wildcard to enable better stm folding. 1442 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() && 1443 MI->getOperand(0).isUndef()) 1444 return false; 1445 1446 // Likewise don't mess with references to undefined addresses. 1447 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() && 1448 MI->getOperand(1).isUndef()) 1449 return false; 1450 1451 unsigned Opcode = MI->getOpcode(); 1452 switch (Opcode) { 1453 default: break; 1454 case ARM::VLDRS: 1455 case ARM::VSTRS: 1456 return MI->getOperand(1).isReg(); 1457 case ARM::VLDRD: 1458 case ARM::VSTRD: 1459 return MI->getOperand(1).isReg(); 1460 case ARM::LDRi12: 1461 case ARM::STRi12: 1462 case ARM::tLDRi: 1463 case ARM::tSTRi: 1464 case ARM::tLDRspi: 1465 case ARM::tSTRspi: 1466 case ARM::t2LDRi8: 1467 case ARM::t2LDRi12: 1468 case ARM::t2STRi8: 1469 case ARM::t2STRi12: 1470 return MI->getOperand(1).isReg(); 1471 } 1472 return false; 1473 } 1474 1475 static void InsertLDR_STR(MachineBasicBlock &MBB, 1476 MachineBasicBlock::iterator &MBBI, 1477 int Offset, bool isDef, 1478 DebugLoc DL, unsigned NewOpc, 1479 unsigned Reg, bool RegDeadKill, bool RegUndef, 1480 unsigned BaseReg, bool BaseKill, bool BaseUndef, 1481 bool OffKill, bool OffUndef, 1482 ARMCC::CondCodes Pred, unsigned PredReg, 1483 const TargetInstrInfo *TII, bool isT2) { 1484 if (isDef) { 1485 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), 1486 TII->get(NewOpc)) 1487 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill)) 1488 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef)); 1489 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 1490 } else { 1491 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), 1492 TII->get(NewOpc)) 1493 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef)) 1494 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef)); 1495 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 1496 } 1497 } 1498 1499 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, 1500 MachineBasicBlock::iterator &MBBI) { 1501 MachineInstr *MI = &*MBBI; 1502 unsigned Opcode = MI->getOpcode(); 1503 if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8) 1504 return false; 1505 1506 const MachineOperand &BaseOp = MI->getOperand(2); 1507 unsigned BaseReg = BaseOp.getReg(); 1508 unsigned EvenReg = MI->getOperand(0).getReg(); 1509 unsigned OddReg = MI->getOperand(1).getReg(); 1510 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false); 1511 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false); 1512 1513 // ARM errata 602117: LDRD with base in list may result in incorrect base 1514 // register when interrupted or faulted. 1515 bool Errata602117 = EvenReg == BaseReg && 1516 (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3(); 1517 // ARM LDRD/STRD needs consecutive registers. 1518 bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) && 1519 (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum); 1520 1521 if (!Errata602117 && !NonConsecutiveRegs) 1522 return false; 1523 1524 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8; 1525 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8; 1526 bool EvenDeadKill = isLd ? 1527 MI->getOperand(0).isDead() : MI->getOperand(0).isKill(); 1528 bool EvenUndef = MI->getOperand(0).isUndef(); 1529 bool OddDeadKill = isLd ? 1530 MI->getOperand(1).isDead() : MI->getOperand(1).isKill(); 1531 bool OddUndef = MI->getOperand(1).isUndef(); 1532 bool BaseKill = BaseOp.isKill(); 1533 bool BaseUndef = BaseOp.isUndef(); 1534 bool OffKill = isT2 ? false : MI->getOperand(3).isKill(); 1535 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef(); 1536 int OffImm = getMemoryOpOffset(MI); 1537 unsigned PredReg = 0; 1538 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 1539 1540 if (OddRegNum > EvenRegNum && OffImm == 0) { 1541 // Ascending register numbers and no offset. It's safe to change it to a 1542 // ldm or stm. 1543 unsigned NewOpc = (isLd) 1544 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA) 1545 : (isT2 ? ARM::t2STMIA : ARM::STMIA); 1546 if (isLd) { 1547 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) 1548 .addReg(BaseReg, getKillRegState(BaseKill)) 1549 .addImm(Pred).addReg(PredReg) 1550 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill)) 1551 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill)); 1552 ++NumLDRD2LDM; 1553 } else { 1554 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) 1555 .addReg(BaseReg, getKillRegState(BaseKill)) 1556 .addImm(Pred).addReg(PredReg) 1557 .addReg(EvenReg, 1558 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef)) 1559 .addReg(OddReg, 1560 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef)); 1561 ++NumSTRD2STM; 1562 } 1563 } else { 1564 // Split into two instructions. 1565 unsigned NewOpc = (isLd) 1566 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) 1567 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); 1568 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset, 1569 // so adjust and use t2LDRi12 here for that. 1570 unsigned NewOpc2 = (isLd) 1571 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) 1572 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); 1573 DebugLoc dl = MBBI->getDebugLoc(); 1574 // If this is a load and base register is killed, it may have been 1575 // re-defed by the load, make sure the first load does not clobber it. 1576 if (isLd && 1577 (BaseKill || OffKill) && 1578 (TRI->regsOverlap(EvenReg, BaseReg))) { 1579 assert(!TRI->regsOverlap(OddReg, BaseReg)); 1580 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, 1581 OddReg, OddDeadKill, false, 1582 BaseReg, false, BaseUndef, false, OffUndef, 1583 Pred, PredReg, TII, isT2); 1584 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, 1585 EvenReg, EvenDeadKill, false, 1586 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, 1587 Pred, PredReg, TII, isT2); 1588 } else { 1589 if (OddReg == EvenReg && EvenDeadKill) { 1590 // If the two source operands are the same, the kill marker is 1591 // probably on the first one. e.g. 1592 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0 1593 EvenDeadKill = false; 1594 OddDeadKill = true; 1595 } 1596 // Never kill the base register in the first instruction. 1597 if (EvenReg == BaseReg) 1598 EvenDeadKill = false; 1599 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, 1600 EvenReg, EvenDeadKill, EvenUndef, 1601 BaseReg, false, BaseUndef, false, OffUndef, 1602 Pred, PredReg, TII, isT2); 1603 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, 1604 OddReg, OddDeadKill, OddUndef, 1605 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, 1606 Pred, PredReg, TII, isT2); 1607 } 1608 if (isLd) 1609 ++NumLDRD2LDR; 1610 else 1611 ++NumSTRD2STR; 1612 } 1613 1614 MBBI = MBB.erase(MBBI); 1615 return true; 1616 } 1617 1618 /// An optimization pass to turn multiple LDR / STR ops of the same base and 1619 /// incrementing offset into LDM / STM ops. 1620 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { 1621 MemOpQueue MemOps; 1622 unsigned CurrBase = 0; 1623 unsigned CurrOpc = ~0u; 1624 ARMCC::CondCodes CurrPred = ARMCC::AL; 1625 unsigned Position = 0; 1626 assert(Candidates.size() == 0); 1627 assert(MergeBaseCandidates.size() == 0); 1628 LiveRegsValid = false; 1629 1630 for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin(); 1631 I = MBBI) { 1632 // The instruction in front of the iterator is the one we look at. 1633 MBBI = std::prev(I); 1634 if (FixInvalidRegPairOp(MBB, MBBI)) 1635 continue; 1636 ++Position; 1637 1638 if (isMemoryOp(MBBI)) { 1639 unsigned Opcode = MBBI->getOpcode(); 1640 const MachineOperand &MO = MBBI->getOperand(0); 1641 unsigned Reg = MO.getReg(); 1642 unsigned Base = getLoadStoreBaseOp(*MBBI).getReg(); 1643 unsigned PredReg = 0; 1644 ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg); 1645 int Offset = getMemoryOpOffset(MBBI); 1646 if (CurrBase == 0) { 1647 // Start of a new chain. 1648 CurrBase = Base; 1649 CurrOpc = Opcode; 1650 CurrPred = Pred; 1651 MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position)); 1652 continue; 1653 } 1654 // Note: No need to match PredReg in the next if. 1655 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) { 1656 // Watch out for: 1657 // r4 := ldr [r0, #8] 1658 // r4 := ldr [r0, #4] 1659 // or 1660 // r0 := ldr [r0] 1661 // If a load overrides the base register or a register loaded by 1662 // another load in our chain, we cannot take this instruction. 1663 bool Overlap = false; 1664 if (isLoadSingle(Opcode)) { 1665 Overlap = (Base == Reg); 1666 if (!Overlap) { 1667 for (const MemOpQueueEntry &E : MemOps) { 1668 if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) { 1669 Overlap = true; 1670 break; 1671 } 1672 } 1673 } 1674 } 1675 1676 if (!Overlap) { 1677 // Check offset and sort memory operation into the current chain. 1678 if (Offset > MemOps.back().Offset) { 1679 MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position)); 1680 continue; 1681 } else { 1682 MemOpQueue::iterator MI, ME; 1683 for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) { 1684 if (Offset < MI->Offset) { 1685 // Found a place to insert. 1686 break; 1687 } 1688 if (Offset == MI->Offset) { 1689 // Collision, abort. 1690 MI = ME; 1691 break; 1692 } 1693 } 1694 if (MI != MemOps.end()) { 1695 MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position)); 1696 continue; 1697 } 1698 } 1699 } 1700 } 1701 1702 // Don't advance the iterator; The op will start a new chain next. 1703 MBBI = I; 1704 --Position; 1705 // Fallthrough to look into existing chain. 1706 } else if (MBBI->isDebugValue()) { 1707 continue; 1708 } else if (MBBI->getOpcode() == ARM::t2LDRDi8 || 1709 MBBI->getOpcode() == ARM::t2STRDi8) { 1710 // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions 1711 // remember them because we may still be able to merge add/sub into them. 1712 MergeBaseCandidates.push_back(MBBI); 1713 } 1714 1715 1716 // If we are here then the chain is broken; Extract candidates for a merge. 1717 if (MemOps.size() > 0) { 1718 FormCandidates(MemOps); 1719 // Reset for the next chain. 1720 CurrBase = 0; 1721 CurrOpc = ~0u; 1722 CurrPred = ARMCC::AL; 1723 MemOps.clear(); 1724 } 1725 } 1726 if (MemOps.size() > 0) 1727 FormCandidates(MemOps); 1728 1729 // Sort candidates so they get processed from end to begin of the basic 1730 // block later; This is necessary for liveness calculation. 1731 auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) { 1732 return M0->InsertPos < M1->InsertPos; 1733 }; 1734 std::sort(Candidates.begin(), Candidates.end(), LessThan); 1735 1736 // Go through list of candidates and merge. 1737 bool Changed = false; 1738 for (const MergeCandidate *Candidate : Candidates) { 1739 if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) { 1740 MachineInstr *Merged = MergeOpsUpdate(*Candidate); 1741 // Merge preceding/trailing base inc/dec into the merged op. 1742 if (Merged) { 1743 Changed = true; 1744 unsigned Opcode = Merged->getOpcode(); 1745 if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8) 1746 MergeBaseUpdateLSDouble(*Merged); 1747 else 1748 MergeBaseUpdateLSMultiple(Merged); 1749 } else { 1750 for (MachineInstr *MI : Candidate->Instrs) { 1751 if (MergeBaseUpdateLoadStore(MI)) 1752 Changed = true; 1753 } 1754 } 1755 } else { 1756 assert(Candidate->Instrs.size() == 1); 1757 if (MergeBaseUpdateLoadStore(Candidate->Instrs.front())) 1758 Changed = true; 1759 } 1760 } 1761 Candidates.clear(); 1762 // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt. 1763 for (MachineInstr *MI : MergeBaseCandidates) 1764 MergeBaseUpdateLSDouble(*MI); 1765 MergeBaseCandidates.clear(); 1766 1767 return Changed; 1768 } 1769 1770 /// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr") 1771 /// into the preceding stack restore so it directly restore the value of LR 1772 /// into pc. 1773 /// ldmfd sp!, {..., lr} 1774 /// bx lr 1775 /// or 1776 /// ldmfd sp!, {..., lr} 1777 /// mov pc, lr 1778 /// => 1779 /// ldmfd sp!, {..., pc} 1780 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { 1781 // Thumb1 LDM doesn't allow high registers. 1782 if (isThumb1) return false; 1783 if (MBB.empty()) return false; 1784 1785 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); 1786 if (MBBI != MBB.begin() && 1787 (MBBI->getOpcode() == ARM::BX_RET || 1788 MBBI->getOpcode() == ARM::tBX_RET || 1789 MBBI->getOpcode() == ARM::MOVPCLR)) { 1790 MachineInstr *PrevMI = std::prev(MBBI); 1791 unsigned Opcode = PrevMI->getOpcode(); 1792 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD || 1793 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD || 1794 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) { 1795 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1); 1796 if (MO.getReg() != ARM::LR) 1797 return false; 1798 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET); 1799 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) || 1800 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!"); 1801 PrevMI->setDesc(TII->get(NewOpc)); 1802 MO.setReg(ARM::PC); 1803 PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI); 1804 MBB.erase(MBBI); 1805 return true; 1806 } 1807 } 1808 return false; 1809 } 1810 1811 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { 1812 MF = &Fn; 1813 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget()); 1814 TL = STI->getTargetLowering(); 1815 AFI = Fn.getInfo<ARMFunctionInfo>(); 1816 TII = STI->getInstrInfo(); 1817 TRI = STI->getRegisterInfo(); 1818 MRI = &Fn.getRegInfo(); 1819 RegClassInfoValid = false; 1820 isThumb2 = AFI->isThumb2Function(); 1821 isThumb1 = AFI->isThumbFunction() && !isThumb2; 1822 1823 bool Modified = false; 1824 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; 1825 ++MFI) { 1826 MachineBasicBlock &MBB = *MFI; 1827 Modified |= LoadStoreMultipleOpti(MBB); 1828 if (STI->hasV5TOps()) 1829 Modified |= MergeReturnIntoLDM(MBB); 1830 } 1831 1832 Allocator.DestroyAll(); 1833 return Modified; 1834 } 1835 1836 namespace { 1837 /// Pre- register allocation pass that move load / stores from consecutive 1838 /// locations close to make it more likely they will be combined later. 1839 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{ 1840 static char ID; 1841 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {} 1842 1843 const DataLayout *TD; 1844 const TargetInstrInfo *TII; 1845 const TargetRegisterInfo *TRI; 1846 const ARMSubtarget *STI; 1847 MachineRegisterInfo *MRI; 1848 MachineFunction *MF; 1849 1850 bool runOnMachineFunction(MachineFunction &Fn) override; 1851 1852 const char *getPassName() const override { 1853 return "ARM pre- register allocation load / store optimization pass"; 1854 } 1855 1856 private: 1857 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, 1858 unsigned &NewOpc, unsigned &EvenReg, 1859 unsigned &OddReg, unsigned &BaseReg, 1860 int &Offset, 1861 unsigned &PredReg, ARMCC::CondCodes &Pred, 1862 bool &isT2); 1863 bool RescheduleOps(MachineBasicBlock *MBB, 1864 SmallVectorImpl<MachineInstr *> &Ops, 1865 unsigned Base, bool isLd, 1866 DenseMap<MachineInstr*, unsigned> &MI2LocMap); 1867 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB); 1868 }; 1869 char ARMPreAllocLoadStoreOpt::ID = 0; 1870 } 1871 1872 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { 1873 TD = &Fn.getDataLayout(); 1874 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget()); 1875 TII = STI->getInstrInfo(); 1876 TRI = STI->getRegisterInfo(); 1877 MRI = &Fn.getRegInfo(); 1878 MF = &Fn; 1879 1880 bool Modified = false; 1881 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; 1882 ++MFI) 1883 Modified |= RescheduleLoadStoreInstrs(MFI); 1884 1885 return Modified; 1886 } 1887 1888 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, 1889 MachineBasicBlock::iterator I, 1890 MachineBasicBlock::iterator E, 1891 SmallPtrSetImpl<MachineInstr*> &MemOps, 1892 SmallSet<unsigned, 4> &MemRegs, 1893 const TargetRegisterInfo *TRI) { 1894 // Are there stores / loads / calls between them? 1895 // FIXME: This is overly conservative. We should make use of alias information 1896 // some day. 1897 SmallSet<unsigned, 4> AddedRegPressure; 1898 while (++I != E) { 1899 if (I->isDebugValue() || MemOps.count(&*I)) 1900 continue; 1901 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects()) 1902 return false; 1903 if (isLd && I->mayStore()) 1904 return false; 1905 if (!isLd) { 1906 if (I->mayLoad()) 1907 return false; 1908 // It's not safe to move the first 'str' down. 1909 // str r1, [r0] 1910 // strh r5, [r0] 1911 // str r4, [r0, #+4] 1912 if (I->mayStore()) 1913 return false; 1914 } 1915 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) { 1916 MachineOperand &MO = I->getOperand(j); 1917 if (!MO.isReg()) 1918 continue; 1919 unsigned Reg = MO.getReg(); 1920 if (MO.isDef() && TRI->regsOverlap(Reg, Base)) 1921 return false; 1922 if (Reg != Base && !MemRegs.count(Reg)) 1923 AddedRegPressure.insert(Reg); 1924 } 1925 } 1926 1927 // Estimate register pressure increase due to the transformation. 1928 if (MemRegs.size() <= 4) 1929 // Ok if we are moving small number of instructions. 1930 return true; 1931 return AddedRegPressure.size() <= MemRegs.size() * 2; 1932 } 1933 1934 1935 /// Copy \p Op0 and \p Op1 operands into a new array assigned to MI. 1936 static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0, 1937 MachineInstr *Op1) { 1938 assert(MI->memoperands_empty() && "expected a new machineinstr"); 1939 size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin()) 1940 + (Op1->memoperands_end() - Op1->memoperands_begin()); 1941 1942 MachineFunction *MF = MI->getParent()->getParent(); 1943 MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs); 1944 MachineSDNode::mmo_iterator MemEnd = 1945 std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin); 1946 MemEnd = 1947 std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd); 1948 MI->setMemRefs(MemBegin, MemEnd); 1949 } 1950 1951 bool 1952 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, 1953 DebugLoc &dl, unsigned &NewOpc, 1954 unsigned &FirstReg, 1955 unsigned &SecondReg, 1956 unsigned &BaseReg, int &Offset, 1957 unsigned &PredReg, 1958 ARMCC::CondCodes &Pred, 1959 bool &isT2) { 1960 // Make sure we're allowed to generate LDRD/STRD. 1961 if (!STI->hasV5TEOps()) 1962 return false; 1963 1964 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD 1965 unsigned Scale = 1; 1966 unsigned Opcode = Op0->getOpcode(); 1967 if (Opcode == ARM::LDRi12) { 1968 NewOpc = ARM::LDRD; 1969 } else if (Opcode == ARM::STRi12) { 1970 NewOpc = ARM::STRD; 1971 } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) { 1972 NewOpc = ARM::t2LDRDi8; 1973 Scale = 4; 1974 isT2 = true; 1975 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) { 1976 NewOpc = ARM::t2STRDi8; 1977 Scale = 4; 1978 isT2 = true; 1979 } else { 1980 return false; 1981 } 1982 1983 // Make sure the base address satisfies i64 ld / st alignment requirement. 1984 // At the moment, we ignore the memoryoperand's value. 1985 // If we want to use AliasAnalysis, we should check it accordingly. 1986 if (!Op0->hasOneMemOperand() || 1987 (*Op0->memoperands_begin())->isVolatile()) 1988 return false; 1989 1990 unsigned Align = (*Op0->memoperands_begin())->getAlignment(); 1991 const Function *Func = MF->getFunction(); 1992 unsigned ReqAlign = STI->hasV6Ops() 1993 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext())) 1994 : 8; // Pre-v6 need 8-byte align 1995 if (Align < ReqAlign) 1996 return false; 1997 1998 // Then make sure the immediate offset fits. 1999 int OffImm = getMemoryOpOffset(Op0); 2000 if (isT2) { 2001 int Limit = (1 << 8) * Scale; 2002 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1))) 2003 return false; 2004 Offset = OffImm; 2005 } else { 2006 ARM_AM::AddrOpc AddSub = ARM_AM::add; 2007 if (OffImm < 0) { 2008 AddSub = ARM_AM::sub; 2009 OffImm = - OffImm; 2010 } 2011 int Limit = (1 << 8) * Scale; 2012 if (OffImm >= Limit || (OffImm & (Scale-1))) 2013 return false; 2014 Offset = ARM_AM::getAM3Opc(AddSub, OffImm); 2015 } 2016 FirstReg = Op0->getOperand(0).getReg(); 2017 SecondReg = Op1->getOperand(0).getReg(); 2018 if (FirstReg == SecondReg) 2019 return false; 2020 BaseReg = Op0->getOperand(1).getReg(); 2021 Pred = getInstrPredicate(Op0, PredReg); 2022 dl = Op0->getDebugLoc(); 2023 return true; 2024 } 2025 2026 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, 2027 SmallVectorImpl<MachineInstr *> &Ops, 2028 unsigned Base, bool isLd, 2029 DenseMap<MachineInstr*, unsigned> &MI2LocMap) { 2030 bool RetVal = false; 2031 2032 // Sort by offset (in reverse order). 2033 std::sort(Ops.begin(), Ops.end(), 2034 [](const MachineInstr *LHS, const MachineInstr *RHS) { 2035 int LOffset = getMemoryOpOffset(LHS); 2036 int ROffset = getMemoryOpOffset(RHS); 2037 assert(LHS == RHS || LOffset != ROffset); 2038 return LOffset > ROffset; 2039 }); 2040 2041 // The loads / stores of the same base are in order. Scan them from first to 2042 // last and check for the following: 2043 // 1. Any def of base. 2044 // 2. Any gaps. 2045 while (Ops.size() > 1) { 2046 unsigned FirstLoc = ~0U; 2047 unsigned LastLoc = 0; 2048 MachineInstr *FirstOp = nullptr; 2049 MachineInstr *LastOp = nullptr; 2050 int LastOffset = 0; 2051 unsigned LastOpcode = 0; 2052 unsigned LastBytes = 0; 2053 unsigned NumMove = 0; 2054 for (int i = Ops.size() - 1; i >= 0; --i) { 2055 MachineInstr *Op = Ops[i]; 2056 unsigned Loc = MI2LocMap[Op]; 2057 if (Loc <= FirstLoc) { 2058 FirstLoc = Loc; 2059 FirstOp = Op; 2060 } 2061 if (Loc >= LastLoc) { 2062 LastLoc = Loc; 2063 LastOp = Op; 2064 } 2065 2066 unsigned LSMOpcode 2067 = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia); 2068 if (LastOpcode && LSMOpcode != LastOpcode) 2069 break; 2070 2071 int Offset = getMemoryOpOffset(Op); 2072 unsigned Bytes = getLSMultipleTransferSize(Op); 2073 if (LastBytes) { 2074 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes)) 2075 break; 2076 } 2077 LastOffset = Offset; 2078 LastBytes = Bytes; 2079 LastOpcode = LSMOpcode; 2080 if (++NumMove == 8) // FIXME: Tune this limit. 2081 break; 2082 } 2083 2084 if (NumMove <= 1) 2085 Ops.pop_back(); 2086 else { 2087 SmallPtrSet<MachineInstr*, 4> MemOps; 2088 SmallSet<unsigned, 4> MemRegs; 2089 for (int i = NumMove-1; i >= 0; --i) { 2090 MemOps.insert(Ops[i]); 2091 MemRegs.insert(Ops[i]->getOperand(0).getReg()); 2092 } 2093 2094 // Be conservative, if the instructions are too far apart, don't 2095 // move them. We want to limit the increase of register pressure. 2096 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this. 2097 if (DoMove) 2098 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp, 2099 MemOps, MemRegs, TRI); 2100 if (!DoMove) { 2101 for (unsigned i = 0; i != NumMove; ++i) 2102 Ops.pop_back(); 2103 } else { 2104 // This is the new location for the loads / stores. 2105 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp; 2106 while (InsertPos != MBB->end() 2107 && (MemOps.count(InsertPos) || InsertPos->isDebugValue())) 2108 ++InsertPos; 2109 2110 // If we are moving a pair of loads / stores, see if it makes sense 2111 // to try to allocate a pair of registers that can form register pairs. 2112 MachineInstr *Op0 = Ops.back(); 2113 MachineInstr *Op1 = Ops[Ops.size()-2]; 2114 unsigned FirstReg = 0, SecondReg = 0; 2115 unsigned BaseReg = 0, PredReg = 0; 2116 ARMCC::CondCodes Pred = ARMCC::AL; 2117 bool isT2 = false; 2118 unsigned NewOpc = 0; 2119 int Offset = 0; 2120 DebugLoc dl; 2121 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc, 2122 FirstReg, SecondReg, BaseReg, 2123 Offset, PredReg, Pred, isT2)) { 2124 Ops.pop_back(); 2125 Ops.pop_back(); 2126 2127 const MCInstrDesc &MCID = TII->get(NewOpc); 2128 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF); 2129 MRI->constrainRegClass(FirstReg, TRC); 2130 MRI->constrainRegClass(SecondReg, TRC); 2131 2132 // Form the pair instruction. 2133 if (isLd) { 2134 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) 2135 .addReg(FirstReg, RegState::Define) 2136 .addReg(SecondReg, RegState::Define) 2137 .addReg(BaseReg); 2138 // FIXME: We're converting from LDRi12 to an insn that still 2139 // uses addrmode2, so we need an explicit offset reg. It should 2140 // always by reg0 since we're transforming LDRi12s. 2141 if (!isT2) 2142 MIB.addReg(0); 2143 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 2144 concatenateMemOperands(MIB, Op0, Op1); 2145 DEBUG(dbgs() << "Formed " << *MIB << "\n"); 2146 ++NumLDRDFormed; 2147 } else { 2148 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) 2149 .addReg(FirstReg) 2150 .addReg(SecondReg) 2151 .addReg(BaseReg); 2152 // FIXME: We're converting from LDRi12 to an insn that still 2153 // uses addrmode2, so we need an explicit offset reg. It should 2154 // always by reg0 since we're transforming STRi12s. 2155 if (!isT2) 2156 MIB.addReg(0); 2157 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 2158 concatenateMemOperands(MIB, Op0, Op1); 2159 DEBUG(dbgs() << "Formed " << *MIB << "\n"); 2160 ++NumSTRDFormed; 2161 } 2162 MBB->erase(Op0); 2163 MBB->erase(Op1); 2164 2165 if (!isT2) { 2166 // Add register allocation hints to form register pairs. 2167 MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg); 2168 MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg); 2169 } 2170 } else { 2171 for (unsigned i = 0; i != NumMove; ++i) { 2172 MachineInstr *Op = Ops.back(); 2173 Ops.pop_back(); 2174 MBB->splice(InsertPos, MBB, Op); 2175 } 2176 } 2177 2178 NumLdStMoved += NumMove; 2179 RetVal = true; 2180 } 2181 } 2182 } 2183 2184 return RetVal; 2185 } 2186 2187 bool 2188 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { 2189 bool RetVal = false; 2190 2191 DenseMap<MachineInstr*, unsigned> MI2LocMap; 2192 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap; 2193 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap; 2194 SmallVector<unsigned, 4> LdBases; 2195 SmallVector<unsigned, 4> StBases; 2196 2197 unsigned Loc = 0; 2198 MachineBasicBlock::iterator MBBI = MBB->begin(); 2199 MachineBasicBlock::iterator E = MBB->end(); 2200 while (MBBI != E) { 2201 for (; MBBI != E; ++MBBI) { 2202 MachineInstr *MI = MBBI; 2203 if (MI->isCall() || MI->isTerminator()) { 2204 // Stop at barriers. 2205 ++MBBI; 2206 break; 2207 } 2208 2209 if (!MI->isDebugValue()) 2210 MI2LocMap[MI] = ++Loc; 2211 2212 if (!isMemoryOp(MI)) 2213 continue; 2214 unsigned PredReg = 0; 2215 if (getInstrPredicate(MI, PredReg) != ARMCC::AL) 2216 continue; 2217 2218 int Opc = MI->getOpcode(); 2219 bool isLd = isLoadSingle(Opc); 2220 unsigned Base = MI->getOperand(1).getReg(); 2221 int Offset = getMemoryOpOffset(MI); 2222 2223 bool StopHere = false; 2224 if (isLd) { 2225 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI = 2226 Base2LdsMap.find(Base); 2227 if (BI != Base2LdsMap.end()) { 2228 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { 2229 if (Offset == getMemoryOpOffset(BI->second[i])) { 2230 StopHere = true; 2231 break; 2232 } 2233 } 2234 if (!StopHere) 2235 BI->second.push_back(MI); 2236 } else { 2237 Base2LdsMap[Base].push_back(MI); 2238 LdBases.push_back(Base); 2239 } 2240 } else { 2241 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI = 2242 Base2StsMap.find(Base); 2243 if (BI != Base2StsMap.end()) { 2244 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { 2245 if (Offset == getMemoryOpOffset(BI->second[i])) { 2246 StopHere = true; 2247 break; 2248 } 2249 } 2250 if (!StopHere) 2251 BI->second.push_back(MI); 2252 } else { 2253 Base2StsMap[Base].push_back(MI); 2254 StBases.push_back(Base); 2255 } 2256 } 2257 2258 if (StopHere) { 2259 // Found a duplicate (a base+offset combination that's seen earlier). 2260 // Backtrack. 2261 --Loc; 2262 break; 2263 } 2264 } 2265 2266 // Re-schedule loads. 2267 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) { 2268 unsigned Base = LdBases[i]; 2269 SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base]; 2270 if (Lds.size() > 1) 2271 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap); 2272 } 2273 2274 // Re-schedule stores. 2275 for (unsigned i = 0, e = StBases.size(); i != e; ++i) { 2276 unsigned Base = StBases[i]; 2277 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base]; 2278 if (Sts.size() > 1) 2279 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap); 2280 } 2281 2282 if (MBBI != E) { 2283 Base2LdsMap.clear(); 2284 Base2StsMap.clear(); 2285 LdBases.clear(); 2286 StBases.clear(); 2287 } 2288 } 2289 2290 return RetVal; 2291 } 2292 2293 2294 /// Returns an instance of the load / store optimization pass. 2295 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) { 2296 if (PreAlloc) 2297 return new ARMPreAllocLoadStoreOpt(); 2298 return new ARMLoadStoreOpt(); 2299 } 2300