//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This file contains a pass that performs load / store related peephole
/// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "ThumbRegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

#define DEBUG_TYPE "arm-ldst-opt"

STATISTIC(NumLDMGened,  "Number of ldm instructions generated");
STATISTIC(NumSTMGened,  "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");

namespace llvm {
void initializeARMLoadStoreOptPass(PassRegistry &);
}

#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"

namespace {
  /// Post-register allocation pass that combines load / store instructions
  /// to form ldm / stm instructions.
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {
      initializeARMLoadStoreOptPass(*PassRegistry::getPassRegistry());
    }

    const MachineFunction *MF;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    const TargetLowering *TL;
    ARMFunctionInfo *AFI;
    LivePhysRegs LiveRegs;
    RegisterClassInfo RegClassInfo;
    MachineBasicBlock::const_iterator LiveRegPos;
    bool LiveRegsValid;
    bool RegClassInfoValid;
    bool isThumb1, isThumb2;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return ARM_LOAD_STORE_OPT_NAME;
    }

  private:
    /// A set of load/store MachineInstrs with the same base register, sorted
    /// by offset.
    struct MemOpQueueEntry {
      MachineInstr *MI;
      int Offset;        ///< Load/Store offset.
      unsigned Position; ///< Position as counted from end of basic block.
      MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)
          : MI(MI), Offset(Offset), Position(Position) {}
    };
    typedef SmallVector<MemOpQueueEntry, 8> MemOpQueue;

    /// A set of MachineInstrs that fulfill (nearly all) conditions to get
    /// merged into a LDM/STM.
    struct MergeCandidate {
      /// List of instructions ordered by load/store offset.
      SmallVector<MachineInstr*, 4> Instrs;
      /// Index in Instrs of the instruction that appears latest in the
      /// schedule.
      unsigned LatestMIIdx;
      /// Index in Instrs of the instruction that appears earliest in the
      /// schedule.
      unsigned EarliestMIIdx;
      /// Index into the basic block where the merged instruction will be
      /// inserted. (See MemOpQueueEntry.Position.)
      unsigned InsertPos;
      /// Whether the instructions can be merged into a ldm/stm instruction.
      bool CanMergeToLSMulti;
      /// Whether the instructions can be merged into a ldrd/strd instruction.
      bool CanMergeToLSDouble;
    };
    SpecificBumpPtrAllocator<MergeCandidate> Allocator;
    SmallVector<const MergeCandidate*, 4> Candidates;
    SmallVector<MachineInstr*, 4> MergeBaseCandidates;

    void moveLiveRegsBefore(const MachineBasicBlock &MBB,
                            MachineBasicBlock::const_iterator Before);
    unsigned findFreeReg(const TargetRegisterClass &RegClass);
    void UpdateBaseRegUses(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           DebugLoc DL, unsigned Base, unsigned WordOffset,
                           ARMCC::CondCodes Pred, unsigned PredReg);
    MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
        MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
        bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
        DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs);
    MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
        MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
        bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
        DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const;
    void FormCandidates(const MemOpQueue &MemOps);
    MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineInstr *MI);
    bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
    bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

INITIALIZE_PASS(ARMLoadStoreOpt, "arm-load-store-opt", ARM_LOAD_STORE_OPT_NAME,
                false, false)

static bool definesCPSR(const MachineInstr *MI) {
  for (const auto &MO : MI->operands()) {
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has a live CPSR def, then it's not safe to fold it
      // into a load / store.
      return true;
  }

  return false;
}

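// For illustration (operand values hypothetical): getMemoryOpOffset below
// returns the byte offset of a memory op. Thumb2 and LDRi12/STRi12 forms
// carry the byte offset directly in their immediate field; Thumb1 tLDRi/tSTRi
// keep a word-scaled immediate, so e.g. "tLDRi r1, [r0, #2]" yields 8; the
// AM3/AM5 encodings (LDRD/STRD and VLDRS/VLDRD) fold an add/sub flag into the
// offset field, which is decoded and applied as a sign.
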
static int getMemoryOpOffset(const MachineInstr *MI) {
  unsigned Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands - 3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  // Thumb1 immediate offsets are scaled by 4.
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
      Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
    return OffField * 4;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
                             : ARM_AM::getAM5Op(OffField);

  if (Op == ARM_AM::sub)
    return -Offset;

  return Offset;
}

static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
  return MI.getOperand(1);
}

static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
  return MI.getOperand(0);
}

static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
  case ARM::tLDRspi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
  case ARM::tSTRspi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}

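// For example, getLoadStoreMultipleOpcode(ARM::LDRi12, ARM_AM::ia) yields
// ARM::LDMIA, while (ARM::VLDRS, ARM_AM::db) yields 0 since only the
// writeback form VLDMSDB_UPD exists; callers must treat 0 as "no merge".
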
static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

static bool isT1i32Load(unsigned Opc) {
  return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc);
}

static bool isT1i32Store(unsigned Opc) {
  return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}

static bool isLoadSingle(unsigned Opc) {
  return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
}

static unsigned getImmScale(unsigned Opc) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
    return 1;
  case ARM::tLDRHi:
  case ARM::tSTRHi:
    return 2;
  case ARM::tLDRBi:
  case ARM::tSTRBi:
    return 4;
  }
}

static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
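///
/// For illustration (registers hypothetical): after forming
/// "tLDMIA r0!, {r1, r2, r3}" the writeback leaves r0 advanced by 3 words,
/// so a later "tLDRi r4, [r0, #5]" must be rewritten as "tLDRi r4, [r0, #2]"
/// (tLDRi immediates are word-scaled); a use that cannot be rewritten gets a
/// compensating "tSUBi8 r0, #12" (SUBS) inserted in front of it instead.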
void
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   DebugLoc DL, unsigned Base,
                                   unsigned WordOffset,
                                   ARMCC::CondCodes Pred, unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");
  // Start updating any instructions with immediate offsets. Insert a SUB
  // before the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    bool InsertSub = false;
    unsigned Opc = MBBI->getOpcode();

    if (MBBI->readsRegister(Base)) {
      int Offset;
      bool IsLoad =
        Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
      bool IsStore =
        Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

      if (IsLoad || IsStore) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);

        // If storing the base register, it needs to be reset first.
        unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();

        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
          MO.setImm(Offset);
        else
          InsertSub = true;

      } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                 !definesCPSR(MBBI)) {
        // SUBS/ADDS using this register, with a dead def of the CPSR.
        // Merge it with the update; if the merged offset is too large,
        // insert a new sub instead.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4;
        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
          // Offset == 0.
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }

      } else {
        // Can't update the instruction.
        InsertSub = true;
      }

    } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
      // Since SUBS sets the condition flags, we can't place the base reset
      // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.
      InsertSub = true;
    }

    if (InsertSub) {
      // An instruction above couldn't be updated, so insert a sub.
      AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
        .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
      return;
    }

    if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
      // Register got killed. Stop updating.
      return;
  }

  // End of block was reached.
  if (MBB.succ_size() > 0) {
    // FIXME: Because of a bug, live registers are sometimes missing from
    // the successor blocks' live-in sets. This means we can't trust that
    // information and *always* have to reset at the end of a block.
    // See PR21029.
    if (MBBI != MBB.end()) --MBBI;
    AddDefaultT1CC(
      BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
      .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
  }
}

/// Return the first register of class \p RegClass that is not in \p Regs.
unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
  if (!RegClassInfoValid) {
    RegClassInfo.runOnMachineFunction(*MF);
    RegClassInfoValid = true;
  }

  for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
    if (!LiveRegs.contains(Reg))
      return Reg;
  return 0;
}

/// Compute live registers just before instruction \p Before (in normal
/// schedule direction). Computes backwards, so multiple queries in the same
/// block must come in reverse order.
void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
    MachineBasicBlock::const_iterator Before) {
  // Initialize if we never queried in this block.
  if (!LiveRegsValid) {
    LiveRegs.init(TRI);
    LiveRegs.addLiveOuts(&MBB, true);
    LiveRegPos = MBB.end();
    LiveRegsValid = true;
  }
  // Move backward just before the "Before" position.
  while (LiveRegPos != Before) {
    --LiveRegPos;
    LiveRegs.stepBackward(*LiveRegPos);
  }
}

static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
                        unsigned Reg) {
  for (const std::pair<unsigned, bool> &R : Regs)
    if (R.first == Reg)
      return true;
  return false;
}

/// Create and insert a LDM or STM with \p Base as base register and registers
/// in \p Regs as the register operands that would be loaded / stored. Returns
/// the newly created instruction, or nullptr if the transformation could not
/// be performed.
MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
    MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
    bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
    DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) {
  unsigned NumRegs = Regs.size();
  assert(NumRegs > 1);

  // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
  // Compute liveness information for that register to make the decision.
  bool SafeToClobberCPSR = !isThumb1 ||
    (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
     MachineBasicBlock::LQR_Dead);

  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.

  // Exception: If the base register is in the input reglist, Thumb1 LDM is
  // non-writeback.
  // It's also not possible to merge an STR of the base register in Thumb1.
  if (isThumb1 && ContainsReg(Regs, Base)) {
    assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
    if (Opcode == ARM::tLDRi) {
      Writeback = false;
    } else if (Opcode == ARM::tSTRi) {
      return nullptr;
    }
  }

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;

  if (Offset == 4 && haveIBAndDA) {
    Mode = ARM_AM::ib;
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
    Mode = ARM_AM::da;
  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
    // Check if this is a supported opcode before inserting instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return nullptr;

    // On Thumb1, it's not worth materializing a new base register without
    // clobbering the CPSR (i.e. not using ADDS/SUBS).
    if (!SafeToClobberCPSR)
      return nullptr;

    unsigned NewBase;
    if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    } else {
      // Find a free register that we can use as scratch register.
      moveLiveRegsBefore(MBB, InsertBefore);
      // The merged instruction does not exist yet but will use several Regs if
      // it is a Store.
      if (!isLoadSingle(Opcode))
        for (const std::pair<unsigned, bool> &R : Regs)
          LiveRegs.addReg(R.first);

      NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
      if (NewBase == 0)
        return nullptr;
    }

    int BaseOpc =
      isThumb2 ? ARM::t2ADDri :
      (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
      (isThumb1 && Offset < 8) ? ARM::tADDi3 :
      isThumb1 ? ARM::tADDi8 : ARM::ADDri;

    if (Offset < 0) {
      Offset = -Offset;
      BaseOpc =
        isThumb2 ? ARM::t2SUBri :
        (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
        isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
    }

    if (!TL->isLegalAddImmediate(Offset))
      // FIXME: Try add with register operand?
      return nullptr; // Probably not worth it then.

    // We can only append a kill flag to the add/sub input if the value is not
    // used in the register list of the stm as well.
    bool KillOldBase = BaseKill &&
      (!isi32Store(Opcode) || !ContainsReg(Regs, Base));

    if (isThumb1) {
      // Thumb1: depending on immediate size, use either
      //   ADDS NewBase, Base, #imm3
      // or
      //   MOV  NewBase, Base
      //   ADDS NewBase, #imm8.
      if (Base != NewBase &&
          (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
        // Need to insert a MOV to the new base first.
        if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
            !STI->hasV6Ops()) {
          // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
          if (Pred != ARMCC::AL)
            return nullptr;
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase));
        } else
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase))
            .addImm(Pred).addReg(PredReg);

        // The following ADDS/SUBS becomes an update.
        Base = NewBase;
        KillOldBase = true;
      }
      if (BaseOpc == ARM::tADDrSPi) {
        assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
        BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
          .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4)
          .addImm(Pred).addReg(PredReg);
      } else
        AddDefaultT1CC(
          BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true)
          .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
          .addImm(Pred).addReg(PredReg);
    } else {
      BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
        .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
        .addImm(Pred).addReg(PredReg).addReg(0);
    }
    Base = NewBase;
    BaseKill = true; // New base is always killed straight away.
  }

  bool isDef = isLoadSingle(Opcode);

  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  // base register writeback.
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode)
    return nullptr;

  // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
  // - There is no writeback (LDM of base register),
  // - the base register is killed by the merged instruction,
  // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
  //   to reset the base register.
  // Otherwise, don't merge.
  // It's safe to return here since the code to materialize a new base register
  // above is also conditional on SafeToClobberCPSR.
  if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
    return nullptr;

  MachineInstrBuilder MIB;

  if (Writeback) {
    if (Opcode == ARM::tLDMIA)
      // Update tLDMIA with writeback if necessary.
      Opcode = ARM::tLDMIA_UPD;

    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));

    // Thumb1: we might need to set base writeback when building the MI.
    MIB.addReg(Base, getDefRegState(true))
       .addReg(Base, getKillRegState(BaseKill));

    // The base isn't dead after a merged instruction with writeback.
    // Insert a sub instruction after the newly formed instruction to reset.
    if (!BaseKill)
      UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);

  } else {
    // No writeback, simply build the MachineInstr.
    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
    MIB.addReg(Base, getKillRegState(BaseKill));
  }

  MIB.addImm(Pred).addReg(PredReg);

  for (const std::pair<unsigned, bool> &R : Regs)
    MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));

  return MIB.getInstr();
}

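// Illustrative effect of CreateLoadStoreMulti (registers hypothetical):
//   ldr r1, [r0]
//   ldr r2, [r0, #4]
//   ldr r3, [r0, #8]
// becomes "ldmia r0, {r1, r2, r3}"; with a starting offset of 4 on an ARM
// (non-Thumb) target the IB form would be chosen instead, and a nonzero
// offset that no submode covers may force materializing a new base register
// first (only done when merging more than two ops).
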
MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB,
    MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
    bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
    DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const {
  bool IsLoad = isi32Load(Opcode);
  assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
  unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;

  assert(Regs.size() == 2);
  MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
                                    TII->get(LoadStoreOpcode));
  if (IsLoad) {
    MIB.addReg(Regs[0].first, RegState::Define)
       .addReg(Regs[1].first, RegState::Define);
  } else {
    MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
       .addReg(Regs[1].first, getKillRegState(Regs[1].second));
  }
  MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  return MIB.getInstr();
}

/// Attempt to merge the instructions in \p Cand into a single load/store
/// multiple or double instruction, updating the register lists and kill/def
/// flags of the surrounding instructions accordingly on success.
MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
  const MachineInstr *First = Cand.Instrs.front();
  unsigned Opcode = First->getOpcode();
  bool IsLoad = isLoadSingle(Opcode);
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 4> ImpDefs;
  DenseSet<unsigned> KilledRegs;
  DenseSet<unsigned> UsedRegs;
  // Determine list of registers and list of implicit super-register defs.
  for (const MachineInstr *MI : Cand.Instrs) {
    const MachineOperand &MO = getLoadStoreRegOp(*MI);
    unsigned Reg = MO.getReg();
    bool IsKill = MO.isKill();
    if (IsKill)
      KilledRegs.insert(Reg);
    Regs.push_back(std::make_pair(Reg, IsKill));
    UsedRegs.insert(Reg);

    if (IsLoad) {
      // Collect any implicit defs of super-registers. After merging we can't
      // be sure anymore that we properly preserved these live ranges and must
      // remove these implicit operands.
      for (const MachineOperand &MO : MI->implicit_operands()) {
        if (!MO.isReg() || !MO.isDef() || MO.isDead())
          continue;
        assert(MO.isImplicit());
        unsigned DefReg = MO.getReg();

        if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end())
          continue;
        // We can ignore cases where the super-reg is read and written.
        if (MI->readsRegister(DefReg))
          continue;
        ImpDefs.push_back(DefReg);
      }
    }
  }

  // Attempt the merge.
  typedef MachineBasicBlock::iterator iterator;
  MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
  iterator InsertBefore = std::next(iterator(LatestMI));
  MachineBasicBlock &MBB = *LatestMI->getParent();
  unsigned Offset = getMemoryOpOffset(First);
  unsigned Base = getLoadStoreBaseOp(*First).getReg();
  bool BaseKill = LatestMI->killsRegister(Base);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg);
  DebugLoc DL = First->getDebugLoc();
  MachineInstr *Merged = nullptr;
  if (Cand.CanMergeToLSDouble)
    Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
                                   Opcode, Pred, PredReg, DL, Regs);
  if (!Merged && Cand.CanMergeToLSMulti)
    Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
                                  Opcode, Pred, PredReg, DL, Regs);
  if (!Merged)
    return nullptr;

  // Determine the earliest instruction that will get removed. We then keep an
  // iterator just above it so the following erases don't invalidate it.
  iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
  bool EarliestAtBegin = false;
  if (EarliestI == MBB.begin()) {
    EarliestAtBegin = true;
  } else {
    EarliestI = std::prev(EarliestI);
  }

  // Remove instructions which have been merged.
  for (MachineInstr *MI : Cand.Instrs)
    MBB.erase(MI);

  // Determine range between the earliest removed instruction and the new one.
  if (EarliestAtBegin)
    EarliestI = MBB.begin();
  else
    EarliestI = std::next(EarliestI);
  auto FixupRange = make_range(EarliestI, iterator(Merged));

  if (isLoadSingle(Opcode)) {
    // If the previous loads defined a super-reg, then we have to mark earlier
    // operands undef; replicate the super-reg def on the merged instruction.
    for (MachineInstr &MI : FixupRange) {
      for (unsigned &ImpDefReg : ImpDefs) {
        for (MachineOperand &MO : MI.implicit_operands()) {
          if (!MO.isReg() || MO.getReg() != ImpDefReg)
            continue;
          if (MO.readsReg())
            MO.setIsUndef();
          else if (MO.isDef())
            ImpDefReg = 0;
        }
      }
    }

    MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
    for (unsigned ImpDef : ImpDefs)
      MIB.addReg(ImpDef, RegState::ImplicitDefine);
  } else {
    // Remove kill flags: we may now be storing the values at a later point.
    assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
    for (MachineInstr &MI : FixupRange) {
      for (MachineOperand &MO : MI.uses()) {
        if (!MO.isReg() || !MO.isKill())
          continue;
        if (UsedRegs.count(MO.getReg()))
          MO.setIsKill(false);
      }
    }
    assert(ImpDefs.empty());
  }

  return Merged;
}

static bool isValidLSDoubleOffset(int Offset) {
  unsigned Value = abs(Offset);
  // t2LDRDi8/t2STRDi8 support an 8-bit immediate which is internally
  // multiplied by 4.
  return (Value % 4) == 0 && Value < 1024;
}

/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
  const MachineInstr *FirstMI = MemOps[0].MI;
  unsigned Opcode = FirstMI->getOpcode();
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  unsigned Size = getLSMultipleTransferSize(FirstMI);

  unsigned SIndex = 0;
  unsigned EIndex = MemOps.size();
  do {
    // Look at the first instruction.
    const MachineInstr *MI = MemOps[SIndex].MI;
    int Offset = MemOps[SIndex].Offset;
    const MachineOperand &PMO = getLoadStoreRegOp(*MI);
    unsigned PReg = PMO.getReg();
    unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
    unsigned Latest = SIndex;
    unsigned Earliest = SIndex;
    unsigned Count = 1;
    bool CanMergeToLSDouble =
      STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    if (STI->isCortexM3() && isi32Load(Opcode) &&
        PReg == getLoadStoreBaseOp(*MI).getReg())
      CanMergeToLSDouble = false;

    bool CanMergeToLSMulti = true;
    // On Swift, a vldm/vstm starting with an odd register number needs more
    // uops than single vldrs.
    if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
      CanMergeToLSMulti = false;

    // LDRD/STRD do not allow SP/PC. LDM/STM either do not support it or have
    // it deprecated; LDM to PC is fine but cannot happen here.
    if (PReg == ARM::SP || PReg == ARM::PC)
      CanMergeToLSMulti = CanMergeToLSDouble = false;

    // Merge following instructions where possible.
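    // E.g. (hypothetical registers): ldr r1,[r0]; ldr r2,[r0,#4];
    // ldr r3,[r0,#8] have consecutive offsets and ascending register
    // encodings, so all three join one candidate; a following ldr r2,[r0,#12]
    // would end the chain because its register number is not greater than the
    // previous one.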
    for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
      int NewOffset = MemOps[I].Offset;
      if (NewOffset != Offset + (int)Size)
        break;
      const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
      unsigned Reg = MO.getReg();
      if (Reg == ARM::SP || Reg == ARM::PC)
        break;

      // See if the current load/store may be part of a multi load/store.
      unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
      bool PartOfLSMulti = CanMergeToLSMulti;
      if (PartOfLSMulti) {
        // Register numbers must be in ascending order.
        if (RegNum <= PRegNum)
          PartOfLSMulti = false;
        // For VFP / NEON load/store multiples, the registers must be
        // consecutive and within the limit on the number of registers per
        // instruction.
        else if (!isNotVFP && RegNum != PRegNum+1)
          PartOfLSMulti = false;
      }
      // See if the current load/store may be part of a double load/store.
      bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;

      if (!PartOfLSMulti && !PartOfLSDouble)
        break;
      CanMergeToLSMulti &= PartOfLSMulti;
      CanMergeToLSDouble &= PartOfLSDouble;
      // Track MemOp with latest and earliest position (Positions are
      // counted in reverse).
      unsigned Position = MemOps[I].Position;
      if (Position < MemOps[Latest].Position)
        Latest = I;
      else if (Position > MemOps[Earliest].Position)
        Earliest = I;
      // Prepare for next MemOp.
      Offset += Size;
      PRegNum = RegNum;
    }

    // Form a candidate from the Ops collected so far.
    MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
    for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
      Candidate->Instrs.push_back(MemOps[C].MI);
    Candidate->LatestMIIdx = Latest - SIndex;
    Candidate->EarliestMIIdx = Earliest - SIndex;
    Candidate->InsertPos = MemOps[Latest].Position;
    if (Count == 1)
      CanMergeToLSMulti = CanMergeToLSDouble = false;
    Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
    Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
    Candidates.push_back(Candidate);
    // Continue after the chain.
    SIndex += Count;
  } while (SIndex < EIndex);
}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}

/// Check if the given instruction increments or decrements a register and
/// return the amount it is incremented/decremented. Returns 0 if the
/// instruction is not a simple add/sub of an immediate to \p Reg, or if the
/// CPSR flags it generates are possibly read as well.
static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
                                  ARMCC::CondCodes Pred, unsigned PredReg) {
  bool CheckCPSRDef;
  int Scale;
  switch (MI.getOpcode()) {
  case ARM::tADDi8:  Scale =  4; CheckCPSRDef = true; break;
  case ARM::tSUBi8:  Scale = -4; CheckCPSRDef = true; break;
  case ARM::t2SUBri:
  case ARM::SUBri:   Scale = -1; CheckCPSRDef = true; break;
  case ARM::t2ADDri:
  case ARM::ADDri:   Scale =  1; CheckCPSRDef = true; break;
  case ARM::tADDspi: Scale =  4; CheckCPSRDef = false; break;
  case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
  default: return 0;
  }

  unsigned MIPredReg;
  if (MI.getOperand(0).getReg() != Reg ||
      MI.getOperand(1).getReg() != Reg ||
      getInstrPredicate(&MI, MIPredReg) != Pred ||
      MIPredReg != PredReg)
    return 0;

  if (CheckCPSRDef && definesCPSR(&MI))
    return 0;
  return MI.getOperand(2).getImm() * Scale;
}

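// For example (hypothetical operands): "ADDri r0, r0, #8" with a matching
// predicate yields +8 and "SUBri r0, r0, #8" yields -8, while the same
// add/sub with a live CPSR def (flags that are later read) yields 0 and is
// therefore never folded.
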
/// Searches for an increment or decrement of \p Reg before \p MBBI.
static MachineBasicBlock::iterator
findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
                 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
  Offset = 0;
  MachineBasicBlock &MBB = *MBBI->getParent();
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (MBBI == BeginMBBI)
    return EndMBBI;

  // Skip debug values.
  MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
  while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI)
    --PrevMBBI;

  Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
  return Offset == 0 ? EndMBBI : PrevMBBI;
}

/// Searches for an increment or decrement of \p Reg after \p MBBI.
static MachineBasicBlock::iterator
findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
                ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
  Offset = 0;
  MachineBasicBlock &MBB = *MBBI->getParent();
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
  // Skip debug values.
  while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
    ++NextMBBI;
  if (NextMBBI == EndMBBI)
    return EndMBBI;

  Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
  return Offset == 0 ? EndMBBI : NextMBBI;
}

/// Fold preceding/trailing inc/dec of base register into the
/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;

  const MachineOperand &BaseOP = MI->getOperand(0);
  unsigned Base = BaseOP.getReg();
  bool BaseKill = BaseOP.isKill();
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  int Bytes = getLSMultipleTransferSize(MI);
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator MBBI(MI);
  int Offset;
  MachineBasicBlock::iterator MergeInstr
    = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
  if (Mode == ARM_AM::ia && Offset == -Bytes) {
    Mode = ARM_AM::db;
  } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
    Mode = ARM_AM::da;
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
        ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes))
      return false;
  }
  MBB.erase(MergeInstr);

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

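// Illustrative pre/post-indexed folds performed below (registers
// hypothetical):
//   add r0, r0, #4 ; ldr r1, [r0]   =>  ldr r1, [r0, #4]!  (pre-indexed)
//   ldr r1, [r0]   ; add r0, r0, #4 =>  ldr r1, [r0], #4   (post-indexed)
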
/// Fold preceding/trailing inc/dec of base register into the
/// LDR/STR/FLD{D|S}/FST{D|S} op when possible.
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
  if (isThumb1) return false;

  unsigned Base = getLoadStoreBaseOp(*MI).getReg();
  bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Bytes = getLSMultipleTransferSize(MI);
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator MBBI(MI);
  int Offset;
  MachineBasicBlock::iterator MergeInstr
    = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  unsigned NewOpc;
  if (!isAM5 && Offset == Bytes) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
  } else if (Offset == -Bytes) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (Offset == Bytes) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
    } else if (!isAM5 && Offset == -Bytes) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
    } else
      return false;
  }
  MBB.erase(MergeInstr);

  ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;

  bool isLd = isLoadSingle(Opcode);
  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
      }
    } else {
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
    } else {
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}

bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
  unsigned Opcode = MI.getOpcode();
  assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
         "Must have t2STRDi8 or t2LDRDi8");
  if (MI.getOperand(3).getImm() != 0)
    return false;

  // Behaviour for writeback is undefined if base register is the same as one
  // of the others.
  const MachineOperand &BaseOp = MI.getOperand(2);
  unsigned Base = BaseOp.getReg();
  const MachineOperand &Reg0Op = MI.getOperand(0);
  const MachineOperand &Reg1Op = MI.getOperand(1);
  if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
    return false;

  unsigned PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);
  MachineBasicBlock::iterator MBBI(MI);
  MachineBasicBlock &MBB = *MI.getParent();
  int Offset;
  MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
                                                            PredReg, Offset);
  unsigned NewOpc;
  if (Offset == 8 || Offset == -8) {
    NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (Offset == 8 || Offset == -8) {
      NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
    } else
      return false;
  }
  MBB.erase(MergeInstr);

  DebugLoc DL = MI.getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
    MIB.addOperand(Reg0Op).addOperand(Reg1Op)
       .addReg(BaseOp.getReg(), RegState::Define);
  } else {
    assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
    MIB.addReg(BaseOp.getReg(), RegState::Define)
       .addOperand(Reg0Op).addOperand(Reg1Op);
  }
  MIB.addReg(BaseOp.getReg(), RegState::Kill)
     .addImm(Offset).addImm(Pred).addReg(PredReg);
  assert(TII->get(Opcode).getNumOperands() == 6 &&
         TII->get(NewOpc).getNumOperands() == 7 &&
         "Unexpected number of operands in Opcode specification.");

  // Transfer implicit operands.
  for (const MachineOperand &MO : MI.implicit_operands())
    MIB.addOperand(MO);
  MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());

  MBB.erase(MBBI);
  return true;
}

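// For example (hypothetical operands), MergeBaseUpdateLSDouble above turns
//   t2LDRDi8 r1, r2, [r0, #0]
//   t2ADDri  r0, r0, #8
// into "t2LDRD_POST r1, r2, [r0], #8", and a preceding add/sub of 8 into the
// corresponding t2LDRD_PRE/t2STRD_PRE form.
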
/// Returns true if instruction is a memory operation that this pass is capable
/// of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  unsigned Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
  case ARM::VLDRD:
  case ARM::VSTRD:
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc DL, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

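// Illustrative (hypothetical registers): FixInvalidRegPairOp below rewrites
//   ldrd r1, r3, [r0]   ; invalid for ARM LDRD (odd/non-consecutive pair)
// either into "ldmia r0, {r1, r3}" when the register numbers ascend and the
// offset is zero, or into two plain ldr instructions otherwise.
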
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
    return false;

  const MachineOperand &BaseOp = MI->getOperand(2);
  unsigned BaseReg = BaseOp.getReg();
  unsigned EvenReg = MI->getOperand(0).getReg();
  unsigned OddReg = MI->getOperand(1).getReg();
  unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
  unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);

  // ARM errata 602117: LDRD with base in list may result in incorrect base
  // register when interrupted or faulted.
  bool Errata602117 = EvenReg == BaseReg &&
    (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
  // ARM LDRD/STRD needs consecutive registers.
  bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
    (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);

  if (!Errata602117 && !NonConsecutiveRegs)
    return false;

  bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
  bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
  bool EvenDeadKill = isLd ?
    MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
  bool EvenUndef = MI->getOperand(0).isUndef();
  bool OddDeadKill = isLd ?
    MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
  bool OddUndef = MI->getOperand(1).isUndef();
  bool BaseKill = BaseOp.isKill();
  bool BaseUndef = BaseOp.isUndef();
  bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
  bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
  int OffImm = getMemoryOpOffset(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

  if (OddRegNum > EvenRegNum && OffImm == 0) {
    // Ascending register numbers and no offset. It's safe to change it to a
    // ldm or stm.
    unsigned NewOpc = (isLd)
      ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
      : (isT2 ? ARM::t2STMIA : ARM::STMIA);
    if (isLd) {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
        .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
      ++NumLDRD2LDM;
    } else {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg,
                getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
        .addReg(OddReg,
                getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
      ++NumSTRD2STM;
    }
  } else {
    // Split into two instructions.
    unsigned NewOpc = (isLd)
      ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
    // so adjust and use t2LDRi12 here for that.
    unsigned NewOpc2 = (isLd)
      ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    DebugLoc dl = MBBI->getDebugLoc();
    // If this is a load and the base register is killed, it may have been
    // re-defined by the load; make sure the first load does not clobber it.
    if (isLd &&
        (BaseKill || OffKill) &&
        (TRI->regsOverlap(EvenReg, BaseReg))) {
      assert(!TRI->regsOverlap(OddReg, BaseReg));
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, false,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, false,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    } else {
      if (OddReg == EvenReg && EvenDeadKill) {
        // If the two source operands are the same, the kill marker is
        // probably on the first one. e.g.
        //   t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
        EvenDeadKill = false;
        OddDeadKill = true;
      }
      // Never kill the base register in the first instruction.
      if (EvenReg == BaseReg)
        EvenDeadKill = false;
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, EvenUndef,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, OddUndef,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    }
    if (isLd)
      ++NumLDRD2LDR;
    else
      ++NumSTRD2STR;
  }

  MBBI = MBB.erase(MBBI);
  return true;
}

/// An optimization pass to turn multiple LDR / STR ops of the same base and
/// incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  unsigned CurrOpc = ~0u;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned Position = 0;
  assert(Candidates.size() == 0);
  assert(MergeBaseCandidates.size() == 0);
  LiveRegsValid = false;

  for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
       I = MBBI) {
    // The instruction in front of the iterator is the one we look at.
    MBBI = std::prev(I);
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;
    ++Position;

    if (isMemoryOp(MBBI)) {
      unsigned Opcode = MBBI->getOpcode();
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      if (CurrBase == 0) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrPred = Pred;
        MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
        continue;
      }
      // Note: No need to match PredReg in the next if.
      if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
        // Watch out for:
        //   r4 := ldr [r0, #8]
        //   r4 := ldr [r0, #4]
        // or
        //   r0 := ldr [r0]
        // If a load overrides the base register or a register loaded by
        // another load in our chain, we cannot take this instruction.
        bool Overlap = false;
        if (isLoadSingle(Opcode)) {
          Overlap = (Base == Reg);
          if (!Overlap) {
            for (const MemOpQueueEntry &E : MemOps) {
              if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
                Overlap = true;
                break;
              }
            }
          }
        }

        if (!Overlap) {
          // Check the offset and sort the memory operation into the current
          // chain.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
            continue;
          } else {
            MemOpQueue::iterator MI, ME;
            for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
              if (Offset < MI->Offset) {
                // Found a place to insert.
                break;
              }
              if (Offset == MI->Offset) {
                // Collision, abort.
                MI = ME;
                break;
              }
            }
            if (MI != MemOps.end()) {
              MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position));
              continue;
            }
          }
        }
      }

      // Don't advance the iterator; the op will start a new chain next.
      MBBI = I;
      --Position;
      // Fall through to look at the existing chain.
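      // E.g. a change of base register, opcode, or predicate between
      // neighbouring memory ops breaks the chain; the ops collected so far
      // become a merge candidate below, and this op seeds a new chain on the
      // next iteration.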
    } else if (MBBI->isDebugValue()) {
      continue;
    } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
               MBBI->getOpcode() == ARM::t2STRDi8) {
      // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD
      // instructions; remember them because we may still be able to merge
      // add/sub into them.
      MergeBaseCandidates.push_back(MBBI);
    }

    // If we are here then the chain is broken; extract candidates for a merge.
    if (MemOps.size() > 0) {
      FormCandidates(MemOps);
      // Reset for the next chain.
      CurrBase = 0;
      CurrOpc = ~0u;
      CurrPred = ARMCC::AL;
      MemOps.clear();
    }
  }
  if (MemOps.size() > 0)
    FormCandidates(MemOps);

  // Sort candidates so they get processed from the end to the beginning of
  // the basic block later; this is necessary for liveness calculation.
  auto LessThan = [](const MergeCandidate *M0, const MergeCandidate *M1) {
    return M0->InsertPos < M1->InsertPos;
  };
  std::sort(Candidates.begin(), Candidates.end(), LessThan);

  // Go through the list of candidates and merge.
  bool Changed = false;
  for (const MergeCandidate *Candidate : Candidates) {
    if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
      MachineInstr *Merged = MergeOpsUpdate(*Candidate);
      // Merge preceding/trailing base inc/dec into the merged op.
      if (Merged) {
        Changed = true;
        unsigned Opcode = Merged->getOpcode();
        if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
          MergeBaseUpdateLSDouble(*Merged);
        else
          MergeBaseUpdateLSMultiple(Merged);
      } else {
        for (MachineInstr *MI : Candidate->Instrs) {
          if (MergeBaseUpdateLoadStore(MI))
            Changed = true;
        }
      }
    } else {
      assert(Candidate->Instrs.size() == 1);
      if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
        Changed = true;
    }
  }
  Candidates.clear();
  // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
  for (MachineInstr *MI : MergeBaseCandidates)
    MergeBaseUpdateLSDouble(*MI);
  MergeBaseCandidates.clear();

  return Changed;
}

/// If this is an exit BB, try merging the return ops ("bx lr" and "mov pc,
/// lr") into the preceding stack restore so it directly restores the value of
/// LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  // Thumb1 LDM doesn't allow high registers.
  if (isThumb1) return false;
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = std::prev(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
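      // Only the IA (increment-after) form has a _RET variant that can pop
      // directly into PC; the assert below catches any other update form.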
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  MF = &Fn;
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TL = STI->getTargetLowering();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();

  RegClassInfoValid = false;
  isThumb2 = AFI->isThumb2Function();
  isThumb1 = AFI->isThumbFunction() && !isThumb2;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (STI->hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  Allocator.DestroyAll();
  return Modified;
}

namespace {
  /// Pre-register allocation pass that moves load / store instructions from
  /// consecutive locations closer together to make it more likely that they
  /// will be combined later.
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &FirstReg,
                          unsigned &SecondReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVectorImpl<MachineInstr *> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = &Fn.getDataLayout();
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSetImpl<MachineInstr*> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias
  // information some day.
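  // The loop below bails out on calls, terminators, unmodeled side effects,
  // aliasing memory ops, and any redefinition of the base register; otherwise
  // it tallies the registers whose live ranges the move would stretch.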
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate the register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // OK if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}

/// Copy \p Op0 and \p Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}

bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl, unsigned &NewOpc,
                                          unsigned &FirstReg,
                                          unsigned &SecondReg,
                                          unsigned &BaseReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12) {
    NewOpc = ARM::LDRD;
  } else if (Opcode == ARM::STRi12) {
    NewOpc = ARM::STRD;
  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else {
    return false;
  }

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement. At the moment, we ignore the memory operand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
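  // For reference: t2LDRDi8/t2STRDi8 take a 4-byte-scaled 8-bit offset
  // (-1020..+1020 in steps of 4), while ARM-mode LDRD/STRD use addrmode3
  // with an unscaled 8-bit magnitude (0..255) plus an add/sub flag, which is
  // what the checks below enforce.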
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  FirstReg = Op0->getOperand(0).getReg();
  SecondReg = Op1->getOperand(0).getReg();
  if (FirstReg == SecondReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVectorImpl<MachineInstr *> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
              int LOffset = getMemoryOpOffset(LHS);
              int ROffset = getMemoryOpOffset(RHS);
              assert(LHS == RHS || LOffset != ROffset);
              return LOffset > ROffset;
            });

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative: if the instructions are too far apart, don't move
      // them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
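        // Loads are hoisted to the position of the earliest op of the group;
        // stores are sunk to the latest. The loop below then steps past group
        // members and debug values to find the real insertion point.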
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned FirstReg = 0, SecondReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             FirstReg, SecondReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(FirstReg, TRC);
          MRI->constrainRegClass(SecondReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg, RegState::Define)
              .addReg(SecondReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg)
              .addReg(SecondReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          if (!isT2) {
            // Add register allocation hints to form register pairs.
            MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
            MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
          }
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
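        // Calls and terminators delimit the scheduling region; the ops
        // collected so far are rescheduled below, then the scan resumes past
        // the barrier.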
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isLoadSingle(Opc);
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}

/// Returns an instance of the load / store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}