//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This file contains a pass that performs load / store related peephole
/// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "ThumbRegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

#define DEBUG_TYPE "arm-ldst-opt"

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");

namespace llvm {
void initializeARMLoadStoreOptPass(PassRegistry &);
}

#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"

namespace {
  /// Post-register allocation pass that combines load / store instructions to
  /// form ldm / stm instructions.
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {
      initializeARMLoadStoreOptPass(*PassRegistry::getPassRegistry());
    }

    const MachineFunction *MF;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const MachineRegisterInfo *MRI;
    const ARMSubtarget *STI;
    const TargetLowering *TL;
    ARMFunctionInfo *AFI;
    LivePhysRegs LiveRegs;
    RegisterClassInfo RegClassInfo;
    MachineBasicBlock::const_iterator LiveRegPos;
    bool LiveRegsValid;
    bool RegClassInfoValid;
    bool isThumb1, isThumb2;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return ARM_LOAD_STORE_OPT_NAME;
    }

  private:
    /// A set of load/store MachineInstrs with the same base register, sorted
    /// by offset.
    struct MemOpQueueEntry {
      MachineInstr *MI;
      int Offset;        ///< Load/Store offset.
      unsigned Position; ///< Position as counted from end of basic block.
      MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)
          : MI(MI), Offset(Offset), Position(Position) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;

    /// A set of MachineInstrs that fulfill (nearly all) conditions to get
    /// merged into a LDM/STM.
    struct MergeCandidate {
      /// List of instructions ordered by load/store offset.
      SmallVector<MachineInstr*, 4> Instrs;
      /// Index in Instrs of the instruction being latest in the schedule.
      unsigned LatestMIIdx;
      /// Index in Instrs of the instruction being earliest in the schedule.
      unsigned EarliestMIIdx;
      /// Index into the basic block where the merged instruction will be
      /// inserted. (See MemOpQueueEntry.Position)
      unsigned InsertPos;
      /// Whether the instructions can be merged into a ldm/stm instruction.
      bool CanMergeToLSMulti;
      /// Whether the instructions can be merged into a ldrd/strd instruction.
      bool CanMergeToLSDouble;
    };
    SpecificBumpPtrAllocator<MergeCandidate> Allocator;
    SmallVector<const MergeCandidate*,4> Candidates;
    SmallVector<MachineInstr*,4> MergeBaseCandidates;

    void moveLiveRegsBefore(const MachineBasicBlock &MBB,
                            MachineBasicBlock::const_iterator Before);
    unsigned findFreeReg(const TargetRegisterClass &RegClass);
    void UpdateBaseRegUses(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           DebugLoc DL, unsigned Base, unsigned WordOffset,
                           ARMCC::CondCodes Pred, unsigned PredReg);
    MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
        MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
        bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
        DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs);
    MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
        MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
        bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
        DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const;
    void FormCandidates(const MemOpQueue &MemOps);
    MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineInstr *MI);
    bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
    bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

INITIALIZE_PASS(ARMLoadStoreOpt, "arm-load-store-opt", ARM_LOAD_STORE_OPT_NAME, false, false)

static bool definesCPSR(const MachineInstr *MI) {
  for (const auto &MO : MI->operands()) {
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has live CPSR def, then it's not safe to fold it
      // into load / store.
      return true;
  }

  return false;
}

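/// Return the byte offset encoded in a single load/store instruction, taking
/// the addressing mode into account: Thumb1 word immediates are scaled by 4,
/// and AM3/AM5 encodings carry a separate add/sub flag (e.g. a VLDRD with an
/// AM5 offset of {sub, 2} yields -8).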
static int getMemoryOpOffset(const MachineInstr *MI) {
  unsigned Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  // Thumb1 immediate offsets are scaled by 4
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
      Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
    return OffField * 4;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
    : ARM_AM::getAM5Offset(OffField) * 4;
  ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
    : ARM_AM::getAM5Op(OffField);

  if (Op == ARM_AM::sub)
    return -Offset;

  return Offset;
}

static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
  return MI.getOperand(1);
}

static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
  return MI.getOperand(0);
}

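/// Map a single load/store opcode and an addressing submode to the matching
/// load/store multiple opcode; returns 0 when no non-writeback form exists
/// (e.g. VLDRS with the db submode).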
static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
  case ARM::tLDRspi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
  case ARM::tSTRspi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}

static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

static bool isT1i32Load(unsigned Opc) {
  return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc);
}

static bool isT1i32Store(unsigned Opc) {
  return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}

static bool isLoadSingle(unsigned Opc) {
  return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
}

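/// Number of immediate units per word for a Thumb1 load/store opcode: word
/// immediates count 4-byte units, halfword immediates 2-byte units, and byte
/// immediates single bytes. Used as 'WordOffset * getImmScale(Opc)' when
/// compensating for writeback, e.g. a two-word writeback shifts a tLDRBi
/// immediate by 8.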
static unsigned getImmScale(unsigned Opc) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
    return 1;
  case ARM::tLDRHi:
  case ARM::tSTRHi:
    return 2;
  case ARM::tLDRBi:
  case ARM::tSTRBi:
    return 4;
  }
}

static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
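/// For example, after forming 'tLDMIA r0!, {r1, r2}' later uses see r0
/// increased by 8 bytes, so a following tLDRi from [r0 + #8] has its
/// immediate reduced to zero; where an immediate would go negative, a SUB of
/// the base register is inserted instead.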
void
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   DebugLoc DL, unsigned Base,
                                   unsigned WordOffset,
                                   ARMCC::CondCodes Pred, unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");
  // Start updating any instructions with immediate offsets. Insert a SUB before
  // the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    bool InsertSub = false;
    unsigned Opc = MBBI->getOpcode();

    if (MBBI->readsRegister(Base)) {
      int Offset;
      bool IsLoad =
        Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
      bool IsStore =
        Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

      if (IsLoad || IsStore) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);

        // If storing the base register, it needs to be reset first.
        unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();

        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
          MO.setImm(Offset);
        else
          InsertSub = true;

      } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                 !definesCPSR(MBBI)) {
        // SUBS/ADDS using this register, with a dead def of the CPSR.
        // Merge it with the update; if the merged offset is too large,
        // insert a new sub instead.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4;
        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
          // Offset == 0.
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }

      } else {
        // Can't update the instruction.
        InsertSub = true;
      }

    } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
      // Since SUBS sets the condition flags, we can't place the base reset
      // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.
      InsertSub = true;
    }

    if (InsertSub) {
      // An instruction above couldn't be updated, so insert a sub.
      AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
        .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
      return;
    }

    if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
      // Register got killed. Stop updating.
      return;
  }

  // End of block was reached.
  if (MBB.succ_size() > 0) {
    // FIXME: Because of a bug, live registers are sometimes missing from
    // the successor blocks' live-in sets. This means we can't trust that
    // information and *always* have to reset at the end of a block.
    // See PR21029.
    if (MBBI != MBB.end()) --MBBI;
    AddDefaultT1CC(
      BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
      .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
  }
}

/// Return the first register of class \p RegClass that is not currently live.
unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
  if (!RegClassInfoValid) {
    RegClassInfo.runOnMachineFunction(*MF);
    RegClassInfoValid = true;
  }

  for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
    if (!LiveRegs.contains(Reg))
      return Reg;
  return 0;
}

/// Compute live registers just before instruction \p Before (in normal
/// schedule direction). Computes backwards so multiple queries in the same
/// block must come in reverse order.
void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
                                         MachineBasicBlock::const_iterator Before) {
  // Initialize if we never queried in this block.
  if (!LiveRegsValid) {
    LiveRegs.init(TRI);
    LiveRegs.addLiveOuts(&MBB, true);
    LiveRegPos = MBB.end();
    LiveRegsValid = true;
  }
  // Move backward just before the "Before" position.
  while (LiveRegPos != Before) {
    --LiveRegPos;
    LiveRegs.stepBackward(*LiveRegPos);
  }
}

static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
                        unsigned Reg) {
  for (const std::pair<unsigned, bool> &R : Regs)
    if (R.first == Reg)
      return true;
  return false;
}

/// Create and insert a LDM or STM with Base as base register and registers in
/// Regs as the register operands that would be loaded / stored. Returns the
/// newly created instruction, or nullptr if the merge could not be done.
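/// e.g. 'ldr r1, [r0] ; ldr r2, [r0, #4]' with a starting Offset of 0 becomes
/// 'ldmia r0, {r1, r2}'; a nonzero starting offset may require materializing
/// a new base register first.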
MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
    MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
    bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
    DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) {
  unsigned NumRegs = Regs.size();
  assert(NumRegs > 1);

  // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
  // Compute liveness information for that register to make the decision.
  bool SafeToClobberCPSR = !isThumb1 ||
    (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
     MachineBasicBlock::LQR_Dead);

  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.

  // Exception: If the base register is in the input reglist, Thumb1 LDM is
  // non-writeback.
  // It's also not possible to merge an STR of the base register in Thumb1.
  if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) {
    assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
    if (Opcode == ARM::tLDRi) {
      Writeback = false;
    } else if (Opcode == ARM::tSTRi) {
      return nullptr;
    }
  }

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;

  if (Offset == 4 && haveIBAndDA) {
    Mode = ARM_AM::ib;
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
    Mode = ARM_AM::da;
  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
    // Check if this is a supported opcode before inserting instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return nullptr;

    // On Thumb1, it's not worth materializing a new base register without
    // clobbering the CPSR (i.e. not using ADDS/SUBS).
    if (!SafeToClobberCPSR)
      return nullptr;

    unsigned NewBase;
    if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    } else {
      // Find a free register that we can use as scratch register.
      moveLiveRegsBefore(MBB, InsertBefore);
      // The merged instruction does not exist yet but will use several Regs if
      // it is a Store.
      if (!isLoadSingle(Opcode))
        for (const std::pair<unsigned, bool> &R : Regs)
          LiveRegs.addReg(R.first);

      NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
      if (NewBase == 0)
        return nullptr;
    }

    int BaseOpc =
      isThumb2 ? ARM::t2ADDri :
      (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
      (isThumb1 && Offset < 8) ? ARM::tADDi3 :
      isThumb1 ? ARM::tADDi8 : ARM::ADDri;

    if (Offset < 0) {
      Offset = - Offset;
      BaseOpc =
        isThumb2 ? ARM::t2SUBri :
        (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
        isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
    }

    if (!TL->isLegalAddImmediate(Offset))
      // FIXME: Try add with register operand?
      return nullptr; // Probably not worth it then.

    // We can only append a kill flag to the add/sub input if the value is not
    // used in the register list of the stm as well.
    bool KillOldBase = BaseKill &&
      (!isi32Store(Opcode) || !ContainsReg(Regs, Base));

    if (isThumb1) {
      // Thumb1: depending on immediate size, use either
      //   ADDS NewBase, Base, #imm3
      // or
      //   MOV  NewBase, Base
      //   ADDS NewBase, #imm8.
      if (Base != NewBase &&
          (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
        // Need to insert a MOV to the new base first.
        if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
            !STI->hasV6Ops()) {
          // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
          if (Pred != ARMCC::AL)
            return nullptr;
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase));
        } else
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase))
            .addImm(Pred).addReg(PredReg);

        // The following ADDS/SUBS becomes an update.
        Base = NewBase;
        KillOldBase = true;
      }
      if (BaseOpc == ARM::tADDrSPi) {
        assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
        BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
          .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4)
          .addImm(Pred).addReg(PredReg);
      } else
        AddDefaultT1CC(
          BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true)
          .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
          .addImm(Pred).addReg(PredReg);
    } else {
      BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
        .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
        .addImm(Pred).addReg(PredReg).addReg(0);
    }
    Base = NewBase;
    BaseKill = true; // New base is always killed straight away.
  }

  bool isDef = isLoadSingle(Opcode);

  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  // base register writeback.
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode)
    return nullptr;

  // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
  // - There is no writeback (LDM of base register),
  // - the base register is killed by the merged instruction,
  // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
  //   to reset the base register.
  // Otherwise, don't merge.
  // It's safe to return here since the code to materialize a new base register
  // above is also conditional on SafeToClobberCPSR.
  if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
    return nullptr;

  MachineInstrBuilder MIB;

  if (Writeback) {
    if (Opcode == ARM::tLDMIA)
      // Update tLDMIA with writeback if necessary.
      Opcode = ARM::tLDMIA_UPD;

    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));

    // Thumb1: we might need to set base writeback when building the MI.
    MIB.addReg(Base, getDefRegState(true))
       .addReg(Base, getKillRegState(BaseKill));

    // The base isn't dead after a merged instruction with writeback.
    // Insert a sub instruction after the newly formed instruction to reset.
    if (!BaseKill)
      UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);

  } else {
    // No writeback, simply build the MachineInstr.
    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
    MIB.addReg(Base, getKillRegState(BaseKill));
  }

  MIB.addImm(Pred).addReg(PredReg);

  for (const std::pair<unsigned, bool> &R : Regs)
    MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));

  return MIB.getInstr();
}

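/// Create and insert a t2LDRDi8/t2STRDi8 with Base as base register and the
/// (exactly two) registers in Regs as register operands.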
MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB,
    MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
    bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
    DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const {
  bool IsLoad = isi32Load(Opcode);
  assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
  unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;

  assert(Regs.size() == 2);
  MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
                                    TII->get(LoadStoreOpcode));
  if (IsLoad) {
    MIB.addReg(Regs[0].first, RegState::Define)
       .addReg(Regs[1].first, RegState::Define);
  } else {
    MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
       .addReg(Regs[1].first, getKillRegState(Regs[1].second));
  }
  MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  return MIB.getInstr();
}

/// Try to merge the instructions in \p Cand into a single LDM/STM or
/// LDRD/STRD instruction; returns the merged instruction on success, updating
/// kill flags and implicit super-register defs as needed.
MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
  const MachineInstr *First = Cand.Instrs.front();
  unsigned Opcode = First->getOpcode();
  bool IsLoad = isLoadSingle(Opcode);
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 4> ImpDefs;
  DenseSet<unsigned> KilledRegs;
  DenseSet<unsigned> UsedRegs;
  // Determine list of registers and list of implicit super-register defs.
  for (const MachineInstr *MI : Cand.Instrs) {
    const MachineOperand &MO = getLoadStoreRegOp(*MI);
    unsigned Reg = MO.getReg();
    bool IsKill = MO.isKill();
    if (IsKill)
      KilledRegs.insert(Reg);
    Regs.push_back(std::make_pair(Reg, IsKill));
    UsedRegs.insert(Reg);

    if (IsLoad) {
      // Collect any implicit defs of super-registers. After merging we can't
      // be sure anymore that we properly preserved these live ranges and must
      // remove these implicit operands.
      for (const MachineOperand &MO : MI->implicit_operands()) {
        if (!MO.isReg() || !MO.isDef() || MO.isDead())
          continue;
        assert(MO.isImplicit());
        unsigned DefReg = MO.getReg();

        if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end())
          continue;
        // We can ignore cases where the super-reg is read and written.
        if (MI->readsRegister(DefReg))
          continue;
        ImpDefs.push_back(DefReg);
      }
    }
  }

  // Attempt the merge.
  typedef MachineBasicBlock::iterator iterator;
  MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
  iterator InsertBefore = std::next(iterator(LatestMI));
  MachineBasicBlock &MBB = *LatestMI->getParent();
  unsigned Offset = getMemoryOpOffset(First);
  unsigned Base = getLoadStoreBaseOp(*First).getReg();
  bool BaseKill = LatestMI->killsRegister(Base);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg);
  DebugLoc DL = First->getDebugLoc();
  MachineInstr *Merged = nullptr;
  if (Cand.CanMergeToLSDouble)
    Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
                                   Opcode, Pred, PredReg, DL, Regs);
  if (!Merged && Cand.CanMergeToLSMulti)
    Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
                                  Opcode, Pred, PredReg, DL, Regs);
  if (!Merged)
    return nullptr;

  // Determine the earliest instruction that will get removed. We then keep an
  // iterator just above it so the following erases don't invalidate it.
  iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
  bool EarliestAtBegin = false;
  if (EarliestI == MBB.begin()) {
    EarliestAtBegin = true;
  } else {
    EarliestI = std::prev(EarliestI);
  }

  // Remove instructions which have been merged.
  for (MachineInstr *MI : Cand.Instrs)
    MBB.erase(MI);

  // Determine range between the earliest removed instruction and the new one.
  if (EarliestAtBegin)
    EarliestI = MBB.begin();
  else
    EarliestI = std::next(EarliestI);
  auto FixupRange = make_range(EarliestI, iterator(Merged));

  if (isLoadSingle(Opcode)) {
    // If the previous loads defined a super-reg, then we have to mark earlier
    // operands undef; Replicate the super-reg def on the merged instruction.
    for (MachineInstr &MI : FixupRange) {
      for (unsigned &ImpDefReg : ImpDefs) {
        for (MachineOperand &MO : MI.implicit_operands()) {
          if (!MO.isReg() || MO.getReg() != ImpDefReg)
            continue;
          if (MO.readsReg())
            MO.setIsUndef();
          else if (MO.isDef())
            ImpDefReg = 0;
        }
      }
    }

    MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
    for (unsigned ImpDef : ImpDefs)
      MIB.addReg(ImpDef, RegState::ImplicitDefine);
  } else {
    // Remove kill flags: we may now be storing the values later than before.
    assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
    for (MachineInstr &MI : FixupRange) {
      for (MachineOperand &MO : MI.uses()) {
        if (!MO.isReg() || !MO.isKill())
          continue;
        if (UsedRegs.count(MO.getReg()))
          MO.setIsKill(false);
      }
    }
    assert(ImpDefs.empty());
  }

  return Merged;
}

static bool isValidLSDoubleOffset(int Offset) {
  unsigned Value = abs(Offset);
  // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
  // multiplied by 4.
  return (Value % 4) == 0 && Value < 1024;
}

/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
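/// e.g. the three word loads 'ldr r1, [r0] ; ldr r2, [r0, #4] ;
/// ldr r5, [r0, #8]' form a single candidate that can be merged into an LDM
/// (register encodings ascend and offsets are consecutive); a gap in the
/// offsets ends the chain and starts a new candidate.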
void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
  const MachineInstr *FirstMI = MemOps[0].MI;
  unsigned Opcode = FirstMI->getOpcode();
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  unsigned Size = getLSMultipleTransferSize(FirstMI);

  unsigned SIndex = 0;
  unsigned EIndex = MemOps.size();
  do {
    // Look at the first instruction.
    const MachineInstr *MI = MemOps[SIndex].MI;
    int Offset = MemOps[SIndex].Offset;
    const MachineOperand &PMO = getLoadStoreRegOp(*MI);
    unsigned PReg = PMO.getReg();
    unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
    unsigned Latest = SIndex;
    unsigned Earliest = SIndex;
    unsigned Count = 1;
    bool CanMergeToLSDouble =
      STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    if (STI->isCortexM3() && isi32Load(Opcode) &&
        PReg == getLoadStoreBaseOp(*MI).getReg())
      CanMergeToLSDouble = false;

    bool CanMergeToLSMulti = true;
    // On Swift, don't merge a vldm/vstm that would start with an odd register
    // number, as that needs more uops than single vldrs.
    if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
      CanMergeToLSMulti = false;

    // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
    // deprecated; LDM to PC is fine but cannot happen here.
    if (PReg == ARM::SP || PReg == ARM::PC)
      CanMergeToLSMulti = CanMergeToLSDouble = false;

    // Merge following instructions where possible.
    for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
      int NewOffset = MemOps[I].Offset;
      if (NewOffset != Offset + (int)Size)
        break;
      const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
      unsigned Reg = MO.getReg();
      if (Reg == ARM::SP || Reg == ARM::PC)
        break;

      // See if the current load/store may be part of a multi load/store.
      unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
      bool PartOfLSMulti = CanMergeToLSMulti;
      if (PartOfLSMulti) {
        // Register numbers must be in ascending order.
        if (RegNum <= PRegNum)
          PartOfLSMulti = false;
        // For VFP / NEON load/store multiples, the registers must be
        // consecutive and within the limit on the number of registers per
        // instruction.
        else if (!isNotVFP && RegNum != PRegNum+1)
          PartOfLSMulti = false;
      }
      // See if the current load/store may be part of a double load/store.
      bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;

      if (!PartOfLSMulti && !PartOfLSDouble)
        break;
      CanMergeToLSMulti &= PartOfLSMulti;
      CanMergeToLSDouble &= PartOfLSDouble;
      // Track MemOp with latest and earliest position (Positions are
      // counted in reverse).
      unsigned Position = MemOps[I].Position;
      if (Position < MemOps[Latest].Position)
        Latest = I;
      else if (Position > MemOps[Earliest].Position)
        Earliest = I;
      // Prepare for next MemOp.
      Offset += Size;
      PRegNum = RegNum;
    }

    // Form a candidate from the Ops collected so far.
    MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
    for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
      Candidate->Instrs.push_back(MemOps[C].MI);
    Candidate->LatestMIIdx = Latest - SIndex;
    Candidate->EarliestMIIdx = Earliest - SIndex;
    Candidate->InsertPos = MemOps[Latest].Position;
    if (Count == 1)
      CanMergeToLSMulti = CanMergeToLSDouble = false;
    Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
    Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
    Candidates.push_back(Candidate);
    // Continue after the chain.
    SIndex += Count;
  } while (SIndex < EIndex);
}

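/// Map a load/store multiple opcode to the equivalent base-updating (_UPD)
/// form for the given addressing submode, e.g. LDMIA -> LDMIA_UPD.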
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}

/// Check if the given instruction increments or decrements a register and
/// return the amount it is incremented/decremented. Returns 0 if the
/// instruction is not an increment/decrement of \p Reg, if its predicate
/// differs, or if the CPSR flags it generates are possibly read as well.
static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
                                  ARMCC::CondCodes Pred, unsigned PredReg) {
  bool CheckCPSRDef;
  int Scale;
  switch (MI.getOpcode()) {
  case ARM::tADDi8:  Scale =  4; CheckCPSRDef = true; break;
  case ARM::tSUBi8:  Scale = -4; CheckCPSRDef = true; break;
  case ARM::t2SUBri:
  case ARM::SUBri:   Scale = -1; CheckCPSRDef = true; break;
  case ARM::t2ADDri:
  case ARM::ADDri:   Scale =  1; CheckCPSRDef = true; break;
  case ARM::tADDspi: Scale =  4; CheckCPSRDef = false; break;
  case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
  default: return 0;
  }

  unsigned MIPredReg;
  if (MI.getOperand(0).getReg() != Reg ||
      MI.getOperand(1).getReg() != Reg ||
      getInstrPredicate(&MI, MIPredReg) != Pred ||
      MIPredReg != PredReg)
    return 0;

  if (CheckCPSRDef && definesCPSR(&MI))
    return 0;
  return MI.getOperand(2).getImm() * Scale;
}

/// Searches for an increment or decrement of \p Reg before \p MBBI.
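/// Returns MBB.end() when no such instruction is found; otherwise \p Offset
/// receives the signed byte amount, e.g. a preceding 'sub r0, r0, #8' (with
/// matching predicate) yields Offset == -8.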
static MachineBasicBlock::iterator
findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
                 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
  Offset = 0;
  MachineBasicBlock &MBB = *MBBI->getParent();
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (MBBI == BeginMBBI)
    return EndMBBI;

  // Skip debug values.
  MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
  while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI)
    --PrevMBBI;

  Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
  return Offset == 0 ? EndMBBI : PrevMBBI;
}

/// Searches for an increment or decrement of \p Reg after \p MBBI.
static MachineBasicBlock::iterator
findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
                ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
  Offset = 0;
  MachineBasicBlock &MBB = *MBBI->getParent();
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
  // Skip debug values.
  while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
    ++NextMBBI;
  if (NextMBBI == EndMBBI)
    return EndMBBI;

  Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
  return Offset == 0 ? EndMBBI : NextMBBI;
}

/// Fold preceding/trailing inc/dec of base register into the
/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;

  const MachineOperand &BaseOP = MI->getOperand(0);
  unsigned Base = BaseOP.getReg();
  bool BaseKill = BaseOP.isKill();
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  int Bytes = getLSMultipleTransferSize(MI);
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator MBBI(MI);
  int Offset;
  MachineBasicBlock::iterator MergeInstr
    = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
  if (Mode == ARM_AM::ia && Offset == -Bytes) {
    Mode = ARM_AM::db;
  } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
    Mode = ARM_AM::da;
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
        ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes))
      return false;
  }
  MBB.erase(MergeInstr);

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// Fold preceding/trailing inc/dec of base register into the
/// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
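///
/// add r0, r0, #4 ; ldr r1, [r0]    =>  ldr r1, [r0, #4]!
/// ldr r1, [r0]   ; add r0, r0, #4  =>  ldr r1, [r0], #4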
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
  if (isThumb1) return false;

  unsigned Base = getLoadStoreBaseOp(*MI).getReg();
  bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Bytes = getLSMultipleTransferSize(MI);
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator MBBI(MI);
  int Offset;
  MachineBasicBlock::iterator MergeInstr
    = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  unsigned NewOpc;
  if (!isAM5 && Offset == Bytes) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
  } else if (Offset == -Bytes) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (Offset == Bytes) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
    } else if (!isAM5 && Offset == -Bytes) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
    } else
      return false;
  }
  MBB.erase(MergeInstr);

  ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;

  bool isLd = isLoadSingle(Opcode);
  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
      }
    } else {
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
    } else {
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}

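/// Fold a preceding/trailing increment or decrement of the base register into
/// a t2LDRDi8/t2STRDi8, forming the pre- or post-indexed version; only a step
/// of exactly +/-8 bytes (the amount transferred) can be folded.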
bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
  unsigned Opcode = MI.getOpcode();
  assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
         "Must have t2STRDi8 or t2LDRDi8");
  if (MI.getOperand(3).getImm() != 0)
    return false;

  // Behaviour for writeback is undefined if base register is the same as one
  // of the others.
  const MachineOperand &BaseOp = MI.getOperand(2);
  unsigned Base = BaseOp.getReg();
  const MachineOperand &Reg0Op = MI.getOperand(0);
  const MachineOperand &Reg1Op = MI.getOperand(1);
  if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
    return false;

  unsigned PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);
  MachineBasicBlock::iterator MBBI(MI);
  MachineBasicBlock &MBB = *MI.getParent();
  int Offset;
  MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
                                                            PredReg, Offset);
  unsigned NewOpc;
  if (Offset == 8 || Offset == -8) {
    NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (Offset == 8 || Offset == -8) {
      NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
    } else
      return false;
  }
  MBB.erase(MergeInstr);

  DebugLoc DL = MI.getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
    MIB.addOperand(Reg0Op).addOperand(Reg1Op)
       .addReg(BaseOp.getReg(), RegState::Define);
  } else {
    assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
    MIB.addReg(BaseOp.getReg(), RegState::Define)
       .addOperand(Reg0Op).addOperand(Reg1Op);
  }
  MIB.addReg(BaseOp.getReg(), RegState::Kill)
     .addImm(Offset).addImm(Pred).addReg(PredReg);
  assert(TII->get(Opcode).getNumOperands() == 6 &&
         TII->get(NewOpc).getNumOperands() == 7 &&
         "Unexpected number of operands in Opcode specification.");

  // Transfer implicit operands.
  for (const MachineOperand &MO : MI.implicit_operands())
    MIB.addOperand(MO);
  MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());

  MBB.erase(MBBI);
  return true;
}

/// Returns true if instruction is a memory operation that this pass is capable
/// of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  unsigned Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc DL, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

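/// Rewrite an LDRD/STRD the target cannot execute correctly, either because
/// the register pair is not an even/odd consecutive pair (ARM-mode
/// LDRD/STRD) or because of Cortex-M3 errata 602117 (LDRD with the base
/// register in the destination list): e.g. 'ldrd r1, r2, [r0]' becomes
/// 'ldmia r0, {r1, r2}' when the offset is zero, or two single LDRs otherwise.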
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
    return false;

  const MachineOperand &BaseOp = MI->getOperand(2);
  unsigned BaseReg = BaseOp.getReg();
  unsigned EvenReg = MI->getOperand(0).getReg();
  unsigned OddReg = MI->getOperand(1).getReg();
  unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
  unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);

  // ARM errata 602117: LDRD with base in list may result in incorrect base
  // register when interrupted or faulted.
  bool Errata602117 = EvenReg == BaseReg &&
    (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
  // ARM LDRD/STRD needs consecutive registers.
  bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
    (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);

  if (!Errata602117 && !NonConsecutiveRegs)
    return false;

  bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
  bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
  bool EvenDeadKill = isLd ?
    MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
  bool EvenUndef = MI->getOperand(0).isUndef();
  bool OddDeadKill = isLd ?
    MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
  bool OddUndef = MI->getOperand(1).isUndef();
  bool BaseKill = BaseOp.isKill();
  bool BaseUndef = BaseOp.isUndef();
  bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
  bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
  int OffImm = getMemoryOpOffset(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

  if (OddRegNum > EvenRegNum && OffImm == 0) {
    // Ascending register numbers and no offset. It's safe to change it to a
    // ldm or stm.
    unsigned NewOpc = (isLd)
      ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
      : (isT2 ? ARM::t2STMIA : ARM::STMIA);
    if (isLd) {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
        .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
      ++NumLDRD2LDM;
    } else {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg,
                getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
        .addReg(OddReg,
                getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
      ++NumSTRD2STM;
    }
  } else {
    // Split into two instructions.
    unsigned NewOpc = (isLd)
      ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
    // so adjust and use t2LDRi12 here for that.
    unsigned NewOpc2 = (isLd)
      ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    DebugLoc dl = MBBI->getDebugLoc();
    // If this is a load and the base register is killed, it may have been
    // re-defed by the load; make sure the first load does not clobber it.
    if (isLd &&
        (BaseKill || OffKill) &&
        (TRI->regsOverlap(EvenReg, BaseReg))) {
      assert(!TRI->regsOverlap(OddReg, BaseReg));
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, false,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, false,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    } else {
      if (OddReg == EvenReg && EvenDeadKill) {
        // If the two source operands are the same, the kill marker is
        // probably on the first one. e.g.
        //   t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
        EvenDeadKill = false;
        OddDeadKill = true;
      }
      // Never kill the base register in the first instruction.
      if (EvenReg == BaseReg)
        EvenDeadKill = false;
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, EvenUndef,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, OddUndef,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    }
    if (isLd)
      ++NumLDRD2LDR;
    else
      ++NumSTRD2STR;
  }

  MBBI = MBB.erase(MBBI);
  return true;
}

/// An optimization pass to turn multiple LDR / STR ops of the same base and
/// incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  unsigned CurrOpc = ~0u;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned Position = 0;
  assert(Candidates.size() == 0);
  assert(MergeBaseCandidates.size() == 0);
  LiveRegsValid = false;

  for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
       I = MBBI) {
    // The instruction in front of the iterator is the one we look at.
    MBBI = std::prev(I);
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;
    ++Position;

    if (isMemoryOp(MBBI)) {
      unsigned Opcode = MBBI->getOpcode();
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      if (CurrBase == 0) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrPred = Pred;
        MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
        continue;
      }
      // Note: No need to match PredReg in the next if.
      if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
        // Watch out for:
        //   r4 := ldr [r0, #8]
        //   r4 := ldr [r0, #4]
        // or
        //   r0 := ldr [r0]
        // If a load overrides the base register or a register loaded by
        // another load in our chain, we cannot take this instruction.
        bool Overlap = false;
        if (isLoadSingle(Opcode)) {
          Overlap = (Base == Reg);
          if (!Overlap) {
            for (const MemOpQueueEntry &E : MemOps) {
              if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
                Overlap = true;
                break;
              }
            }
          }
        }

        if (!Overlap) {
          // Check offset and sort memory operation into the current chain.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
            continue;
          } else {
            MemOpQueue::iterator MI, ME;
            for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
              if (Offset < MI->Offset) {
                // Found a place to insert.
                break;
              }
              if (Offset == MI->Offset) {
                // Collision, abort.
                MI = ME;
                break;
              }
            }
            if (MI != MemOps.end()) {
              MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position));
              continue;
            }
          }
        }
      }

      // Don't advance the iterator; the op will start a new chain next.
      MBBI = I;
      --Position;
      // Fallthrough to look into existing chain.
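      // (E.g. a second access at an offset already present in MemOps, or a
      //  load clobbering a chain register, ends up here and breaks the
      //  current chain.)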
    } else if (MBBI->isDebugValue()) {
      continue;
    } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
               MBBI->getOpcode() == ARM::t2STRDi8) {
      // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD
      // instructions; remember them because we may still be able to merge
      // add/sub into them.
      MergeBaseCandidates.push_back(MBBI);
    }

    // If we are here then the chain is broken; extract candidates for a
    // merge.
    if (MemOps.size() > 0) {
      FormCandidates(MemOps);
      // Reset for the next chain.
      CurrBase = 0;
      CurrOpc = ~0u;
      CurrPred = ARMCC::AL;
      MemOps.clear();
    }
  }
  if (MemOps.size() > 0)
    FormCandidates(MemOps);

  // Sort candidates so they get processed from the end to the beginning of
  // the basic block later; this is necessary for liveness calculation.
  auto LessThan = [](const MergeCandidate *M0, const MergeCandidate *M1) {
    return M0->InsertPos < M1->InsertPos;
  };
  std::sort(Candidates.begin(), Candidates.end(), LessThan);

  // Go through the list of candidates and merge.
  bool Changed = false;
  for (const MergeCandidate *Candidate : Candidates) {
    if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
      MachineInstr *Merged = MergeOpsUpdate(*Candidate);
      // Merge preceding/trailing base inc/dec into the merged op.
      if (Merged) {
        Changed = true;
        unsigned Opcode = Merged->getOpcode();
        if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
          MergeBaseUpdateLSDouble(*Merged);
        else
          MergeBaseUpdateLSMultiple(Merged);
      } else {
        for (MachineInstr *MI : Candidate->Instrs) {
          if (MergeBaseUpdateLoadStore(MI))
            Changed = true;
        }
      }
    } else {
      assert(Candidate->Instrs.size() == 1);
      if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
        Changed = true;
    }
  }
  Candidates.clear();
  // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
  for (MachineInstr *MI : MergeBaseCandidates)
    MergeBaseUpdateLSDouble(*MI);
  MergeBaseCandidates.clear();

  return Changed;
}

/// If this is an exit BB, try merging the return ops ("bx lr" and
/// "mov pc, lr") into the preceding stack restore so it directly restores
/// the value of LR into pc.
/// ldmfd sp!, {..., lr}
/// bx lr
/// or
/// ldmfd sp!, {..., lr}
/// mov pc, lr
/// =>
/// ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  // Thumb1 LDM doesn't allow high registers.
  if (isThumb1) return false;
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = std::prev(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  MF = &Fn;
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TL = STI->getTargetLowering();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &Fn.getRegInfo();
  RegClassInfoValid = false;
  isThumb2 = AFI->isThumb2Function();
  isThumb1 = AFI->isThumbFunction() && !isThumb2;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (STI->hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  Allocator.DestroyAll();
  return Modified;
}

namespace {
  /// Pre- register allocation pass that moves loads / stores from consecutive
  /// locations closer together to make it more likely they will be combined
  /// later.
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &FirstReg,
                          unsigned &SecondReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVectorImpl<MachineInstr *> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = &Fn.getDataLayout();
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSetImpl<MachineInstr*> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias
  // information some day.
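  // E.g. (registers illustrative) when checking whether two loads may be
  // moved next to each other:
  //   ldr r1, [r0]
  //   bl  foo             <- a call in between makes the move unsafe
  //   ldr r2, [r0, #4]
  // Calls, terminators, intervening stores (for loads), and redefinitions
  // of the base register all cause us to give up below.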
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate the register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // OK if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}

/// Copy the memory operands of \p Op0 and \p Op1 into a new array assigned
/// to \p MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}

bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl, unsigned &NewOpc,
                                          unsigned &FirstReg,
                                          unsigned &SecondReg,
                                          unsigned &BaseReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12) {
    NewOpc = ARM::LDRD;
  } else if (Opcode == ARM::STRi12) {
    NewOpc = ARM::STRD;
  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else {
    return false;
  }

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement. At the moment, we ignore the memory operand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
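  // E.g. for t2LDRDi8 / t2STRDi8 the offset must be a multiple of 4 with
  // magnitude below (1 << 8) * 4 = 1024, so (illustrative) #1020 is
  // accepted below while #1024 or a misaligned #6 is rejected.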
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  FirstReg = Op0->getOperand(0).getReg();
  SecondReg = Op1->getOperand(0).getReg();
  if (FirstReg == SecondReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVectorImpl<MachineInstr *> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
              int LOffset = getMemoryOpOffset(LHS);
              int ROffset = getMemoryOpOffset(RHS);
              assert(LHS == RHS || LOffset != ROffset);
              return LOffset > ROffset;
            });

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative: if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
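        // Loads are gathered up at the position of the first load, stores
        // sunk down to the last store; skip over the ops being moved and
        // any debug values to find the actual insertion point.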
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form a register
        // pair.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned FirstReg = 0, SecondReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             FirstReg, SecondReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(FirstReg, TRC);
          MRI->constrainRegClass(SecondReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg, RegState::Define)
              .addReg(SecondReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg)
              .addReg(SecondReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          if (!isT2) {
            // Add register allocation hints to form register pairs.
            MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
            MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
          }
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
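        // (E.g. it would be unsafe to reschedule a load or store across a
        //  BL call; process what has been collected so far and restart
        //  after the barrier.)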
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isLoadSingle(Opc);
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}

/// Returns an instance of the load / store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}