//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This file contains a pass that performs load / store related peephole
/// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "ThumbRegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

#define DEBUG_TYPE "arm-ldst-opt"

STATISTIC(NumLDMGened,  "Number of ldm instructions generated");
STATISTIC(NumSTMGened,  "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed, "Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed, "Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");

/// This switch disables formation of double/multi instructions that could
/// potentially lead to (new) alignment traps even with CCR.UNALIGN_TRP
/// disabled. This can be used to create libraries that are robust even when
/// users provoke undefined behaviour by supplying misaligned pointers.
/// \see mayCombineMisaligned()
static cl::opt<bool>
AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden,
    cl::init(false), cl::desc("Be more conservative in ARM load/store opt"));

namespace llvm {
void initializeARMLoadStoreOptPass(PassRegistry &);
}

#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"

namespace {
  /// Post-register allocation pass that combines load / store instructions to
  /// form ldm / stm instructions.
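  /// For example, a sequence such as
  ///   ldr r2, [r0]
  ///   ldr r3, [r0, #4]
  ///   ldr r4, [r0, #8]
  /// may become
  ///   ldmia r0, {r2, r3, r4}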
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {
      initializeARMLoadStoreOptPass(*PassRegistry::getPassRegistry());
    }

    const MachineFunction *MF;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    const TargetLowering *TL;
    ARMFunctionInfo *AFI;
    LivePhysRegs LiveRegs;
    RegisterClassInfo RegClassInfo;
    MachineBasicBlock::const_iterator LiveRegPos;
    bool LiveRegsValid;
    bool RegClassInfoValid;
    bool isThumb1, isThumb2;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    MachineFunctionProperties getRequiredProperties() const override {
      return MachineFunctionProperties().set(
          MachineFunctionProperties::Property::AllVRegsAllocated);
    }

    const char *getPassName() const override {
      return ARM_LOAD_STORE_OPT_NAME;
    }

  private:
    /// A set of load/store MachineInstrs with same base register sorted by
    /// offset.
    struct MemOpQueueEntry {
      MachineInstr *MI;
      int Offset;        ///< Load/Store offset.
      unsigned Position; ///< Position as counted from end of basic block.
      MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)
          : MI(MI), Offset(Offset), Position(Position) {}
    };
    typedef SmallVector<MemOpQueueEntry, 8> MemOpQueue;

    /// A set of MachineInstrs that fulfill (nearly all) conditions to get
    /// merged into a LDM/STM.
    struct MergeCandidate {
      /// List of instructions ordered by load/store offset.
      SmallVector<MachineInstr*, 4> Instrs;
      /// Index in Instrs of the instruction being latest in the schedule.
      unsigned LatestMIIdx;
      /// Index in Instrs of the instruction being earliest in the schedule.
      unsigned EarliestMIIdx;
      /// Index into the basic block where the merged instruction will be
      /// inserted. (See MemOpQueueEntry.Position)
      unsigned InsertPos;
      /// Whether the instructions can be merged into a ldm/stm instruction.
      bool CanMergeToLSMulti;
      /// Whether the instructions can be merged into a ldrd/strd instruction.
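      /// This requires Thumb2, an offset encodable in t2LDRDi8/t2STRDi8 and
      /// exactly two registers (see FormCandidates).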
      bool CanMergeToLSDouble;
    };
    SpecificBumpPtrAllocator<MergeCandidate> Allocator;
    SmallVector<const MergeCandidate*, 4> Candidates;
    SmallVector<MachineInstr*, 4> MergeBaseCandidates;

    void moveLiveRegsBefore(const MachineBasicBlock &MBB,
                            MachineBasicBlock::const_iterator Before);
    unsigned findFreeReg(const TargetRegisterClass &RegClass);
    void UpdateBaseRegUses(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           DebugLoc DL, unsigned Base, unsigned WordOffset,
                           ARMCC::CondCodes Pred, unsigned PredReg);
    MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
        MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
        bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
        DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs);
    MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
        MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
        bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
        DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const;
    void FormCandidates(const MemOpQueue &MemOps);
    MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineInstr *MI);
    bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
    bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
    bool CombineMovBx(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

INITIALIZE_PASS(ARMLoadStoreOpt, "arm-load-store-opt", ARM_LOAD_STORE_OPT_NAME,
                false, false)

static bool definesCPSR(const MachineInstr *MI) {
  for (const auto &MO : MI->operands()) {
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has live CPSR def, then it's not safe to fold it
      // into load / store.
      return true;
  }

  return false;
}

static int getMemoryOpOffset(const MachineInstr *MI) {
  unsigned Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands - 3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  // Thumb1 immediate offsets are scaled by 4.
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
      Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
    return OffField * 4;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
                             : ARM_AM::getAM5Op(OffField);

  if (Op == ARM_AM::sub)
    return -Offset;

  return Offset;
}

static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
  return MI.getOperand(1);
}

static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
  return MI.getOperand(0);
}

static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
  case ARM::tLDRspi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
  case ARM::tSTRspi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}

static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

static bool isT1i32Load(unsigned Opc) {
  return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc);
}

static bool isT1i32Store(unsigned Opc) {
  return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}

static bool isLoadSingle(unsigned Opc) {
  return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
}

static unsigned getImmScale(unsigned Opc) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
    return 1;
  case ARM::tLDRHi:
  case ARM::tSTRHi:
    return 2;
  case ARM::tLDRBi:
  case ARM::tSTRBi:
    return 4;
  }
}

static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
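/// E.g. after a merged tLDMIA with writeback of two words, a later tLDRi from
/// the same base has its word offset reduced by 2; where an instruction cannot
/// be updated, a SUB resetting the base is inserted instead.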
void
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   DebugLoc DL, unsigned Base,
                                   unsigned WordOffset,
                                   ARMCC::CondCodes Pred, unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");
  // Start updating any instructions with immediate offsets. Insert a SUB before
  // the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    bool InsertSub = false;
    unsigned Opc = MBBI->getOpcode();

    if (MBBI->readsRegister(Base)) {
      int Offset;
      bool IsLoad =
          Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
      bool IsStore =
          Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

      if (IsLoad || IsStore) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
            MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);

        // If storing the base register, it needs to be reset first.
        unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();

        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
          MO.setImm(Offset);
        else
          InsertSub = true;

      } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                 !definesCPSR(MBBI)) {
        // SUBS/ADDS using this register, with a dead def of the CPSR.
        // Merge it with the update; if the merged offset is too large,
        // insert a new sub instead.
        MachineOperand &MO =
            MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ?
            MO.getImm() + WordOffset * 4 :
            MO.getImm() - WordOffset * 4;
        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
          // Offset == 0.
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }

      } else {
        // Can't update the instruction.
        InsertSub = true;
      }

    } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
      // Since SUBS sets the condition flags, we can't place the base reset
      // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.
      InsertSub = true;
    }

    if (InsertSub) {
      // An instruction above couldn't be updated, so insert a sub.
      AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
          .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
      return;
    }

    if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
      // Register got killed. Stop updating.
      return;
  }

  // End of block was reached.
  if (MBB.succ_size() > 0) {
    // FIXME: Because of a bug, live registers are sometimes missing from
    // the successor blocks' live-in sets. This means we can't trust that
    // information and *always* have to reset at the end of a block.
    // See PR21029.
    if (MBBI != MBB.end()) --MBBI;
    AddDefaultT1CC(
        BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
        .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
  }
}

/// Return the first register of class \p RegClass that is not in \p Regs.
unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
  if (!RegClassInfoValid) {
    RegClassInfo.runOnMachineFunction(*MF);
    RegClassInfoValid = true;
  }

  for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
    if (!LiveRegs.contains(Reg))
      return Reg;
  return 0;
}

/// Compute live registers just before instruction \p Before (in normal schedule
/// direction). Computes backwards so multiple queries in the same block must
/// come in reverse order.
void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
                                         MachineBasicBlock::const_iterator Before) {
  // Initialize if we never queried in this block.
  if (!LiveRegsValid) {
    LiveRegs.init(TRI);
    LiveRegs.addLiveOuts(&MBB, true);
    LiveRegPos = MBB.end();
    LiveRegsValid = true;
  }
  // Move backward just before the "Before" position.
  while (LiveRegPos != Before) {
    --LiveRegPos;
    LiveRegs.stepBackward(*LiveRegPos);
  }
}

static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
                        unsigned Reg) {
  for (const std::pair<unsigned, bool> &R : Regs)
    if (R.first == Reg)
      return true;
  return false;
}

/// Create and insert a LDM or STM with Base as base register and registers in
/// Regs as the register operands that would be loaded / stored. Returns the
/// newly created instruction, or nullptr if the merge could not be performed.
MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
    MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
    bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
    DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) {
  unsigned NumRegs = Regs.size();
  assert(NumRegs > 1);

  // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
  // Compute liveness information for that register to make the decision.
  bool SafeToClobberCPSR = !isThumb1 ||
      (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
       MachineBasicBlock::LQR_Dead);

  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.

  // Exception: If the base register is in the input reglist, Thumb1 LDM is
  // non-writeback.
  // It's also not possible to merge an STR of the base register in Thumb1.
  if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) {
    assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
    if (Opcode == ARM::tLDRi) {
      Writeback = false;
    } else if (Opcode == ARM::tSTRi) {
      return nullptr;
    }
  }

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;

  if (Offset == 4 && haveIBAndDA) {
    Mode = ARM_AM::ib;
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
    Mode = ARM_AM::da;
  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
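    // E.g. an integer LDM/STM of four registers at offsets -16, -12, -8 and
    // -4 from the base selects DB mode here.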
    Mode = ARM_AM::db;
  } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
    // Check if this is a supported opcode before inserting instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return nullptr;

    // On Thumb1, it's not worth materializing a new base register without
    // clobbering the CPSR (i.e. not using ADDS/SUBS).
    if (!SafeToClobberCPSR)
      return nullptr;

    unsigned NewBase;
    if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination registers
      // as the new base. Will no longer be writeback in Thumb1.
      NewBase = Regs[NumRegs-1].first;
      Writeback = false;
    } else {
      // Find a free register that we can use as scratch register.
      moveLiveRegsBefore(MBB, InsertBefore);
      // The merged instruction does not exist yet but will use several Regs if
      // it is a Store.
      if (!isLoadSingle(Opcode))
        for (const std::pair<unsigned, bool> &R : Regs)
          LiveRegs.addReg(R.first);

      NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
      if (NewBase == 0)
        return nullptr;
    }

    int BaseOpc =
        isThumb2 ? ARM::t2ADDri :
        (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
        (isThumb1 && Offset < 8) ? ARM::tADDi3 :
        isThumb1 ? ARM::tADDi8 : ARM::ADDri;

    if (Offset < 0) {
      Offset = -Offset;
      BaseOpc =
          isThumb2 ? ARM::t2SUBri :
          (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
          isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
    }

    if (!TL->isLegalAddImmediate(Offset))
      // FIXME: Try add with register operand?
      return nullptr; // Probably not worth it then.

    // We can only append a kill flag to the add/sub input if the value is not
    // used in the register list of the stm as well.
    bool KillOldBase = BaseKill &&
        (!isi32Store(Opcode) || !ContainsReg(Regs, Base));

    if (isThumb1) {
      // Thumb1: depending on immediate size, use either
      //   ADDS NewBase, Base, #imm3
      // or
      //   MOV  NewBase, Base
      //   ADDS NewBase, #imm8.
      if (Base != NewBase &&
          (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
        // Need to insert a MOV to the new base first.
        if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
            !STI->hasV6Ops()) {
          // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
          if (Pred != ARMCC::AL)
            return nullptr;
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
              .addReg(Base, getKillRegState(KillOldBase));
        } else
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
              .addReg(Base, getKillRegState(KillOldBase))
              .addImm(Pred).addReg(PredReg);

        // The following ADDS/SUBS becomes an update.
        Base = NewBase;
        KillOldBase = true;
      }
      if (BaseOpc == ARM::tADDrSPi) {
        assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
        BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
            .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4)
            .addImm(Pred).addReg(PredReg);
      } else
        AddDefaultT1CC(
            BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true)
            .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
            .addImm(Pred).addReg(PredReg);
    } else {
      BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
          .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
          .addImm(Pred).addReg(PredReg).addReg(0);
    }
    Base = NewBase;
    BaseKill = true; // New base is always killed straight away.
  }

  bool isDef = isLoadSingle(Opcode);

  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  // base register writeback.
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode)
    return nullptr;

  // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
  // - There is no writeback (LDM of base register),
  // - the base register is killed by the merged instruction,
  // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
  //   to reset the base register.
  // Otherwise, don't merge.
  // It's safe to return here since the code to materialize a new base register
  // above is also conditional on SafeToClobberCPSR.
  if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
    return nullptr;

  MachineInstrBuilder MIB;

  if (Writeback) {
    assert(isThumb1 && "expected Writeback only in Thumb1");
    if (Opcode == ARM::tLDMIA) {
      assert(!ContainsReg(Regs, Base) && "Thumb1 can't LDM ! with Base in Regs");
      // Update tLDMIA with writeback if necessary.
      Opcode = ARM::tLDMIA_UPD;
    }

    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));

    // Thumb1: we might need to set base writeback when building the MI.
    MIB.addReg(Base, getDefRegState(true))
       .addReg(Base, getKillRegState(BaseKill));

    // The base isn't dead after a merged instruction with writeback.
    // Insert a sub instruction after the newly formed instruction to reset.
    if (!BaseKill)
      UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);

  } else {
    // No writeback, simply build the MachineInstr.
    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
    MIB.addReg(Base, getKillRegState(BaseKill));
  }

  MIB.addImm(Pred).addReg(PredReg);

  for (const std::pair<unsigned, bool> &R : Regs)
    MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));

  return MIB.getInstr();
}
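
/// Create and insert a LDRD or STRD loading / storing the register pair in
/// \p Regs from / to \p Base plus \p Offset. Only the Thumb2 encodings
/// (t2LDRDi8 / t2STRDi8) are produced here.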
MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB,
    MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
    bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
    DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const {
  bool IsLoad = isi32Load(Opcode);
  assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
  unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;

  assert(Regs.size() == 2);
  MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
                                    TII->get(LoadStoreOpcode));
  if (IsLoad) {
    MIB.addReg(Regs[0].first, RegState::Define)
       .addReg(Regs[1].first, RegState::Define);
  } else {
    MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
       .addReg(Regs[1].first, getKillRegState(Regs[1].second));
  }
  MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  return MIB.getInstr();
}

/// Merge the instructions in \p Cand into a single load/store multiple or
/// double instruction, removing the original instructions on success.
MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
  const MachineInstr *First = Cand.Instrs.front();
  unsigned Opcode = First->getOpcode();
  bool IsLoad = isLoadSingle(Opcode);
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 4> ImpDefs;
  DenseSet<unsigned> KilledRegs;
  DenseSet<unsigned> UsedRegs;
  // Determine list of registers and list of implicit super-register defs.
  for (const MachineInstr *MI : Cand.Instrs) {
    const MachineOperand &MO = getLoadStoreRegOp(*MI);
    unsigned Reg = MO.getReg();
    bool IsKill = MO.isKill();
    if (IsKill)
      KilledRegs.insert(Reg);
    Regs.push_back(std::make_pair(Reg, IsKill));
    UsedRegs.insert(Reg);

    if (IsLoad) {
      // Collect any implicit defs of super-registers, after merging we can't
      // be sure anymore that we properly preserved these live ranges and must
      // remove these implicit operands.
      for (const MachineOperand &MO : MI->implicit_operands()) {
        if (!MO.isReg() || !MO.isDef() || MO.isDead())
          continue;
        assert(MO.isImplicit());
        unsigned DefReg = MO.getReg();

        if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end())
          continue;
        // We can ignore cases where the super-reg is read and written.
        if (MI->readsRegister(DefReg))
          continue;
        ImpDefs.push_back(DefReg);
      }
    }
  }

  // Attempt the merge.
  typedef MachineBasicBlock::iterator iterator;
  MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
  iterator InsertBefore = std::next(iterator(LatestMI));
  MachineBasicBlock &MBB = *LatestMI->getParent();
  unsigned Offset = getMemoryOpOffset(First);
  unsigned Base = getLoadStoreBaseOp(*First).getReg();
  bool BaseKill = LatestMI->killsRegister(Base);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(*First, PredReg);
  DebugLoc DL = First->getDebugLoc();
  MachineInstr *Merged = nullptr;
  if (Cand.CanMergeToLSDouble)
    Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
                                   Opcode, Pred, PredReg, DL, Regs);
  if (!Merged && Cand.CanMergeToLSMulti)
    Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
                                  Opcode, Pred, PredReg, DL, Regs);
  if (!Merged)
    return nullptr;

  // Determine earliest instruction that will get removed. We then keep an
  // iterator just above it so the following erases don't invalidate it.
  iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
  bool EarliestAtBegin = false;
  if (EarliestI == MBB.begin()) {
    EarliestAtBegin = true;
  } else {
    EarliestI = std::prev(EarliestI);
  }

  // Remove instructions which have been merged.
  for (MachineInstr *MI : Cand.Instrs)
    MBB.erase(MI);

  // Determine range between the earliest removed instruction and the new one.
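  // (The merged instruction was inserted just after the latest removed one,
  // so stale kill/undef flags in this range are fixed up below.)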
  if (EarliestAtBegin)
    EarliestI = MBB.begin();
  else
    EarliestI = std::next(EarliestI);
  auto FixupRange = make_range(EarliestI, iterator(Merged));

  if (isLoadSingle(Opcode)) {
    // If the previous loads defined a super-reg, then we have to mark earlier
    // operands undef; Replicate the super-reg def on the merged instruction.
    for (MachineInstr &MI : FixupRange) {
      for (unsigned &ImpDefReg : ImpDefs) {
        for (MachineOperand &MO : MI.implicit_operands()) {
          if (!MO.isReg() || MO.getReg() != ImpDefReg)
            continue;
          if (MO.readsReg())
            MO.setIsUndef();
          else if (MO.isDef())
            ImpDefReg = 0;
        }
      }
    }

    MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
    for (unsigned ImpDef : ImpDefs)
      MIB.addReg(ImpDef, RegState::ImplicitDefine);
  } else {
    // Remove kill flags: We are possibly storing the values later now.
    assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
    for (MachineInstr &MI : FixupRange) {
      for (MachineOperand &MO : MI.uses()) {
        if (!MO.isReg() || !MO.isKill())
          continue;
        if (UsedRegs.count(MO.getReg()))
          MO.setIsKill(false);
      }
    }
    assert(ImpDefs.empty());
  }

  return Merged;
}

static bool isValidLSDoubleOffset(int Offset) {
  unsigned Value = abs(Offset);
  // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
  // multiplied by 4.
  return (Value % 4) == 0 && Value < 1024;
}

/// Return true for loads/stores that can be combined to a double/multi
/// operation without increasing the requirements for alignment.
static bool mayCombineMisaligned(const TargetSubtargetInfo &STI,
                                 const MachineInstr &MI) {
  // vldr/vstr trap on misaligned pointers anyway, forming vldm makes no
  // difference.
  unsigned Opcode = MI.getOpcode();
  if (!isi32Load(Opcode) && !isi32Store(Opcode))
    return true;

  // Stack pointer alignment is out of the programmer's control so we can trust
  // SP-relative loads/stores.
  if (getLoadStoreBaseOp(MI).getReg() == ARM::SP &&
      STI.getFrameLowering()->getTransientStackAlignment() >= 4)
    return true;
  return false;
}

/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
  const MachineInstr *FirstMI = MemOps[0].MI;
  unsigned Opcode = FirstMI->getOpcode();
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  unsigned Size = getLSMultipleTransferSize(FirstMI);

  unsigned SIndex = 0;
  unsigned EIndex = MemOps.size();
  do {
    // Look at the first instruction.
    const MachineInstr *MI = MemOps[SIndex].MI;
    int Offset = MemOps[SIndex].Offset;
    const MachineOperand &PMO = getLoadStoreRegOp(*MI);
    unsigned PReg = PMO.getReg();
    unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
    unsigned Latest = SIndex;
    unsigned Earliest = SIndex;
    unsigned Count = 1;
    bool CanMergeToLSDouble =
        STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    if (STI->isCortexM3() && isi32Load(Opcode) &&
        PReg == getLoadStoreBaseOp(*MI).getReg())
      CanMergeToLSDouble = false;

    bool CanMergeToLSMulti = true;
    // On Swift, a vldm/vstm starting with an odd register number needs more
    // uops than single vldrs.
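    // (E.g. a VLDM whose first register is s1 or d3 would not be formed.)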
    if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
      CanMergeToLSMulti = false;

    // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
    // deprecated; LDM to PC is fine but cannot happen here.
    if (PReg == ARM::SP || PReg == ARM::PC)
      CanMergeToLSMulti = CanMergeToLSDouble = false;

    // Should we be conservative?
    if (AssumeMisalignedLoadStores && !mayCombineMisaligned(*STI, *MI))
      CanMergeToLSMulti = CanMergeToLSDouble = false;

    // Merge following instructions where possible.
    for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
      int NewOffset = MemOps[I].Offset;
      if (NewOffset != Offset + (int)Size)
        break;
      const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
      unsigned Reg = MO.getReg();
      if (Reg == ARM::SP || Reg == ARM::PC)
        break;

      // See if the current load/store may be part of a multi load/store.
      unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
      bool PartOfLSMulti = CanMergeToLSMulti;
      if (PartOfLSMulti) {
        // Register numbers must be in ascending order.
        if (RegNum <= PRegNum)
          PartOfLSMulti = false;
        // For VFP / NEON load/store multiples, the registers must be
        // consecutive and within the limit on the number of registers per
        // instruction.
        else if (!isNotVFP && RegNum != PRegNum+1)
          PartOfLSMulti = false;
      }
      // See if the current load/store may be part of a double load/store.
      bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;

      if (!PartOfLSMulti && !PartOfLSDouble)
        break;
      CanMergeToLSMulti &= PartOfLSMulti;
      CanMergeToLSDouble &= PartOfLSDouble;
      // Track MemOp with latest and earliest position (Positions are
      // counted in reverse).
      unsigned Position = MemOps[I].Position;
      if (Position < MemOps[Latest].Position)
        Latest = I;
      else if (Position > MemOps[Earliest].Position)
        Earliest = I;
      // Prepare for next MemOp.
      Offset += Size;
      PRegNum = RegNum;
    }

    // Form a candidate from the Ops collected so far.
    MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
    for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
      Candidate->Instrs.push_back(MemOps[C].MI);
    Candidate->LatestMIIdx = Latest - SIndex;
    Candidate->EarliestMIIdx = Earliest - SIndex;
    Candidate->InsertPos = MemOps[Latest].Position;
    if (Count == 1)
      CanMergeToLSMulti = CanMergeToLSDouble = false;
    Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
    Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
    Candidates.push_back(Candidate);
    // Continue after the chain.
    SIndex += Count;
  } while (SIndex < EIndex);
}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}

/// Check if the given instruction increments or decrements a register and
/// return the amount it is incremented/decremented. Returns 0 if the
/// instruction does not increment/decrement the register or if the CPSR flags
/// generated by the instruction are possibly read as well.
static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
                                  ARMCC::CondCodes Pred, unsigned PredReg) {
  bool CheckCPSRDef;
  int Scale;
  switch (MI.getOpcode()) {
  case ARM::tADDi8:  Scale =  4; CheckCPSRDef = true; break;
  case ARM::tSUBi8:  Scale = -4; CheckCPSRDef = true; break;
  case ARM::t2SUBri:
  case ARM::SUBri:   Scale = -1; CheckCPSRDef = true; break;
  case ARM::t2ADDri:
  case ARM::ADDri:   Scale =  1; CheckCPSRDef = true; break;
  case ARM::tADDspi: Scale =  4; CheckCPSRDef = false; break;
  case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
  default: return 0;
  }

  unsigned MIPredReg;
  if (MI.getOperand(0).getReg() != Reg ||
      MI.getOperand(1).getReg() != Reg ||
      getInstrPredicate(MI, MIPredReg) != Pred ||
      MIPredReg != PredReg)
    return 0;

  if (CheckCPSRDef && definesCPSR(&MI))
    return 0;
  return MI.getOperand(2).getImm() * Scale;
}

/// Searches for an increment or decrement of \p Reg before \p MBBI.
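/// Returns MBB.end() and sets \p Offset to zero if no such instruction is
/// found.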
static MachineBasicBlock::iterator
findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
                 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
  Offset = 0;
  MachineBasicBlock &MBB = *MBBI->getParent();
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (MBBI == BeginMBBI)
    return EndMBBI;

  // Skip debug values.
  MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
  while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI)
    --PrevMBBI;

  Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
  return Offset == 0 ? EndMBBI : PrevMBBI;
}

/// Searches for an increment or decrement of \p Reg after \p MBBI.
static MachineBasicBlock::iterator
findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
                ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
  Offset = 0;
  MachineBasicBlock &MBB = *MBBI->getParent();
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
  // Skip debug values.
  while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
    ++NextMBBI;
  if (NextMBBI == EndMBBI)
    return EndMBBI;

  Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
  return Offset == 0 ? EndMBBI : NextMBBI;
}

/// Fold preceding/trailing inc/dec of base register into the
/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;

  const MachineOperand &BaseOP = MI->getOperand(0);
  unsigned Base = BaseOP.getReg();
  bool BaseKill = BaseOP.isKill();
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  int Bytes = getLSMultipleTransferSize(MI);
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator MBBI(MI);
  int Offset;
  MachineBasicBlock::iterator MergeInstr
    = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
  if (Mode == ARM_AM::ia && Offset == -Bytes) {
    Mode = ARM_AM::db;
  } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
    Mode = ARM_AM::da;
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
        ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes))
      return false;
  }
  MBB.erase(MergeInstr);

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// Fold preceding/trailing inc/dec of base register into the
/// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
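///
/// rn := rn + 4;
/// ldr rd, [rn]
/// =>
/// ldr rd, [rn, #4]!
///
/// ldr rd, [rn]
/// rn := rn + 4;
/// =>
/// ldr rd, [rn], #4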
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
  if (isThumb1) return false;

  unsigned Base = getLoadStoreBaseOp(*MI).getReg();
  bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  int Bytes = getLSMultipleTransferSize(MI);
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator MBBI(MI);
  int Offset;
  MachineBasicBlock::iterator MergeInstr
    = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  unsigned NewOpc;
  if (!isAM5 && Offset == Bytes) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
  } else if (Offset == -Bytes) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (Offset == Bytes) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
    } else if (!isAM5 && Offset == -Bytes) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
    } else
      return false;
  }
  MBB.erase(MergeInstr);

  ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;

  bool isLd = isLoadSingle(Opcode);
  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
        .addReg(Base, getDefRegState(true)) // WB base register
        .addReg(Base, getKillRegState(isLd ? BaseKill : false))
        .addImm(Pred).addReg(PredReg)
        .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                                     getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
            .addReg(Base, RegState::Define)
            .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
            .addReg(Base, RegState::Define)
            .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
      }
    } else {
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
          .addReg(MO.getReg(), getKillRegState(MO.isKill()))
          .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
    } else {
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
          .addReg(MO.getReg(), getKillRegState(MO.isKill()))
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}

bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
  unsigned Opcode = MI.getOpcode();
  assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
         "Must have t2STRDi8 or t2LDRDi8");
  if (MI.getOperand(3).getImm() != 0)
    return false;

  // Behaviour for writeback is undefined if base register is the same as one
  // of the others.
  const MachineOperand &BaseOp = MI.getOperand(2);
  unsigned Base = BaseOp.getReg();
  const MachineOperand &Reg0Op = MI.getOperand(0);
  const MachineOperand &Reg1Op = MI.getOperand(1);
  if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
    return false;

  unsigned PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  MachineBasicBlock::iterator MBBI(MI);
  MachineBasicBlock &MBB = *MI.getParent();
  int Offset;
  MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
                                                            PredReg, Offset);
  unsigned NewOpc;
  if (Offset == 8 || Offset == -8) {
    NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (Offset == 8 || Offset == -8) {
      NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
    } else
      return false;
  }
  MBB.erase(MergeInstr);

  DebugLoc DL = MI.getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
    MIB.addOperand(Reg0Op).addOperand(Reg1Op)
       .addReg(BaseOp.getReg(), RegState::Define);
  } else {
    assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
    MIB.addReg(BaseOp.getReg(), RegState::Define)
       .addOperand(Reg0Op).addOperand(Reg1Op);
  }
  MIB.addReg(BaseOp.getReg(), RegState::Kill)
     .addImm(Offset).addImm(Pred).addReg(PredReg);
  assert(TII->get(Opcode).getNumOperands() == 6 &&
         TII->get(NewOpc).getNumOperands() == 7 &&
         "Unexpected number of operands in Opcode specification.");

  // Transfer implicit operands.
  for (const MachineOperand &MO : MI.implicit_operands())
    MIB.addOperand(MO);
  MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());

  MBB.erase(MBBI);
  return true;
}

/// Returns true if instruction is a memory operation that this pass is capable
/// of operating on.
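/// Volatile accesses, accesses without a known alignment of at least 4 and
/// accesses with undef operands are rejected.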
static bool isMemoryOp(const MachineInstr &MI) {
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  case ARM::VLDRS:
  case ARM::VSTRS:
  case ARM::VLDRD:
  case ARM::VSTRD:
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    break;
  default:
    return false;
  }
  if (!MI.getOperand(1).isReg())
    return false;

  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand &MMO = **MI.memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO.isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO.getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI.getOperand(1).isUndef())
    return false;

  return true;
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc DL, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
        .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
        .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
        .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
        .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
    return false;

  const MachineOperand &BaseOp = MI->getOperand(2);
  unsigned BaseReg = BaseOp.getReg();
  unsigned EvenReg = MI->getOperand(0).getReg();
  unsigned OddReg = MI->getOperand(1).getReg();
  unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
  unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);

  // ARM errata 602117: LDRD with base in list may result in incorrect base
  // register when interrupted or faulted.
  bool Errata602117 = EvenReg == BaseReg &&
      (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
  // ARM LDRD/STRD needs consecutive registers.
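  // (An even-numbered first register followed by the next higher one, e.g.
  // r0/r1 or r2/r3.)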

bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
    return false;

  const MachineOperand &BaseOp = MI->getOperand(2);
  unsigned BaseReg = BaseOp.getReg();
  unsigned EvenReg = MI->getOperand(0).getReg();
  unsigned OddReg = MI->getOperand(1).getReg();
  unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
  unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);

  // ARM errata 602117: LDRD with base in list may result in incorrect base
  // register when interrupted or faulted.
  bool Errata602117 = EvenReg == BaseReg &&
    (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
  // ARM LDRD/STRD needs consecutive registers.
  bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
    (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);

  if (!Errata602117 && !NonConsecutiveRegs)
    return false;

  bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
  bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
  bool EvenDeadKill = isLd ?
    MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
  bool EvenUndef = MI->getOperand(0).isUndef();
  bool OddDeadKill = isLd ?
    MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
  bool OddUndef = MI->getOperand(1).isUndef();
  bool BaseKill = BaseOp.isKill();
  bool BaseUndef = BaseOp.isUndef();
  bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
  bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
  int OffImm = getMemoryOpOffset(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);

  if (OddRegNum > EvenRegNum && OffImm == 0) {
    // Ascending register numbers and no offset. It's safe to change it into
    // an ldm or stm.
    unsigned NewOpc = (isLd)
      ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
      : (isT2 ? ARM::t2STMIA : ARM::STMIA);
    if (isLd) {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
        .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
      ++NumLDRD2LDM;
    } else {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg,
                getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
        .addReg(OddReg,
                getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
      ++NumSTRD2STM;
    }
  } else {
    // Split into two instructions.
    unsigned NewOpc = (isLd)
      ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
    // so adjust and use t2LDRi12 here for that.
    unsigned NewOpc2 = (isLd)
      ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    DebugLoc dl = MBBI->getDebugLoc();
    // If this is a load and the base register is killed, it may have been
    // re-defed by the load; make sure the first load does not clobber it.
    if (isLd &&
        (BaseKill || OffKill) &&
        (TRI->regsOverlap(EvenReg, BaseReg))) {
      assert(!TRI->regsOverlap(OddReg, BaseReg));
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, false,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, false,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    } else {
      if (OddReg == EvenReg && EvenDeadKill) {
        // If the two source operands are the same, the kill marker is
        // probably on the first one. e.g.
        // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
        EvenDeadKill = false;
        OddDeadKill = true;
      }
      // Never kill the base register in the first instruction.
      if (EvenReg == BaseReg)
        EvenDeadKill = false;
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, EvenUndef,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, OddUndef,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    }
    if (isLd)
      ++NumLDRD2LDR;
    else
      ++NumSTRD2STR;
  }

  MBBI = MBB.erase(MBBI);
  return true;
}
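
// A minimal sketch of the transformation the next function aims for
// (registers and offsets are arbitrary):
//   ldr r1, [r0]
//   ldr r2, [r0, #4]
//   ldr r3, [r0, #8]
// =>
//   ldm r0, {r1, r2, r3}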

/// An optimization pass to turn multiple LDR / STR ops of the same base and
/// incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  unsigned CurrOpc = ~0u;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned Position = 0;
  assert(Candidates.size() == 0);
  assert(MergeBaseCandidates.size() == 0);
  LiveRegsValid = false;

  for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
       I = MBBI) {
    // The instruction in front of the iterator is the one we look at.
    MBBI = std::prev(I);
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;
    ++Position;

    if (isMemoryOp(*MBBI)) {
      unsigned Opcode = MBBI->getOpcode();
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(*MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      if (CurrBase == 0) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrPred = Pred;
        MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
        continue;
      }
      // Note: No need to match PredReg in the next if.
      if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
        // Watch out for:
        //   r4 := ldr [r0, #8]
        //   r4 := ldr [r0, #4]
        // or
        //   r0 := ldr [r0]
        // If a load overrides the base register or a register loaded by
        // another load in our chain, we cannot take this instruction.
        bool Overlap = false;
        if (isLoadSingle(Opcode)) {
          Overlap = (Base == Reg);
          if (!Overlap) {
            for (const MemOpQueueEntry &E : MemOps) {
              if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
                Overlap = true;
                break;
              }
            }
          }
        }

        if (!Overlap) {
          // Check offset and sort memory operation into the current chain.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
            continue;
          } else {
            MemOpQueue::iterator MI, ME;
            for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
              if (Offset < MI->Offset) {
                // Found a place to insert.
                break;
              }
              if (Offset == MI->Offset) {
                // Collision, abort.
                MI = ME;
                break;
              }
            }
            if (MI != MemOps.end()) {
              MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position));
              continue;
            }
          }
        }
      }

      // Don't advance the iterator; the op will start a new chain next.
      MBBI = I;
      --Position;
      // Fallthrough to look into existing chain.
    } else if (MBBI->isDebugValue()) {
      continue;
    } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
               MBBI->getOpcode() == ARM::t2STRDi8) {
      // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD
      // instructions; remember them because we may still be able to merge
      // add/sub into them.
      MergeBaseCandidates.push_back(MBBI);
    }

    // If we are here then the chain is broken; extract candidates for a
    // merge.
    if (MemOps.size() > 0) {
      FormCandidates(MemOps);
      // Reset for the next chain.
      CurrBase = 0;
      CurrOpc = ~0u;
      CurrPred = ARMCC::AL;
      MemOps.clear();
    }
  }
  if (MemOps.size() > 0)
    FormCandidates(MemOps);

  // Sort candidates so they get processed from the end to the beginning of
  // the basic block later; this is necessary for the liveness calculation.
  auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
    return M0->InsertPos < M1->InsertPos;
  };
  std::sort(Candidates.begin(), Candidates.end(), LessThan);

  // Go through the list of candidates and merge.
  bool Changed = false;
  for (const MergeCandidate *Candidate : Candidates) {
    if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
      MachineInstr *Merged = MergeOpsUpdate(*Candidate);
      // Merge preceding/trailing base inc/dec into the merged op.
      if (Merged) {
        Changed = true;
        unsigned Opcode = Merged->getOpcode();
        if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
          MergeBaseUpdateLSDouble(*Merged);
        else
          MergeBaseUpdateLSMultiple(Merged);
      } else {
        for (MachineInstr *MI : Candidate->Instrs) {
          if (MergeBaseUpdateLoadStore(MI))
            Changed = true;
        }
      }
    } else {
      assert(Candidate->Instrs.size() == 1);
      if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
        Changed = true;
    }
  }
  Candidates.clear();
  // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
  for (MachineInstr *MI : MergeBaseCandidates)
    MergeBaseUpdateLSDouble(*MI);
  MergeBaseCandidates.clear();

  return Changed;
}

/// If this is an exit BB, try merging the return ops ("bx lr" and
/// "mov pc, lr") into the preceding stack restore so it directly restores
/// the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  // Thumb1 LDM doesn't allow high registers.
  if (isThumb1) return false;
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineBasicBlock::iterator PrevI = std::prev(MBBI);
    // Ignore any DBG_VALUE instructions.
    while (PrevI->isDebugValue() && PrevI != MBB.begin())
      --PrevI;
    MachineInstr *PrevMI = PrevI;
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), *MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
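
// CombineMovBx below targets the Thumb1 return idiom. A hypothetical
// instance (the source register is arbitrary):
//   mov lr, r3
//   bx  lr
// =>
//   bx  r3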

bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  if (MBBI == MBB.begin() || MBBI == MBB.end() ||
      MBBI->getOpcode() != ARM::tBX_RET)
    return false;

  MachineBasicBlock::iterator Prev = MBBI;
  --Prev;
  if (Prev->getOpcode() != ARM::tMOVr || !Prev->definesRegister(ARM::LR))
    return false;

  for (auto Use : Prev->uses())
    if (Use.isKill()) {
      AddDefaultPred(BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
                         .addReg(Use.getReg(), RegState::Kill))
          .copyImplicitOps(*MBBI);
      MBB.erase(MBBI);
      MBB.erase(Prev);
      return true;
    }

  llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  MF = &Fn;
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TL = STI->getTargetLowering();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();

  RegClassInfoValid = false;
  isThumb2 = AFI->isThumb2Function();
  isThumb1 = AFI->isThumbFunction() && !isThumb2;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (STI->hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
    if (isThumb1)
      Modified |= CombineMovBx(MBB);
  }

  Allocator.DestroyAll();
  return Modified;
}

namespace llvm {
void initializeARMPreAllocLoadStoreOptPass(PassRegistry &);
}

#define ARM_PREALLOC_LOAD_STORE_OPT_NAME \
  "ARM pre- register allocation load / store optimization pass"

namespace {
  /// Pre-register-allocation pass that moves loads / stores from consecutive
  /// locations closer together to make it more likely that they will be
  /// combined later.
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {
      initializeARMPreAllocLoadStoreOptPass(*PassRegistry::getPassRegistry());
    }

    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return ARM_PREALLOC_LOAD_STORE_OPT_NAME;
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVectorImpl<MachineInstr *> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-load-store-opt",
                ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  if (AssumeMisalignedLoadStores)
    return false;

  TD = &Fn.getDataLayout();
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineBasicBlock &MFI : Fn)
    Modified |= RescheduleLoadStoreInstrs(&MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSetImpl<MachineInstr*> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias
  // information some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
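
// A minimal sketch of the pairing CanFormLdStDWord below checks for
// (register and offset choices are arbitrary):
//   ldr r4, [r2, #8]
//   ldr r5, [r2, #12]
// =>
//   ldrd r4, r5, [r2, #8]
// subject to v5TE availability, sufficient alignment, an encodable offset,
// and distinct destination registers.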

bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl, unsigned &NewOpc,
                                          unsigned &FirstReg,
                                          unsigned &SecondReg,
                                          unsigned &BaseReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12) {
    NewOpc = ARM::LDRD;
  } else if (Opcode == ARM::STRi12) {
    NewOpc = ARM::STRD;
  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else {
    return false;
  }

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement. At the moment, we ignore the memory operand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  FirstReg = Op0->getOperand(0).getReg();
  SecondReg = Op1->getOperand(0).getReg();
  if (FirstReg == SecondReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(*Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
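
// An illustrative sketch of RescheduleOps below (registers and the
// intervening instruction are arbitrary): same-base accesses separated by
// unrelated code,
//   ldr r4, [r0]
//   add r7, r7, #1
//   ldr r5, [r0, #4]
// are moved next to each other,
//   ldr r4, [r0]
//   ldr r5, [r0, #4]
//   add r7, r7, #1
// so that LDRD/STRD formation (or the post-RA LDM/STM pass) can combine them.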

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                            SmallVectorImpl<MachineInstr *> &Ops,
                                            unsigned Base, bool isLd,
                                            DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
              int LOffset = getMemoryOpOffset(LHS);
              int ROffset = getMemoryOpOffset(RHS);
              assert(LHS == RHS || LOffset != ROffset);
              return LOffset > ROffset;
            });

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned FirstReg = 0, SecondReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             FirstReg, SecondReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(FirstReg, TRC);
          MRI->constrainRegClass(SecondReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg, RegState::Define)
              .addReg(SecondReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1));
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg)
              .addReg(SecondReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1));
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          if (!isT2) {
            // Add register allocation hints to form register pairs.
            MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
            MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
          }
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
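
// RescheduleLoadStoreInstrs below is the per-block driver: within each region
// bounded by a call or terminator it buckets loads and stores by base
// register (Base2LdsMap / Base2StsMap), and every bucket with more than one
// entry is handed to RescheduleOps above.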

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(*MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(*MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isLoadSingle(Opc);
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}

/// Returns an instance of the load / store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}
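
// Hypothetical usage sketch (the real hooks live in ARMTargetMachine's pass
// configuration, not in this file): the factory is typically called twice,
//   addPass(createARMLoadStoreOptimizationPass(/*PreAlloc=*/true));  // pre-RA
//   addPass(createARMLoadStoreOptimizationPass(/*PreAlloc=*/false)); // post-RA
// so rescheduling runs before register allocation and LDM/STM formation
// runs after it.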