1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file This file contains a pass that performs load / store related peephole 11 /// optimizations. This pass should be run after register allocation. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ARM.h" 16 #include "ARMBaseInstrInfo.h" 17 #include "ARMBaseRegisterInfo.h" 18 #include "ARMISelLowering.h" 19 #include "ARMMachineFunctionInfo.h" 20 #include "ARMSubtarget.h" 21 #include "MCTargetDesc/ARMAddressingModes.h" 22 #include "ThumbRegisterInfo.h" 23 #include "llvm/ADT/DenseMap.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/ADT/SmallPtrSet.h" 26 #include "llvm/ADT/SmallSet.h" 27 #include "llvm/ADT/SmallVector.h" 28 #include "llvm/ADT/Statistic.h" 29 #include "llvm/CodeGen/MachineBasicBlock.h" 30 #include "llvm/CodeGen/MachineFunctionPass.h" 31 #include "llvm/CodeGen/MachineInstr.h" 32 #include "llvm/CodeGen/MachineInstrBuilder.h" 33 #include "llvm/CodeGen/MachineRegisterInfo.h" 34 #include "llvm/CodeGen/RegisterClassInfo.h" 35 #include "llvm/CodeGen/SelectionDAGNodes.h" 36 #include "llvm/CodeGen/LivePhysRegs.h" 37 #include "llvm/IR/DataLayout.h" 38 #include "llvm/IR/DerivedTypes.h" 39 #include "llvm/IR/Function.h" 40 #include "llvm/Support/Allocator.h" 41 #include "llvm/Support/Debug.h" 42 #include "llvm/Support/ErrorHandling.h" 43 #include "llvm/Support/raw_ostream.h" 44 #include "llvm/Target/TargetInstrInfo.h" 45 #include "llvm/Target/TargetMachine.h" 46 #include "llvm/Target/TargetRegisterInfo.h" 47 using namespace llvm; 48 49 #define DEBUG_TYPE "arm-ldst-opt" 50 51 STATISTIC(NumLDMGened , "Number of ldm instructions generated"); 52 STATISTIC(NumSTMGened , "Number of stm instructions generated"); 53 STATISTIC(NumVLDMGened, "Number of vldm instructions generated"); 54 STATISTIC(NumVSTMGened, "Number of vstm instructions generated"); 55 STATISTIC(NumLdStMoved, "Number of load / store instructions moved"); 56 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation"); 57 STATISTIC(NumSTRDFormed,"Number of strd created before allocation"); 58 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm"); 59 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm"); 60 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's"); 61 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's"); 62 63 /// This switch disables formation of double/multi instructions that could 64 /// potentially lead to (new) alignment traps even with CCR.UNALIGN_TRP 65 /// disabled. This can be used to create libraries that are robust even when 66 /// users provoke undefined behaviour by supplying misaligned pointers. 67 /// \see mayCombineMisaligned() 68 static cl::opt<bool> 69 AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden, 70 cl::init(false), cl::desc("Be more conservative in ARM load/store opt")); 71 72 namespace llvm { 73 void initializeARMLoadStoreOptPass(PassRegistry &); 74 } 75 76 #define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass" 77 78 namespace { 79 /// Post- register allocation pass the combine load / store instructions to 80 /// form ldm / stm instructions. 81 struct ARMLoadStoreOpt : public MachineFunctionPass { 82 static char ID; 83 ARMLoadStoreOpt() : MachineFunctionPass(ID) { 84 initializeARMLoadStoreOptPass(*PassRegistry::getPassRegistry()); 85 } 86 87 const MachineFunction *MF; 88 const TargetInstrInfo *TII; 89 const TargetRegisterInfo *TRI; 90 const ARMSubtarget *STI; 91 const TargetLowering *TL; 92 ARMFunctionInfo *AFI; 93 LivePhysRegs LiveRegs; 94 RegisterClassInfo RegClassInfo; 95 MachineBasicBlock::const_iterator LiveRegPos; 96 bool LiveRegsValid; 97 bool RegClassInfoValid; 98 bool isThumb1, isThumb2; 99 100 bool runOnMachineFunction(MachineFunction &Fn) override; 101 102 MachineFunctionProperties getRequiredProperties() const override { 103 return MachineFunctionProperties().set( 104 MachineFunctionProperties::Property::AllVRegsAllocated); 105 } 106 107 const char *getPassName() const override { 108 return ARM_LOAD_STORE_OPT_NAME; 109 } 110 111 private: 112 /// A set of load/store MachineInstrs with same base register sorted by 113 /// offset. 114 struct MemOpQueueEntry { 115 MachineInstr *MI; 116 int Offset; ///< Load/Store offset. 117 unsigned Position; ///< Position as counted from end of basic block. 118 MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position) 119 : MI(MI), Offset(Offset), Position(Position) {} 120 }; 121 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; 122 123 /// A set of MachineInstrs that fulfill (nearly all) conditions to get 124 /// merged into a LDM/STM. 125 struct MergeCandidate { 126 /// List of instructions ordered by load/store offset. 127 SmallVector<MachineInstr*, 4> Instrs; 128 /// Index in Instrs of the instruction being latest in the schedule. 129 unsigned LatestMIIdx; 130 /// Index in Instrs of the instruction being earliest in the schedule. 131 unsigned EarliestMIIdx; 132 /// Index into the basic block where the merged instruction will be 133 /// inserted. (See MemOpQueueEntry.Position) 134 unsigned InsertPos; 135 /// Whether the instructions can be merged into a ldm/stm instruction. 136 bool CanMergeToLSMulti; 137 /// Whether the instructions can be merged into a ldrd/strd instruction. 138 bool CanMergeToLSDouble; 139 }; 140 SpecificBumpPtrAllocator<MergeCandidate> Allocator; 141 SmallVector<const MergeCandidate*,4> Candidates; 142 SmallVector<MachineInstr*,4> MergeBaseCandidates; 143 144 void moveLiveRegsBefore(const MachineBasicBlock &MBB, 145 MachineBasicBlock::const_iterator Before); 146 unsigned findFreeReg(const TargetRegisterClass &RegClass); 147 void UpdateBaseRegUses(MachineBasicBlock &MBB, 148 MachineBasicBlock::iterator MBBI, 149 DebugLoc DL, unsigned Base, unsigned WordOffset, 150 ARMCC::CondCodes Pred, unsigned PredReg); 151 MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB, 152 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, 153 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, 154 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs); 155 MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB, 156 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, 157 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, 158 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const; 159 void FormCandidates(const MemOpQueue &MemOps); 160 MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand); 161 bool FixInvalidRegPairOp(MachineBasicBlock &MBB, 162 MachineBasicBlock::iterator &MBBI); 163 bool MergeBaseUpdateLoadStore(MachineInstr *MI); 164 bool MergeBaseUpdateLSMultiple(MachineInstr *MI); 165 bool MergeBaseUpdateLSDouble(MachineInstr &MI) const; 166 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); 167 bool MergeReturnIntoLDM(MachineBasicBlock &MBB); 168 bool CombineMovBx(MachineBasicBlock &MBB); 169 }; 170 char ARMLoadStoreOpt::ID = 0; 171 } 172 173 INITIALIZE_PASS(ARMLoadStoreOpt, "arm-load-store-opt", ARM_LOAD_STORE_OPT_NAME, false, false) 174 175 static bool definesCPSR(const MachineInstr *MI) { 176 for (const auto &MO : MI->operands()) { 177 if (!MO.isReg()) 178 continue; 179 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead()) 180 // If the instruction has live CPSR def, then it's not safe to fold it 181 // into load / store. 182 return true; 183 } 184 185 return false; 186 } 187 188 static int getMemoryOpOffset(const MachineInstr *MI) { 189 unsigned Opcode = MI->getOpcode(); 190 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD; 191 unsigned NumOperands = MI->getDesc().getNumOperands(); 192 unsigned OffField = MI->getOperand(NumOperands-3).getImm(); 193 194 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 || 195 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 || 196 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 || 197 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12) 198 return OffField; 199 200 // Thumb1 immediate offsets are scaled by 4 201 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi || 202 Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) 203 return OffField * 4; 204 205 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField) 206 : ARM_AM::getAM5Offset(OffField) * 4; 207 ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField) 208 : ARM_AM::getAM5Op(OffField); 209 210 if (Op == ARM_AM::sub) 211 return -Offset; 212 213 return Offset; 214 } 215 216 static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) { 217 return MI.getOperand(1); 218 } 219 220 static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) { 221 return MI.getOperand(0); 222 } 223 224 static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) { 225 switch (Opcode) { 226 default: llvm_unreachable("Unhandled opcode!"); 227 case ARM::LDRi12: 228 ++NumLDMGened; 229 switch (Mode) { 230 default: llvm_unreachable("Unhandled submode!"); 231 case ARM_AM::ia: return ARM::LDMIA; 232 case ARM_AM::da: return ARM::LDMDA; 233 case ARM_AM::db: return ARM::LDMDB; 234 case ARM_AM::ib: return ARM::LDMIB; 235 } 236 case ARM::STRi12: 237 ++NumSTMGened; 238 switch (Mode) { 239 default: llvm_unreachable("Unhandled submode!"); 240 case ARM_AM::ia: return ARM::STMIA; 241 case ARM_AM::da: return ARM::STMDA; 242 case ARM_AM::db: return ARM::STMDB; 243 case ARM_AM::ib: return ARM::STMIB; 244 } 245 case ARM::tLDRi: 246 case ARM::tLDRspi: 247 // tLDMIA is writeback-only - unless the base register is in the input 248 // reglist. 249 ++NumLDMGened; 250 switch (Mode) { 251 default: llvm_unreachable("Unhandled submode!"); 252 case ARM_AM::ia: return ARM::tLDMIA; 253 } 254 case ARM::tSTRi: 255 case ARM::tSTRspi: 256 // There is no non-writeback tSTMIA either. 257 ++NumSTMGened; 258 switch (Mode) { 259 default: llvm_unreachable("Unhandled submode!"); 260 case ARM_AM::ia: return ARM::tSTMIA_UPD; 261 } 262 case ARM::t2LDRi8: 263 case ARM::t2LDRi12: 264 ++NumLDMGened; 265 switch (Mode) { 266 default: llvm_unreachable("Unhandled submode!"); 267 case ARM_AM::ia: return ARM::t2LDMIA; 268 case ARM_AM::db: return ARM::t2LDMDB; 269 } 270 case ARM::t2STRi8: 271 case ARM::t2STRi12: 272 ++NumSTMGened; 273 switch (Mode) { 274 default: llvm_unreachable("Unhandled submode!"); 275 case ARM_AM::ia: return ARM::t2STMIA; 276 case ARM_AM::db: return ARM::t2STMDB; 277 } 278 case ARM::VLDRS: 279 ++NumVLDMGened; 280 switch (Mode) { 281 default: llvm_unreachable("Unhandled submode!"); 282 case ARM_AM::ia: return ARM::VLDMSIA; 283 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists. 284 } 285 case ARM::VSTRS: 286 ++NumVSTMGened; 287 switch (Mode) { 288 default: llvm_unreachable("Unhandled submode!"); 289 case ARM_AM::ia: return ARM::VSTMSIA; 290 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists. 291 } 292 case ARM::VLDRD: 293 ++NumVLDMGened; 294 switch (Mode) { 295 default: llvm_unreachable("Unhandled submode!"); 296 case ARM_AM::ia: return ARM::VLDMDIA; 297 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists. 298 } 299 case ARM::VSTRD: 300 ++NumVSTMGened; 301 switch (Mode) { 302 default: llvm_unreachable("Unhandled submode!"); 303 case ARM_AM::ia: return ARM::VSTMDIA; 304 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists. 305 } 306 } 307 } 308 309 static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) { 310 switch (Opcode) { 311 default: llvm_unreachable("Unhandled opcode!"); 312 case ARM::LDMIA_RET: 313 case ARM::LDMIA: 314 case ARM::LDMIA_UPD: 315 case ARM::STMIA: 316 case ARM::STMIA_UPD: 317 case ARM::tLDMIA: 318 case ARM::tLDMIA_UPD: 319 case ARM::tSTMIA_UPD: 320 case ARM::t2LDMIA_RET: 321 case ARM::t2LDMIA: 322 case ARM::t2LDMIA_UPD: 323 case ARM::t2STMIA: 324 case ARM::t2STMIA_UPD: 325 case ARM::VLDMSIA: 326 case ARM::VLDMSIA_UPD: 327 case ARM::VSTMSIA: 328 case ARM::VSTMSIA_UPD: 329 case ARM::VLDMDIA: 330 case ARM::VLDMDIA_UPD: 331 case ARM::VSTMDIA: 332 case ARM::VSTMDIA_UPD: 333 return ARM_AM::ia; 334 335 case ARM::LDMDA: 336 case ARM::LDMDA_UPD: 337 case ARM::STMDA: 338 case ARM::STMDA_UPD: 339 return ARM_AM::da; 340 341 case ARM::LDMDB: 342 case ARM::LDMDB_UPD: 343 case ARM::STMDB: 344 case ARM::STMDB_UPD: 345 case ARM::t2LDMDB: 346 case ARM::t2LDMDB_UPD: 347 case ARM::t2STMDB: 348 case ARM::t2STMDB_UPD: 349 case ARM::VLDMSDB_UPD: 350 case ARM::VSTMSDB_UPD: 351 case ARM::VLDMDDB_UPD: 352 case ARM::VSTMDDB_UPD: 353 return ARM_AM::db; 354 355 case ARM::LDMIB: 356 case ARM::LDMIB_UPD: 357 case ARM::STMIB: 358 case ARM::STMIB_UPD: 359 return ARM_AM::ib; 360 } 361 } 362 363 static bool isT1i32Load(unsigned Opc) { 364 return Opc == ARM::tLDRi || Opc == ARM::tLDRspi; 365 } 366 367 static bool isT2i32Load(unsigned Opc) { 368 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8; 369 } 370 371 static bool isi32Load(unsigned Opc) { 372 return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ; 373 } 374 375 static bool isT1i32Store(unsigned Opc) { 376 return Opc == ARM::tSTRi || Opc == ARM::tSTRspi; 377 } 378 379 static bool isT2i32Store(unsigned Opc) { 380 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8; 381 } 382 383 static bool isi32Store(unsigned Opc) { 384 return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc); 385 } 386 387 static bool isLoadSingle(unsigned Opc) { 388 return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD; 389 } 390 391 static unsigned getImmScale(unsigned Opc) { 392 switch (Opc) { 393 default: llvm_unreachable("Unhandled opcode!"); 394 case ARM::tLDRi: 395 case ARM::tSTRi: 396 case ARM::tLDRspi: 397 case ARM::tSTRspi: 398 return 1; 399 case ARM::tLDRHi: 400 case ARM::tSTRHi: 401 return 2; 402 case ARM::tLDRBi: 403 case ARM::tSTRBi: 404 return 4; 405 } 406 } 407 408 static unsigned getLSMultipleTransferSize(const MachineInstr *MI) { 409 switch (MI->getOpcode()) { 410 default: return 0; 411 case ARM::LDRi12: 412 case ARM::STRi12: 413 case ARM::tLDRi: 414 case ARM::tSTRi: 415 case ARM::tLDRspi: 416 case ARM::tSTRspi: 417 case ARM::t2LDRi8: 418 case ARM::t2LDRi12: 419 case ARM::t2STRi8: 420 case ARM::t2STRi12: 421 case ARM::VLDRS: 422 case ARM::VSTRS: 423 return 4; 424 case ARM::VLDRD: 425 case ARM::VSTRD: 426 return 8; 427 case ARM::LDMIA: 428 case ARM::LDMDA: 429 case ARM::LDMDB: 430 case ARM::LDMIB: 431 case ARM::STMIA: 432 case ARM::STMDA: 433 case ARM::STMDB: 434 case ARM::STMIB: 435 case ARM::tLDMIA: 436 case ARM::tLDMIA_UPD: 437 case ARM::tSTMIA_UPD: 438 case ARM::t2LDMIA: 439 case ARM::t2LDMDB: 440 case ARM::t2STMIA: 441 case ARM::t2STMDB: 442 case ARM::VLDMSIA: 443 case ARM::VSTMSIA: 444 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4; 445 case ARM::VLDMDIA: 446 case ARM::VSTMDIA: 447 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8; 448 } 449 } 450 451 /// Update future uses of the base register with the offset introduced 452 /// due to writeback. This function only works on Thumb1. 453 void 454 ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, 455 MachineBasicBlock::iterator MBBI, 456 DebugLoc DL, unsigned Base, 457 unsigned WordOffset, 458 ARMCC::CondCodes Pred, unsigned PredReg) { 459 assert(isThumb1 && "Can only update base register uses for Thumb1!"); 460 // Start updating any instructions with immediate offsets. Insert a SUB before 461 // the first non-updateable instruction (if any). 462 for (; MBBI != MBB.end(); ++MBBI) { 463 bool InsertSub = false; 464 unsigned Opc = MBBI->getOpcode(); 465 466 if (MBBI->readsRegister(Base)) { 467 int Offset; 468 bool IsLoad = 469 Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi; 470 bool IsStore = 471 Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi; 472 473 if (IsLoad || IsStore) { 474 // Loads and stores with immediate offsets can be updated, but only if 475 // the new offset isn't negative. 476 // The MachineOperand containing the offset immediate is the last one 477 // before predicates. 478 MachineOperand &MO = 479 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); 480 // The offsets are scaled by 1, 2 or 4 depending on the Opcode. 481 Offset = MO.getImm() - WordOffset * getImmScale(Opc); 482 483 // If storing the base register, it needs to be reset first. 484 unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg(); 485 486 if (Offset >= 0 && !(IsStore && InstrSrcReg == Base)) 487 MO.setImm(Offset); 488 else 489 InsertSub = true; 490 491 } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) && 492 !definesCPSR(MBBI)) { 493 // SUBS/ADDS using this register, with a dead def of the CPSR. 494 // Merge it with the update; if the merged offset is too large, 495 // insert a new sub instead. 496 MachineOperand &MO = 497 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); 498 Offset = (Opc == ARM::tSUBi8) ? 499 MO.getImm() + WordOffset * 4 : 500 MO.getImm() - WordOffset * 4 ; 501 if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) { 502 // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if 503 // Offset == 0. 504 MO.setImm(Offset); 505 // The base register has now been reset, so exit early. 506 return; 507 } else { 508 InsertSub = true; 509 } 510 511 } else { 512 // Can't update the instruction. 513 InsertSub = true; 514 } 515 516 } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) { 517 // Since SUBS sets the condition flags, we can't place the base reset 518 // after an instruction that has a live CPSR def. 519 // The base register might also contain an argument for a function call. 520 InsertSub = true; 521 } 522 523 if (InsertSub) { 524 // An instruction above couldn't be updated, so insert a sub. 525 AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true) 526 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg); 527 return; 528 } 529 530 if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base)) 531 // Register got killed. Stop updating. 532 return; 533 } 534 535 // End of block was reached. 536 if (MBB.succ_size() > 0) { 537 // FIXME: Because of a bug, live registers are sometimes missing from 538 // the successor blocks' live-in sets. This means we can't trust that 539 // information and *always* have to reset at the end of a block. 540 // See PR21029. 541 if (MBBI != MBB.end()) --MBBI; 542 AddDefaultT1CC( 543 BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true) 544 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg); 545 } 546 } 547 548 /// Return the first register of class \p RegClass that is not in \p Regs. 549 unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) { 550 if (!RegClassInfoValid) { 551 RegClassInfo.runOnMachineFunction(*MF); 552 RegClassInfoValid = true; 553 } 554 555 for (unsigned Reg : RegClassInfo.getOrder(&RegClass)) 556 if (!LiveRegs.contains(Reg)) 557 return Reg; 558 return 0; 559 } 560 561 /// Compute live registers just before instruction \p Before (in normal schedule 562 /// direction). Computes backwards so multiple queries in the same block must 563 /// come in reverse order. 564 void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB, 565 MachineBasicBlock::const_iterator Before) { 566 // Initialize if we never queried in this block. 567 if (!LiveRegsValid) { 568 LiveRegs.init(TRI); 569 LiveRegs.addLiveOuts(MBB); 570 LiveRegPos = MBB.end(); 571 LiveRegsValid = true; 572 } 573 // Move backward just before the "Before" position. 574 while (LiveRegPos != Before) { 575 --LiveRegPos; 576 LiveRegs.stepBackward(*LiveRegPos); 577 } 578 } 579 580 static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs, 581 unsigned Reg) { 582 for (const std::pair<unsigned, bool> &R : Regs) 583 if (R.first == Reg) 584 return true; 585 return false; 586 } 587 588 /// Create and insert a LDM or STM with Base as base register and registers in 589 /// Regs as the register operands that would be loaded / stored. It returns 590 /// true if the transformation is done. 591 MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB, 592 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, 593 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, 594 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) { 595 unsigned NumRegs = Regs.size(); 596 assert(NumRegs > 1); 597 598 // For Thumb1 targets, it might be necessary to clobber the CPSR to merge. 599 // Compute liveness information for that register to make the decision. 600 bool SafeToClobberCPSR = !isThumb1 || 601 (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) == 602 MachineBasicBlock::LQR_Dead); 603 604 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback. 605 606 // Exception: If the base register is in the input reglist, Thumb1 LDM is 607 // non-writeback. 608 // It's also not possible to merge an STR of the base register in Thumb1. 609 if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) { 610 assert(Base != ARM::SP && "Thumb1 does not allow SP in register list"); 611 if (Opcode == ARM::tLDRi) { 612 Writeback = false; 613 } else if (Opcode == ARM::tSTRi) { 614 return nullptr; 615 } 616 } 617 618 ARM_AM::AMSubMode Mode = ARM_AM::ia; 619 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA. 620 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); 621 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1; 622 623 if (Offset == 4 && haveIBAndDA) { 624 Mode = ARM_AM::ib; 625 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) { 626 Mode = ARM_AM::da; 627 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) { 628 // VLDM/VSTM do not support DB mode without also updating the base reg. 629 Mode = ARM_AM::db; 630 } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) { 631 // Check if this is a supported opcode before inserting instructions to 632 // calculate a new base register. 633 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr; 634 635 // If starting offset isn't zero, insert a MI to materialize a new base. 636 // But only do so if it is cost effective, i.e. merging more than two 637 // loads / stores. 638 if (NumRegs <= 2) 639 return nullptr; 640 641 // On Thumb1, it's not worth materializing a new base register without 642 // clobbering the CPSR (i.e. not using ADDS/SUBS). 643 if (!SafeToClobberCPSR) 644 return nullptr; 645 646 unsigned NewBase; 647 if (isi32Load(Opcode)) { 648 // If it is a load, then just use one of the destination registers 649 // as the new base. Will no longer be writeback in Thumb1. 650 NewBase = Regs[NumRegs-1].first; 651 Writeback = false; 652 } else { 653 // Find a free register that we can use as scratch register. 654 moveLiveRegsBefore(MBB, InsertBefore); 655 // The merged instruction does not exist yet but will use several Regs if 656 // it is a Store. 657 if (!isLoadSingle(Opcode)) 658 for (const std::pair<unsigned, bool> &R : Regs) 659 LiveRegs.addReg(R.first); 660 661 NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass); 662 if (NewBase == 0) 663 return nullptr; 664 } 665 666 int BaseOpc = 667 isThumb2 ? ARM::t2ADDri : 668 (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi : 669 (isThumb1 && Offset < 8) ? ARM::tADDi3 : 670 isThumb1 ? ARM::tADDi8 : ARM::ADDri; 671 672 if (Offset < 0) { 673 Offset = - Offset; 674 BaseOpc = 675 isThumb2 ? ARM::t2SUBri : 676 (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 : 677 isThumb1 ? ARM::tSUBi8 : ARM::SUBri; 678 } 679 680 if (!TL->isLegalAddImmediate(Offset)) 681 // FIXME: Try add with register operand? 682 return nullptr; // Probably not worth it then. 683 684 // We can only append a kill flag to the add/sub input if the value is not 685 // used in the register list of the stm as well. 686 bool KillOldBase = BaseKill && 687 (!isi32Store(Opcode) || !ContainsReg(Regs, Base)); 688 689 if (isThumb1) { 690 // Thumb1: depending on immediate size, use either 691 // ADDS NewBase, Base, #imm3 692 // or 693 // MOV NewBase, Base 694 // ADDS NewBase, #imm8. 695 if (Base != NewBase && 696 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) { 697 // Need to insert a MOV to the new base first. 698 if (isARMLowRegister(NewBase) && isARMLowRegister(Base) && 699 !STI->hasV6Ops()) { 700 // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr 701 if (Pred != ARMCC::AL) 702 return nullptr; 703 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase) 704 .addReg(Base, getKillRegState(KillOldBase)); 705 } else 706 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase) 707 .addReg(Base, getKillRegState(KillOldBase)) 708 .addImm(Pred).addReg(PredReg); 709 710 // The following ADDS/SUBS becomes an update. 711 Base = NewBase; 712 KillOldBase = true; 713 } 714 if (BaseOpc == ARM::tADDrSPi) { 715 assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4"); 716 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase) 717 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4) 718 .addImm(Pred).addReg(PredReg); 719 } else 720 AddDefaultT1CC( 721 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true) 722 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset) 723 .addImm(Pred).addReg(PredReg); 724 } else { 725 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase) 726 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset) 727 .addImm(Pred).addReg(PredReg).addReg(0); 728 } 729 Base = NewBase; 730 BaseKill = true; // New base is always killed straight away. 731 } 732 733 bool isDef = isLoadSingle(Opcode); 734 735 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with 736 // base register writeback. 737 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); 738 if (!Opcode) 739 return nullptr; 740 741 // Check if a Thumb1 LDM/STM merge is safe. This is the case if: 742 // - There is no writeback (LDM of base register), 743 // - the base register is killed by the merged instruction, 744 // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS 745 // to reset the base register. 746 // Otherwise, don't merge. 747 // It's safe to return here since the code to materialize a new base register 748 // above is also conditional on SafeToClobberCPSR. 749 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill) 750 return nullptr; 751 752 MachineInstrBuilder MIB; 753 754 if (Writeback) { 755 assert(isThumb1 && "expected Writeback only inThumb1"); 756 if (Opcode == ARM::tLDMIA) { 757 assert(!(ContainsReg(Regs, Base)) && "Thumb1 can't LDM ! with Base in Regs"); 758 // Update tLDMIA with writeback if necessary. 759 Opcode = ARM::tLDMIA_UPD; 760 } 761 762 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode)); 763 764 // Thumb1: we might need to set base writeback when building the MI. 765 MIB.addReg(Base, getDefRegState(true)) 766 .addReg(Base, getKillRegState(BaseKill)); 767 768 // The base isn't dead after a merged instruction with writeback. 769 // Insert a sub instruction after the newly formed instruction to reset. 770 if (!BaseKill) 771 UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg); 772 773 } else { 774 // No writeback, simply build the MachineInstr. 775 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode)); 776 MIB.addReg(Base, getKillRegState(BaseKill)); 777 } 778 779 MIB.addImm(Pred).addReg(PredReg); 780 781 for (const std::pair<unsigned, bool> &R : Regs) 782 MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second)); 783 784 return MIB.getInstr(); 785 } 786 787 MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB, 788 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, 789 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, 790 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const { 791 bool IsLoad = isi32Load(Opcode); 792 assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store"); 793 unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8; 794 795 assert(Regs.size() == 2); 796 MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL, 797 TII->get(LoadStoreOpcode)); 798 if (IsLoad) { 799 MIB.addReg(Regs[0].first, RegState::Define) 800 .addReg(Regs[1].first, RegState::Define); 801 } else { 802 MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second)) 803 .addReg(Regs[1].first, getKillRegState(Regs[1].second)); 804 } 805 MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 806 return MIB.getInstr(); 807 } 808 809 /// Call MergeOps and update MemOps and merges accordingly on success. 810 MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) { 811 const MachineInstr *First = Cand.Instrs.front(); 812 unsigned Opcode = First->getOpcode(); 813 bool IsLoad = isLoadSingle(Opcode); 814 SmallVector<std::pair<unsigned, bool>, 8> Regs; 815 SmallVector<unsigned, 4> ImpDefs; 816 DenseSet<unsigned> KilledRegs; 817 DenseSet<unsigned> UsedRegs; 818 // Determine list of registers and list of implicit super-register defs. 819 for (const MachineInstr *MI : Cand.Instrs) { 820 const MachineOperand &MO = getLoadStoreRegOp(*MI); 821 unsigned Reg = MO.getReg(); 822 bool IsKill = MO.isKill(); 823 if (IsKill) 824 KilledRegs.insert(Reg); 825 Regs.push_back(std::make_pair(Reg, IsKill)); 826 UsedRegs.insert(Reg); 827 828 if (IsLoad) { 829 // Collect any implicit defs of super-registers, after merging we can't 830 // be sure anymore that we properly preserved these live ranges and must 831 // removed these implicit operands. 832 for (const MachineOperand &MO : MI->implicit_operands()) { 833 if (!MO.isReg() || !MO.isDef() || MO.isDead()) 834 continue; 835 assert(MO.isImplicit()); 836 unsigned DefReg = MO.getReg(); 837 838 if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end()) 839 continue; 840 // We can ignore cases where the super-reg is read and written. 841 if (MI->readsRegister(DefReg)) 842 continue; 843 ImpDefs.push_back(DefReg); 844 } 845 } 846 } 847 848 // Attempt the merge. 849 typedef MachineBasicBlock::iterator iterator; 850 MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx]; 851 iterator InsertBefore = std::next(iterator(LatestMI)); 852 MachineBasicBlock &MBB = *LatestMI->getParent(); 853 unsigned Offset = getMemoryOpOffset(First); 854 unsigned Base = getLoadStoreBaseOp(*First).getReg(); 855 bool BaseKill = LatestMI->killsRegister(Base); 856 unsigned PredReg = 0; 857 ARMCC::CondCodes Pred = getInstrPredicate(*First, PredReg); 858 DebugLoc DL = First->getDebugLoc(); 859 MachineInstr *Merged = nullptr; 860 if (Cand.CanMergeToLSDouble) 861 Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill, 862 Opcode, Pred, PredReg, DL, Regs); 863 if (!Merged && Cand.CanMergeToLSMulti) 864 Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill, 865 Opcode, Pred, PredReg, DL, Regs); 866 if (!Merged) 867 return nullptr; 868 869 // Determine earliest instruction that will get removed. We then keep an 870 // iterator just above it so the following erases don't invalidated it. 871 iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]); 872 bool EarliestAtBegin = false; 873 if (EarliestI == MBB.begin()) { 874 EarliestAtBegin = true; 875 } else { 876 EarliestI = std::prev(EarliestI); 877 } 878 879 // Remove instructions which have been merged. 880 for (MachineInstr *MI : Cand.Instrs) 881 MBB.erase(MI); 882 883 // Determine range between the earliest removed instruction and the new one. 884 if (EarliestAtBegin) 885 EarliestI = MBB.begin(); 886 else 887 EarliestI = std::next(EarliestI); 888 auto FixupRange = make_range(EarliestI, iterator(Merged)); 889 890 if (isLoadSingle(Opcode)) { 891 // If the previous loads defined a super-reg, then we have to mark earlier 892 // operands undef; Replicate the super-reg def on the merged instruction. 893 for (MachineInstr &MI : FixupRange) { 894 for (unsigned &ImpDefReg : ImpDefs) { 895 for (MachineOperand &MO : MI.implicit_operands()) { 896 if (!MO.isReg() || MO.getReg() != ImpDefReg) 897 continue; 898 if (MO.readsReg()) 899 MO.setIsUndef(); 900 else if (MO.isDef()) 901 ImpDefReg = 0; 902 } 903 } 904 } 905 906 MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged); 907 for (unsigned ImpDef : ImpDefs) 908 MIB.addReg(ImpDef, RegState::ImplicitDefine); 909 } else { 910 // Remove kill flags: We are possibly storing the values later now. 911 assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD); 912 for (MachineInstr &MI : FixupRange) { 913 for (MachineOperand &MO : MI.uses()) { 914 if (!MO.isReg() || !MO.isKill()) 915 continue; 916 if (UsedRegs.count(MO.getReg())) 917 MO.setIsKill(false); 918 } 919 } 920 assert(ImpDefs.empty()); 921 } 922 923 return Merged; 924 } 925 926 static bool isValidLSDoubleOffset(int Offset) { 927 unsigned Value = abs(Offset); 928 // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally 929 // multiplied by 4. 930 return (Value % 4) == 0 && Value < 1024; 931 } 932 933 /// Return true for loads/stores that can be combined to a double/multi 934 /// operation without increasing the requirements for alignment. 935 static bool mayCombineMisaligned(const TargetSubtargetInfo &STI, 936 const MachineInstr &MI) { 937 // vldr/vstr trap on misaligned pointers anyway, forming vldm makes no 938 // difference. 939 unsigned Opcode = MI.getOpcode(); 940 if (!isi32Load(Opcode) && !isi32Store(Opcode)) 941 return true; 942 943 // Stack pointer alignment is out of the programmers control so we can trust 944 // SP-relative loads/stores. 945 if (getLoadStoreBaseOp(MI).getReg() == ARM::SP && 946 STI.getFrameLowering()->getTransientStackAlignment() >= 4) 947 return true; 948 return false; 949 } 950 951 /// Find candidates for load/store multiple merge in list of MemOpQueueEntries. 952 void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) { 953 const MachineInstr *FirstMI = MemOps[0].MI; 954 unsigned Opcode = FirstMI->getOpcode(); 955 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); 956 unsigned Size = getLSMultipleTransferSize(FirstMI); 957 958 unsigned SIndex = 0; 959 unsigned EIndex = MemOps.size(); 960 do { 961 // Look at the first instruction. 962 const MachineInstr *MI = MemOps[SIndex].MI; 963 int Offset = MemOps[SIndex].Offset; 964 const MachineOperand &PMO = getLoadStoreRegOp(*MI); 965 unsigned PReg = PMO.getReg(); 966 unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg); 967 unsigned Latest = SIndex; 968 unsigned Earliest = SIndex; 969 unsigned Count = 1; 970 bool CanMergeToLSDouble = 971 STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset); 972 // ARM errata 602117: LDRD with base in list may result in incorrect base 973 // register when interrupted or faulted. 974 if (STI->isCortexM3() && isi32Load(Opcode) && 975 PReg == getLoadStoreBaseOp(*MI).getReg()) 976 CanMergeToLSDouble = false; 977 978 bool CanMergeToLSMulti = true; 979 // On swift vldm/vstm starting with an odd register number as that needs 980 // more uops than single vldrs. 981 if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1) 982 CanMergeToLSMulti = false; 983 984 // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it 985 // deprecated; LDM to PC is fine but cannot happen here. 986 if (PReg == ARM::SP || PReg == ARM::PC) 987 CanMergeToLSMulti = CanMergeToLSDouble = false; 988 989 // Should we be conservative? 990 if (AssumeMisalignedLoadStores && !mayCombineMisaligned(*STI, *MI)) 991 CanMergeToLSMulti = CanMergeToLSDouble = false; 992 993 // Merge following instructions where possible. 994 for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) { 995 int NewOffset = MemOps[I].Offset; 996 if (NewOffset != Offset + (int)Size) 997 break; 998 const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI); 999 unsigned Reg = MO.getReg(); 1000 if (Reg == ARM::SP || Reg == ARM::PC) 1001 break; 1002 1003 // See if the current load/store may be part of a multi load/store. 1004 unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg); 1005 bool PartOfLSMulti = CanMergeToLSMulti; 1006 if (PartOfLSMulti) { 1007 // Register numbers must be in ascending order. 1008 if (RegNum <= PRegNum) 1009 PartOfLSMulti = false; 1010 // For VFP / NEON load/store multiples, the registers must be 1011 // consecutive and within the limit on the number of registers per 1012 // instruction. 1013 else if (!isNotVFP && RegNum != PRegNum+1) 1014 PartOfLSMulti = false; 1015 } 1016 // See if the current load/store may be part of a double load/store. 1017 bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1; 1018 1019 if (!PartOfLSMulti && !PartOfLSDouble) 1020 break; 1021 CanMergeToLSMulti &= PartOfLSMulti; 1022 CanMergeToLSDouble &= PartOfLSDouble; 1023 // Track MemOp with latest and earliest position (Positions are 1024 // counted in reverse). 1025 unsigned Position = MemOps[I].Position; 1026 if (Position < MemOps[Latest].Position) 1027 Latest = I; 1028 else if (Position > MemOps[Earliest].Position) 1029 Earliest = I; 1030 // Prepare for next MemOp. 1031 Offset += Size; 1032 PRegNum = RegNum; 1033 } 1034 1035 // Form a candidate from the Ops collected so far. 1036 MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate; 1037 for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C) 1038 Candidate->Instrs.push_back(MemOps[C].MI); 1039 Candidate->LatestMIIdx = Latest - SIndex; 1040 Candidate->EarliestMIIdx = Earliest - SIndex; 1041 Candidate->InsertPos = MemOps[Latest].Position; 1042 if (Count == 1) 1043 CanMergeToLSMulti = CanMergeToLSDouble = false; 1044 Candidate->CanMergeToLSMulti = CanMergeToLSMulti; 1045 Candidate->CanMergeToLSDouble = CanMergeToLSDouble; 1046 Candidates.push_back(Candidate); 1047 // Continue after the chain. 1048 SIndex += Count; 1049 } while (SIndex < EIndex); 1050 } 1051 1052 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, 1053 ARM_AM::AMSubMode Mode) { 1054 switch (Opc) { 1055 default: llvm_unreachable("Unhandled opcode!"); 1056 case ARM::LDMIA: 1057 case ARM::LDMDA: 1058 case ARM::LDMDB: 1059 case ARM::LDMIB: 1060 switch (Mode) { 1061 default: llvm_unreachable("Unhandled submode!"); 1062 case ARM_AM::ia: return ARM::LDMIA_UPD; 1063 case ARM_AM::ib: return ARM::LDMIB_UPD; 1064 case ARM_AM::da: return ARM::LDMDA_UPD; 1065 case ARM_AM::db: return ARM::LDMDB_UPD; 1066 } 1067 case ARM::STMIA: 1068 case ARM::STMDA: 1069 case ARM::STMDB: 1070 case ARM::STMIB: 1071 switch (Mode) { 1072 default: llvm_unreachable("Unhandled submode!"); 1073 case ARM_AM::ia: return ARM::STMIA_UPD; 1074 case ARM_AM::ib: return ARM::STMIB_UPD; 1075 case ARM_AM::da: return ARM::STMDA_UPD; 1076 case ARM_AM::db: return ARM::STMDB_UPD; 1077 } 1078 case ARM::t2LDMIA: 1079 case ARM::t2LDMDB: 1080 switch (Mode) { 1081 default: llvm_unreachable("Unhandled submode!"); 1082 case ARM_AM::ia: return ARM::t2LDMIA_UPD; 1083 case ARM_AM::db: return ARM::t2LDMDB_UPD; 1084 } 1085 case ARM::t2STMIA: 1086 case ARM::t2STMDB: 1087 switch (Mode) { 1088 default: llvm_unreachable("Unhandled submode!"); 1089 case ARM_AM::ia: return ARM::t2STMIA_UPD; 1090 case ARM_AM::db: return ARM::t2STMDB_UPD; 1091 } 1092 case ARM::VLDMSIA: 1093 switch (Mode) { 1094 default: llvm_unreachable("Unhandled submode!"); 1095 case ARM_AM::ia: return ARM::VLDMSIA_UPD; 1096 case ARM_AM::db: return ARM::VLDMSDB_UPD; 1097 } 1098 case ARM::VLDMDIA: 1099 switch (Mode) { 1100 default: llvm_unreachable("Unhandled submode!"); 1101 case ARM_AM::ia: return ARM::VLDMDIA_UPD; 1102 case ARM_AM::db: return ARM::VLDMDDB_UPD; 1103 } 1104 case ARM::VSTMSIA: 1105 switch (Mode) { 1106 default: llvm_unreachable("Unhandled submode!"); 1107 case ARM_AM::ia: return ARM::VSTMSIA_UPD; 1108 case ARM_AM::db: return ARM::VSTMSDB_UPD; 1109 } 1110 case ARM::VSTMDIA: 1111 switch (Mode) { 1112 default: llvm_unreachable("Unhandled submode!"); 1113 case ARM_AM::ia: return ARM::VSTMDIA_UPD; 1114 case ARM_AM::db: return ARM::VSTMDDB_UPD; 1115 } 1116 } 1117 } 1118 1119 /// Check if the given instruction increments or decrements a register and 1120 /// return the amount it is incremented/decremented. Returns 0 if the CPSR flags 1121 /// generated by the instruction are possibly read as well. 1122 static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg, 1123 ARMCC::CondCodes Pred, unsigned PredReg) { 1124 bool CheckCPSRDef; 1125 int Scale; 1126 switch (MI.getOpcode()) { 1127 case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break; 1128 case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break; 1129 case ARM::t2SUBri: 1130 case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break; 1131 case ARM::t2ADDri: 1132 case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break; 1133 case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break; 1134 case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break; 1135 default: return 0; 1136 } 1137 1138 unsigned MIPredReg; 1139 if (MI.getOperand(0).getReg() != Reg || 1140 MI.getOperand(1).getReg() != Reg || 1141 getInstrPredicate(MI, MIPredReg) != Pred || 1142 MIPredReg != PredReg) 1143 return 0; 1144 1145 if (CheckCPSRDef && definesCPSR(&MI)) 1146 return 0; 1147 return MI.getOperand(2).getImm() * Scale; 1148 } 1149 1150 /// Searches for an increment or decrement of \p Reg before \p MBBI. 1151 static MachineBasicBlock::iterator 1152 findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg, 1153 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) { 1154 Offset = 0; 1155 MachineBasicBlock &MBB = *MBBI->getParent(); 1156 MachineBasicBlock::iterator BeginMBBI = MBB.begin(); 1157 MachineBasicBlock::iterator EndMBBI = MBB.end(); 1158 if (MBBI == BeginMBBI) 1159 return EndMBBI; 1160 1161 // Skip debug values. 1162 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI); 1163 while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI) 1164 --PrevMBBI; 1165 1166 Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg); 1167 return Offset == 0 ? EndMBBI : PrevMBBI; 1168 } 1169 1170 /// Searches for a increment or decrement of \p Reg after \p MBBI. 1171 static MachineBasicBlock::iterator 1172 findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg, 1173 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) { 1174 Offset = 0; 1175 MachineBasicBlock &MBB = *MBBI->getParent(); 1176 MachineBasicBlock::iterator EndMBBI = MBB.end(); 1177 MachineBasicBlock::iterator NextMBBI = std::next(MBBI); 1178 // Skip debug values. 1179 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) 1180 ++NextMBBI; 1181 if (NextMBBI == EndMBBI) 1182 return EndMBBI; 1183 1184 Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg); 1185 return Offset == 0 ? EndMBBI : NextMBBI; 1186 } 1187 1188 /// Fold proceeding/trailing inc/dec of base register into the 1189 /// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible: 1190 /// 1191 /// stmia rn, <ra, rb, rc> 1192 /// rn := rn + 4 * 3; 1193 /// => 1194 /// stmia rn!, <ra, rb, rc> 1195 /// 1196 /// rn := rn - 4 * 3; 1197 /// ldmia rn, <ra, rb, rc> 1198 /// => 1199 /// ldmdb rn!, <ra, rb, rc> 1200 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) { 1201 // Thumb1 is already using updating loads/stores. 1202 if (isThumb1) return false; 1203 1204 const MachineOperand &BaseOP = MI->getOperand(0); 1205 unsigned Base = BaseOP.getReg(); 1206 bool BaseKill = BaseOP.isKill(); 1207 unsigned PredReg = 0; 1208 ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg); 1209 unsigned Opcode = MI->getOpcode(); 1210 DebugLoc DL = MI->getDebugLoc(); 1211 1212 // Can't use an updating ld/st if the base register is also a dest 1213 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined. 1214 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i) 1215 if (MI->getOperand(i).getReg() == Base) 1216 return false; 1217 1218 int Bytes = getLSMultipleTransferSize(MI); 1219 MachineBasicBlock &MBB = *MI->getParent(); 1220 MachineBasicBlock::iterator MBBI(MI); 1221 int Offset; 1222 MachineBasicBlock::iterator MergeInstr 1223 = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset); 1224 ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode); 1225 if (Mode == ARM_AM::ia && Offset == -Bytes) { 1226 Mode = ARM_AM::db; 1227 } else if (Mode == ARM_AM::ib && Offset == -Bytes) { 1228 Mode = ARM_AM::da; 1229 } else { 1230 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset); 1231 if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) && 1232 ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) 1233 return false; 1234 } 1235 MBB.erase(MergeInstr); 1236 1237 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode); 1238 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc)) 1239 .addReg(Base, getDefRegState(true)) // WB base register 1240 .addReg(Base, getKillRegState(BaseKill)) 1241 .addImm(Pred).addReg(PredReg); 1242 1243 // Transfer the rest of operands. 1244 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum) 1245 MIB.addOperand(MI->getOperand(OpNum)); 1246 1247 // Transfer memoperands. 1248 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); 1249 1250 MBB.erase(MBBI); 1251 return true; 1252 } 1253 1254 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc, 1255 ARM_AM::AddrOpc Mode) { 1256 switch (Opc) { 1257 case ARM::LDRi12: 1258 return ARM::LDR_PRE_IMM; 1259 case ARM::STRi12: 1260 return ARM::STR_PRE_IMM; 1261 case ARM::VLDRS: 1262 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD; 1263 case ARM::VLDRD: 1264 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD; 1265 case ARM::VSTRS: 1266 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD; 1267 case ARM::VSTRD: 1268 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD; 1269 case ARM::t2LDRi8: 1270 case ARM::t2LDRi12: 1271 return ARM::t2LDR_PRE; 1272 case ARM::t2STRi8: 1273 case ARM::t2STRi12: 1274 return ARM::t2STR_PRE; 1275 default: llvm_unreachable("Unhandled opcode!"); 1276 } 1277 } 1278 1279 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, 1280 ARM_AM::AddrOpc Mode) { 1281 switch (Opc) { 1282 case ARM::LDRi12: 1283 return ARM::LDR_POST_IMM; 1284 case ARM::STRi12: 1285 return ARM::STR_POST_IMM; 1286 case ARM::VLDRS: 1287 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD; 1288 case ARM::VLDRD: 1289 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD; 1290 case ARM::VSTRS: 1291 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD; 1292 case ARM::VSTRD: 1293 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD; 1294 case ARM::t2LDRi8: 1295 case ARM::t2LDRi12: 1296 return ARM::t2LDR_POST; 1297 case ARM::t2STRi8: 1298 case ARM::t2STRi12: 1299 return ARM::t2STR_POST; 1300 default: llvm_unreachable("Unhandled opcode!"); 1301 } 1302 } 1303 1304 /// Fold proceeding/trailing inc/dec of base register into the 1305 /// LDR/STR/FLD{D|S}/FST{D|S} op when possible: 1306 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { 1307 // Thumb1 doesn't have updating LDR/STR. 1308 // FIXME: Use LDM/STM with single register instead. 1309 if (isThumb1) return false; 1310 1311 unsigned Base = getLoadStoreBaseOp(*MI).getReg(); 1312 bool BaseKill = getLoadStoreBaseOp(*MI).isKill(); 1313 unsigned Opcode = MI->getOpcode(); 1314 DebugLoc DL = MI->getDebugLoc(); 1315 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS || 1316 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS); 1317 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12); 1318 if (isi32Load(Opcode) || isi32Store(Opcode)) 1319 if (MI->getOperand(2).getImm() != 0) 1320 return false; 1321 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0) 1322 return false; 1323 1324 // Can't do the merge if the destination register is the same as the would-be 1325 // writeback register. 1326 if (MI->getOperand(0).getReg() == Base) 1327 return false; 1328 1329 unsigned PredReg = 0; 1330 ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg); 1331 int Bytes = getLSMultipleTransferSize(MI); 1332 MachineBasicBlock &MBB = *MI->getParent(); 1333 MachineBasicBlock::iterator MBBI(MI); 1334 int Offset; 1335 MachineBasicBlock::iterator MergeInstr 1336 = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset); 1337 unsigned NewOpc; 1338 if (!isAM5 && Offset == Bytes) { 1339 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add); 1340 } else if (Offset == -Bytes) { 1341 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub); 1342 } else { 1343 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset); 1344 if (Offset == Bytes) { 1345 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add); 1346 } else if (!isAM5 && Offset == -Bytes) { 1347 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub); 1348 } else 1349 return false; 1350 } 1351 MBB.erase(MergeInstr); 1352 1353 ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add; 1354 1355 bool isLd = isLoadSingle(Opcode); 1356 if (isAM5) { 1357 // VLDM[SD]_UPD, VSTM[SD]_UPD 1358 // (There are no base-updating versions of VLDR/VSTR instructions, but the 1359 // updating load/store-multiple instructions can be used with only one 1360 // register.) 1361 MachineOperand &MO = MI->getOperand(0); 1362 BuildMI(MBB, MBBI, DL, TII->get(NewOpc)) 1363 .addReg(Base, getDefRegState(true)) // WB base register 1364 .addReg(Base, getKillRegState(isLd ? BaseKill : false)) 1365 .addImm(Pred).addReg(PredReg) 1366 .addReg(MO.getReg(), (isLd ? getDefRegState(true) : 1367 getKillRegState(MO.isKill()))); 1368 } else if (isLd) { 1369 if (isAM2) { 1370 // LDR_PRE, LDR_POST 1371 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) { 1372 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) 1373 .addReg(Base, RegState::Define) 1374 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 1375 } else { 1376 int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); 1377 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) 1378 .addReg(Base, RegState::Define) 1379 .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg); 1380 } 1381 } else { 1382 // t2LDR_PRE, t2LDR_POST 1383 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) 1384 .addReg(Base, RegState::Define) 1385 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 1386 } 1387 } else { 1388 MachineOperand &MO = MI->getOperand(0); 1389 // FIXME: post-indexed stores use am2offset_imm, which still encodes 1390 // the vestigal zero-reg offset register. When that's fixed, this clause 1391 // can be removed entirely. 1392 if (isAM2 && NewOpc == ARM::STR_POST_IMM) { 1393 int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); 1394 // STR_PRE, STR_POST 1395 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base) 1396 .addReg(MO.getReg(), getKillRegState(MO.isKill())) 1397 .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg); 1398 } else { 1399 // t2STR_PRE, t2STR_POST 1400 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base) 1401 .addReg(MO.getReg(), getKillRegState(MO.isKill())) 1402 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 1403 } 1404 } 1405 MBB.erase(MBBI); 1406 1407 return true; 1408 } 1409 1410 bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const { 1411 unsigned Opcode = MI.getOpcode(); 1412 assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) && 1413 "Must have t2STRDi8 or t2LDRDi8"); 1414 if (MI.getOperand(3).getImm() != 0) 1415 return false; 1416 1417 // Behaviour for writeback is undefined if base register is the same as one 1418 // of the others. 1419 const MachineOperand &BaseOp = MI.getOperand(2); 1420 unsigned Base = BaseOp.getReg(); 1421 const MachineOperand &Reg0Op = MI.getOperand(0); 1422 const MachineOperand &Reg1Op = MI.getOperand(1); 1423 if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base) 1424 return false; 1425 1426 unsigned PredReg; 1427 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 1428 MachineBasicBlock::iterator MBBI(MI); 1429 MachineBasicBlock &MBB = *MI.getParent(); 1430 int Offset; 1431 MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred, 1432 PredReg, Offset); 1433 unsigned NewOpc; 1434 if (Offset == 8 || Offset == -8) { 1435 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE; 1436 } else { 1437 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset); 1438 if (Offset == 8 || Offset == -8) { 1439 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST; 1440 } else 1441 return false; 1442 } 1443 MBB.erase(MergeInstr); 1444 1445 DebugLoc DL = MI.getDebugLoc(); 1446 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc)); 1447 if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) { 1448 MIB.addOperand(Reg0Op).addOperand(Reg1Op) 1449 .addReg(BaseOp.getReg(), RegState::Define); 1450 } else { 1451 assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST); 1452 MIB.addReg(BaseOp.getReg(), RegState::Define) 1453 .addOperand(Reg0Op).addOperand(Reg1Op); 1454 } 1455 MIB.addReg(BaseOp.getReg(), RegState::Kill) 1456 .addImm(Offset).addImm(Pred).addReg(PredReg); 1457 assert(TII->get(Opcode).getNumOperands() == 6 && 1458 TII->get(NewOpc).getNumOperands() == 7 && 1459 "Unexpected number of operands in Opcode specification."); 1460 1461 // Transfer implicit operands. 1462 for (const MachineOperand &MO : MI.implicit_operands()) 1463 MIB.addOperand(MO); 1464 MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); 1465 1466 MBB.erase(MBBI); 1467 return true; 1468 } 1469 1470 /// Returns true if instruction is a memory operation that this pass is capable 1471 /// of operating on. 1472 static bool isMemoryOp(const MachineInstr &MI) { 1473 unsigned Opcode = MI.getOpcode(); 1474 switch (Opcode) { 1475 case ARM::VLDRS: 1476 case ARM::VSTRS: 1477 case ARM::VLDRD: 1478 case ARM::VSTRD: 1479 case ARM::LDRi12: 1480 case ARM::STRi12: 1481 case ARM::tLDRi: 1482 case ARM::tSTRi: 1483 case ARM::tLDRspi: 1484 case ARM::tSTRspi: 1485 case ARM::t2LDRi8: 1486 case ARM::t2LDRi12: 1487 case ARM::t2STRi8: 1488 case ARM::t2STRi12: 1489 break; 1490 default: 1491 return false; 1492 } 1493 if (!MI.getOperand(1).isReg()) 1494 return false; 1495 1496 // When no memory operands are present, conservatively assume unaligned, 1497 // volatile, unfoldable. 1498 if (!MI.hasOneMemOperand()) 1499 return false; 1500 1501 const MachineMemOperand &MMO = **MI.memoperands_begin(); 1502 1503 // Don't touch volatile memory accesses - we may be changing their order. 1504 if (MMO.isVolatile()) 1505 return false; 1506 1507 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is 1508 // not. 1509 if (MMO.getAlignment() < 4) 1510 return false; 1511 1512 // str <undef> could probably be eliminated entirely, but for now we just want 1513 // to avoid making a mess of it. 1514 // FIXME: Use str <undef> as a wildcard to enable better stm folding. 1515 if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef()) 1516 return false; 1517 1518 // Likewise don't mess with references to undefined addresses. 1519 if (MI.getOperand(1).isUndef()) 1520 return false; 1521 1522 return true; 1523 } 1524 1525 static void InsertLDR_STR(MachineBasicBlock &MBB, 1526 MachineBasicBlock::iterator &MBBI, 1527 int Offset, bool isDef, 1528 DebugLoc DL, unsigned NewOpc, 1529 unsigned Reg, bool RegDeadKill, bool RegUndef, 1530 unsigned BaseReg, bool BaseKill, bool BaseUndef, 1531 bool OffKill, bool OffUndef, 1532 ARMCC::CondCodes Pred, unsigned PredReg, 1533 const TargetInstrInfo *TII, bool isT2) { 1534 if (isDef) { 1535 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), 1536 TII->get(NewOpc)) 1537 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill)) 1538 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef)); 1539 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 1540 } else { 1541 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), 1542 TII->get(NewOpc)) 1543 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef)) 1544 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef)); 1545 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 1546 } 1547 } 1548 1549 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, 1550 MachineBasicBlock::iterator &MBBI) { 1551 MachineInstr *MI = &*MBBI; 1552 unsigned Opcode = MI->getOpcode(); 1553 if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8) 1554 return false; 1555 1556 const MachineOperand &BaseOp = MI->getOperand(2); 1557 unsigned BaseReg = BaseOp.getReg(); 1558 unsigned EvenReg = MI->getOperand(0).getReg(); 1559 unsigned OddReg = MI->getOperand(1).getReg(); 1560 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false); 1561 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false); 1562 1563 // ARM errata 602117: LDRD with base in list may result in incorrect base 1564 // register when interrupted or faulted. 1565 bool Errata602117 = EvenReg == BaseReg && 1566 (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3(); 1567 // ARM LDRD/STRD needs consecutive registers. 1568 bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) && 1569 (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum); 1570 1571 if (!Errata602117 && !NonConsecutiveRegs) 1572 return false; 1573 1574 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8; 1575 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8; 1576 bool EvenDeadKill = isLd ? 1577 MI->getOperand(0).isDead() : MI->getOperand(0).isKill(); 1578 bool EvenUndef = MI->getOperand(0).isUndef(); 1579 bool OddDeadKill = isLd ? 1580 MI->getOperand(1).isDead() : MI->getOperand(1).isKill(); 1581 bool OddUndef = MI->getOperand(1).isUndef(); 1582 bool BaseKill = BaseOp.isKill(); 1583 bool BaseUndef = BaseOp.isUndef(); 1584 bool OffKill = isT2 ? false : MI->getOperand(3).isKill(); 1585 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef(); 1586 int OffImm = getMemoryOpOffset(MI); 1587 unsigned PredReg = 0; 1588 ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg); 1589 1590 if (OddRegNum > EvenRegNum && OffImm == 0) { 1591 // Ascending register numbers and no offset. It's safe to change it to a 1592 // ldm or stm. 1593 unsigned NewOpc = (isLd) 1594 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA) 1595 : (isT2 ? ARM::t2STMIA : ARM::STMIA); 1596 if (isLd) { 1597 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) 1598 .addReg(BaseReg, getKillRegState(BaseKill)) 1599 .addImm(Pred).addReg(PredReg) 1600 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill)) 1601 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill)); 1602 ++NumLDRD2LDM; 1603 } else { 1604 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) 1605 .addReg(BaseReg, getKillRegState(BaseKill)) 1606 .addImm(Pred).addReg(PredReg) 1607 .addReg(EvenReg, 1608 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef)) 1609 .addReg(OddReg, 1610 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef)); 1611 ++NumSTRD2STM; 1612 } 1613 } else { 1614 // Split into two instructions. 1615 unsigned NewOpc = (isLd) 1616 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) 1617 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); 1618 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset, 1619 // so adjust and use t2LDRi12 here for that. 1620 unsigned NewOpc2 = (isLd) 1621 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) 1622 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); 1623 DebugLoc dl = MBBI->getDebugLoc(); 1624 // If this is a load and base register is killed, it may have been 1625 // re-defed by the load, make sure the first load does not clobber it. 1626 if (isLd && 1627 (BaseKill || OffKill) && 1628 (TRI->regsOverlap(EvenReg, BaseReg))) { 1629 assert(!TRI->regsOverlap(OddReg, BaseReg)); 1630 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, 1631 OddReg, OddDeadKill, false, 1632 BaseReg, false, BaseUndef, false, OffUndef, 1633 Pred, PredReg, TII, isT2); 1634 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, 1635 EvenReg, EvenDeadKill, false, 1636 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, 1637 Pred, PredReg, TII, isT2); 1638 } else { 1639 if (OddReg == EvenReg && EvenDeadKill) { 1640 // If the two source operands are the same, the kill marker is 1641 // probably on the first one. e.g. 1642 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0 1643 EvenDeadKill = false; 1644 OddDeadKill = true; 1645 } 1646 // Never kill the base register in the first instruction. 1647 if (EvenReg == BaseReg) 1648 EvenDeadKill = false; 1649 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, 1650 EvenReg, EvenDeadKill, EvenUndef, 1651 BaseReg, false, BaseUndef, false, OffUndef, 1652 Pred, PredReg, TII, isT2); 1653 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, 1654 OddReg, OddDeadKill, OddUndef, 1655 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, 1656 Pred, PredReg, TII, isT2); 1657 } 1658 if (isLd) 1659 ++NumLDRD2LDR; 1660 else 1661 ++NumSTRD2STR; 1662 } 1663 1664 MBBI = MBB.erase(MBBI); 1665 return true; 1666 } 1667 1668 /// An optimization pass to turn multiple LDR / STR ops of the same base and 1669 /// incrementing offset into LDM / STM ops. 1670 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { 1671 MemOpQueue MemOps; 1672 unsigned CurrBase = 0; 1673 unsigned CurrOpc = ~0u; 1674 ARMCC::CondCodes CurrPred = ARMCC::AL; 1675 unsigned Position = 0; 1676 assert(Candidates.size() == 0); 1677 assert(MergeBaseCandidates.size() == 0); 1678 LiveRegsValid = false; 1679 1680 for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin(); 1681 I = MBBI) { 1682 // The instruction in front of the iterator is the one we look at. 1683 MBBI = std::prev(I); 1684 if (FixInvalidRegPairOp(MBB, MBBI)) 1685 continue; 1686 ++Position; 1687 1688 if (isMemoryOp(*MBBI)) { 1689 unsigned Opcode = MBBI->getOpcode(); 1690 const MachineOperand &MO = MBBI->getOperand(0); 1691 unsigned Reg = MO.getReg(); 1692 unsigned Base = getLoadStoreBaseOp(*MBBI).getReg(); 1693 unsigned PredReg = 0; 1694 ARMCC::CondCodes Pred = getInstrPredicate(*MBBI, PredReg); 1695 int Offset = getMemoryOpOffset(MBBI); 1696 if (CurrBase == 0) { 1697 // Start of a new chain. 1698 CurrBase = Base; 1699 CurrOpc = Opcode; 1700 CurrPred = Pred; 1701 MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position)); 1702 continue; 1703 } 1704 // Note: No need to match PredReg in the next if. 1705 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) { 1706 // Watch out for: 1707 // r4 := ldr [r0, #8] 1708 // r4 := ldr [r0, #4] 1709 // or 1710 // r0 := ldr [r0] 1711 // If a load overrides the base register or a register loaded by 1712 // another load in our chain, we cannot take this instruction. 1713 bool Overlap = false; 1714 if (isLoadSingle(Opcode)) { 1715 Overlap = (Base == Reg); 1716 if (!Overlap) { 1717 for (const MemOpQueueEntry &E : MemOps) { 1718 if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) { 1719 Overlap = true; 1720 break; 1721 } 1722 } 1723 } 1724 } 1725 1726 if (!Overlap) { 1727 // Check offset and sort memory operation into the current chain. 1728 if (Offset > MemOps.back().Offset) { 1729 MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position)); 1730 continue; 1731 } else { 1732 MemOpQueue::iterator MI, ME; 1733 for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) { 1734 if (Offset < MI->Offset) { 1735 // Found a place to insert. 1736 break; 1737 } 1738 if (Offset == MI->Offset) { 1739 // Collision, abort. 1740 MI = ME; 1741 break; 1742 } 1743 } 1744 if (MI != MemOps.end()) { 1745 MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position)); 1746 continue; 1747 } 1748 } 1749 } 1750 } 1751 1752 // Don't advance the iterator; The op will start a new chain next. 1753 MBBI = I; 1754 --Position; 1755 // Fallthrough to look into existing chain. 1756 } else if (MBBI->isDebugValue()) { 1757 continue; 1758 } else if (MBBI->getOpcode() == ARM::t2LDRDi8 || 1759 MBBI->getOpcode() == ARM::t2STRDi8) { 1760 // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions 1761 // remember them because we may still be able to merge add/sub into them. 1762 MergeBaseCandidates.push_back(MBBI); 1763 } 1764 1765 1766 // If we are here then the chain is broken; Extract candidates for a merge. 1767 if (MemOps.size() > 0) { 1768 FormCandidates(MemOps); 1769 // Reset for the next chain. 1770 CurrBase = 0; 1771 CurrOpc = ~0u; 1772 CurrPred = ARMCC::AL; 1773 MemOps.clear(); 1774 } 1775 } 1776 if (MemOps.size() > 0) 1777 FormCandidates(MemOps); 1778 1779 // Sort candidates so they get processed from end to begin of the basic 1780 // block later; This is necessary for liveness calculation. 1781 auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) { 1782 return M0->InsertPos < M1->InsertPos; 1783 }; 1784 std::sort(Candidates.begin(), Candidates.end(), LessThan); 1785 1786 // Go through list of candidates and merge. 1787 bool Changed = false; 1788 for (const MergeCandidate *Candidate : Candidates) { 1789 if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) { 1790 MachineInstr *Merged = MergeOpsUpdate(*Candidate); 1791 // Merge preceding/trailing base inc/dec into the merged op. 1792 if (Merged) { 1793 Changed = true; 1794 unsigned Opcode = Merged->getOpcode(); 1795 if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8) 1796 MergeBaseUpdateLSDouble(*Merged); 1797 else 1798 MergeBaseUpdateLSMultiple(Merged); 1799 } else { 1800 for (MachineInstr *MI : Candidate->Instrs) { 1801 if (MergeBaseUpdateLoadStore(MI)) 1802 Changed = true; 1803 } 1804 } 1805 } else { 1806 assert(Candidate->Instrs.size() == 1); 1807 if (MergeBaseUpdateLoadStore(Candidate->Instrs.front())) 1808 Changed = true; 1809 } 1810 } 1811 Candidates.clear(); 1812 // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt. 1813 for (MachineInstr *MI : MergeBaseCandidates) 1814 MergeBaseUpdateLSDouble(*MI); 1815 MergeBaseCandidates.clear(); 1816 1817 return Changed; 1818 } 1819 1820 /// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr") 1821 /// into the preceding stack restore so it directly restore the value of LR 1822 /// into pc. 1823 /// ldmfd sp!, {..., lr} 1824 /// bx lr 1825 /// or 1826 /// ldmfd sp!, {..., lr} 1827 /// mov pc, lr 1828 /// => 1829 /// ldmfd sp!, {..., pc} 1830 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { 1831 // Thumb1 LDM doesn't allow high registers. 1832 if (isThumb1) return false; 1833 if (MBB.empty()) return false; 1834 1835 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); 1836 if (MBBI != MBB.begin() && 1837 (MBBI->getOpcode() == ARM::BX_RET || 1838 MBBI->getOpcode() == ARM::tBX_RET || 1839 MBBI->getOpcode() == ARM::MOVPCLR)) { 1840 MachineBasicBlock::iterator PrevI = std::prev(MBBI); 1841 // Ignore any DBG_VALUE instructions. 1842 while (PrevI->isDebugValue() && PrevI != MBB.begin()) 1843 --PrevI; 1844 MachineInstr *PrevMI = PrevI; 1845 unsigned Opcode = PrevMI->getOpcode(); 1846 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD || 1847 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD || 1848 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) { 1849 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1); 1850 if (MO.getReg() != ARM::LR) 1851 return false; 1852 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET); 1853 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) || 1854 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!"); 1855 PrevMI->setDesc(TII->get(NewOpc)); 1856 MO.setReg(ARM::PC); 1857 PrevMI->copyImplicitOps(*MBB.getParent(), *MBBI); 1858 MBB.erase(MBBI); 1859 return true; 1860 } 1861 } 1862 return false; 1863 } 1864 1865 bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) { 1866 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1867 if (MBBI == MBB.begin() || MBBI == MBB.end() || 1868 MBBI->getOpcode() != ARM::tBX_RET) 1869 return false; 1870 1871 MachineBasicBlock::iterator Prev = MBBI; 1872 --Prev; 1873 if (Prev->getOpcode() != ARM::tMOVr || !Prev->definesRegister(ARM::LR)) 1874 return false; 1875 1876 for (auto Use : Prev->uses()) 1877 if (Use.isKill()) { 1878 AddDefaultPred(BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX)) 1879 .addReg(Use.getReg(), RegState::Kill)) 1880 .copyImplicitOps(*MBBI); 1881 MBB.erase(MBBI); 1882 MBB.erase(Prev); 1883 return true; 1884 } 1885 1886 llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?"); 1887 } 1888 1889 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { 1890 if (skipFunction(*Fn.getFunction())) 1891 return false; 1892 1893 MF = &Fn; 1894 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget()); 1895 TL = STI->getTargetLowering(); 1896 AFI = Fn.getInfo<ARMFunctionInfo>(); 1897 TII = STI->getInstrInfo(); 1898 TRI = STI->getRegisterInfo(); 1899 1900 RegClassInfoValid = false; 1901 isThumb2 = AFI->isThumb2Function(); 1902 isThumb1 = AFI->isThumbFunction() && !isThumb2; 1903 1904 bool Modified = false; 1905 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; 1906 ++MFI) { 1907 MachineBasicBlock &MBB = *MFI; 1908 Modified |= LoadStoreMultipleOpti(MBB); 1909 if (STI->hasV5TOps()) 1910 Modified |= MergeReturnIntoLDM(MBB); 1911 if (isThumb1) 1912 Modified |= CombineMovBx(MBB); 1913 } 1914 1915 Allocator.DestroyAll(); 1916 return Modified; 1917 } 1918 1919 namespace llvm { 1920 void initializeARMPreAllocLoadStoreOptPass(PassRegistry &); 1921 } 1922 1923 #define ARM_PREALLOC_LOAD_STORE_OPT_NAME \ 1924 "ARM pre- register allocation load / store optimization pass" 1925 1926 namespace { 1927 /// Pre- register allocation pass that move load / stores from consecutive 1928 /// locations close to make it more likely they will be combined later. 1929 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{ 1930 static char ID; 1931 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) { 1932 initializeARMPreAllocLoadStoreOptPass(*PassRegistry::getPassRegistry()); 1933 } 1934 1935 const DataLayout *TD; 1936 const TargetInstrInfo *TII; 1937 const TargetRegisterInfo *TRI; 1938 const ARMSubtarget *STI; 1939 MachineRegisterInfo *MRI; 1940 MachineFunction *MF; 1941 1942 bool runOnMachineFunction(MachineFunction &Fn) override; 1943 1944 const char *getPassName() const override { 1945 return ARM_PREALLOC_LOAD_STORE_OPT_NAME; 1946 } 1947 1948 private: 1949 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, 1950 unsigned &NewOpc, unsigned &EvenReg, 1951 unsigned &OddReg, unsigned &BaseReg, 1952 int &Offset, 1953 unsigned &PredReg, ARMCC::CondCodes &Pred, 1954 bool &isT2); 1955 bool RescheduleOps(MachineBasicBlock *MBB, 1956 SmallVectorImpl<MachineInstr *> &Ops, 1957 unsigned Base, bool isLd, 1958 DenseMap<MachineInstr*, unsigned> &MI2LocMap); 1959 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB); 1960 }; 1961 char ARMPreAllocLoadStoreOpt::ID = 0; 1962 } 1963 1964 INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-load-store-opt", 1965 ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false) 1966 1967 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { 1968 if (AssumeMisalignedLoadStores || skipFunction(*Fn.getFunction())) 1969 return false; 1970 1971 TD = &Fn.getDataLayout(); 1972 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget()); 1973 TII = STI->getInstrInfo(); 1974 TRI = STI->getRegisterInfo(); 1975 MRI = &Fn.getRegInfo(); 1976 MF = &Fn; 1977 1978 bool Modified = false; 1979 for (MachineBasicBlock &MFI : Fn) 1980 Modified |= RescheduleLoadStoreInstrs(&MFI); 1981 1982 return Modified; 1983 } 1984 1985 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, 1986 MachineBasicBlock::iterator I, 1987 MachineBasicBlock::iterator E, 1988 SmallPtrSetImpl<MachineInstr*> &MemOps, 1989 SmallSet<unsigned, 4> &MemRegs, 1990 const TargetRegisterInfo *TRI) { 1991 // Are there stores / loads / calls between them? 1992 // FIXME: This is overly conservative. We should make use of alias information 1993 // some day. 1994 SmallSet<unsigned, 4> AddedRegPressure; 1995 while (++I != E) { 1996 if (I->isDebugValue() || MemOps.count(&*I)) 1997 continue; 1998 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects()) 1999 return false; 2000 if (isLd && I->mayStore()) 2001 return false; 2002 if (!isLd) { 2003 if (I->mayLoad()) 2004 return false; 2005 // It's not safe to move the first 'str' down. 2006 // str r1, [r0] 2007 // strh r5, [r0] 2008 // str r4, [r0, #+4] 2009 if (I->mayStore()) 2010 return false; 2011 } 2012 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) { 2013 MachineOperand &MO = I->getOperand(j); 2014 if (!MO.isReg()) 2015 continue; 2016 unsigned Reg = MO.getReg(); 2017 if (MO.isDef() && TRI->regsOverlap(Reg, Base)) 2018 return false; 2019 if (Reg != Base && !MemRegs.count(Reg)) 2020 AddedRegPressure.insert(Reg); 2021 } 2022 } 2023 2024 // Estimate register pressure increase due to the transformation. 2025 if (MemRegs.size() <= 4) 2026 // Ok if we are moving small number of instructions. 2027 return true; 2028 return AddedRegPressure.size() <= MemRegs.size() * 2; 2029 } 2030 2031 bool 2032 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, 2033 DebugLoc &dl, unsigned &NewOpc, 2034 unsigned &FirstReg, 2035 unsigned &SecondReg, 2036 unsigned &BaseReg, int &Offset, 2037 unsigned &PredReg, 2038 ARMCC::CondCodes &Pred, 2039 bool &isT2) { 2040 // Make sure we're allowed to generate LDRD/STRD. 2041 if (!STI->hasV5TEOps()) 2042 return false; 2043 2044 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD 2045 unsigned Scale = 1; 2046 unsigned Opcode = Op0->getOpcode(); 2047 if (Opcode == ARM::LDRi12) { 2048 NewOpc = ARM::LDRD; 2049 } else if (Opcode == ARM::STRi12) { 2050 NewOpc = ARM::STRD; 2051 } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) { 2052 NewOpc = ARM::t2LDRDi8; 2053 Scale = 4; 2054 isT2 = true; 2055 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) { 2056 NewOpc = ARM::t2STRDi8; 2057 Scale = 4; 2058 isT2 = true; 2059 } else { 2060 return false; 2061 } 2062 2063 // Make sure the base address satisfies i64 ld / st alignment requirement. 2064 // At the moment, we ignore the memoryoperand's value. 2065 // If we want to use AliasAnalysis, we should check it accordingly. 2066 if (!Op0->hasOneMemOperand() || 2067 (*Op0->memoperands_begin())->isVolatile()) 2068 return false; 2069 2070 unsigned Align = (*Op0->memoperands_begin())->getAlignment(); 2071 const Function *Func = MF->getFunction(); 2072 unsigned ReqAlign = STI->hasV6Ops() 2073 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext())) 2074 : 8; // Pre-v6 need 8-byte align 2075 if (Align < ReqAlign) 2076 return false; 2077 2078 // Then make sure the immediate offset fits. 2079 int OffImm = getMemoryOpOffset(Op0); 2080 if (isT2) { 2081 int Limit = (1 << 8) * Scale; 2082 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1))) 2083 return false; 2084 Offset = OffImm; 2085 } else { 2086 ARM_AM::AddrOpc AddSub = ARM_AM::add; 2087 if (OffImm < 0) { 2088 AddSub = ARM_AM::sub; 2089 OffImm = - OffImm; 2090 } 2091 int Limit = (1 << 8) * Scale; 2092 if (OffImm >= Limit || (OffImm & (Scale-1))) 2093 return false; 2094 Offset = ARM_AM::getAM3Opc(AddSub, OffImm); 2095 } 2096 FirstReg = Op0->getOperand(0).getReg(); 2097 SecondReg = Op1->getOperand(0).getReg(); 2098 if (FirstReg == SecondReg) 2099 return false; 2100 BaseReg = Op0->getOperand(1).getReg(); 2101 Pred = getInstrPredicate(*Op0, PredReg); 2102 dl = Op0->getDebugLoc(); 2103 return true; 2104 } 2105 2106 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, 2107 SmallVectorImpl<MachineInstr *> &Ops, 2108 unsigned Base, bool isLd, 2109 DenseMap<MachineInstr*, unsigned> &MI2LocMap) { 2110 bool RetVal = false; 2111 2112 // Sort by offset (in reverse order). 2113 std::sort(Ops.begin(), Ops.end(), 2114 [](const MachineInstr *LHS, const MachineInstr *RHS) { 2115 int LOffset = getMemoryOpOffset(LHS); 2116 int ROffset = getMemoryOpOffset(RHS); 2117 assert(LHS == RHS || LOffset != ROffset); 2118 return LOffset > ROffset; 2119 }); 2120 2121 // The loads / stores of the same base are in order. Scan them from first to 2122 // last and check for the following: 2123 // 1. Any def of base. 2124 // 2. Any gaps. 2125 while (Ops.size() > 1) { 2126 unsigned FirstLoc = ~0U; 2127 unsigned LastLoc = 0; 2128 MachineInstr *FirstOp = nullptr; 2129 MachineInstr *LastOp = nullptr; 2130 int LastOffset = 0; 2131 unsigned LastOpcode = 0; 2132 unsigned LastBytes = 0; 2133 unsigned NumMove = 0; 2134 for (int i = Ops.size() - 1; i >= 0; --i) { 2135 MachineInstr *Op = Ops[i]; 2136 unsigned Loc = MI2LocMap[Op]; 2137 if (Loc <= FirstLoc) { 2138 FirstLoc = Loc; 2139 FirstOp = Op; 2140 } 2141 if (Loc >= LastLoc) { 2142 LastLoc = Loc; 2143 LastOp = Op; 2144 } 2145 2146 unsigned LSMOpcode 2147 = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia); 2148 if (LastOpcode && LSMOpcode != LastOpcode) 2149 break; 2150 2151 int Offset = getMemoryOpOffset(Op); 2152 unsigned Bytes = getLSMultipleTransferSize(Op); 2153 if (LastBytes) { 2154 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes)) 2155 break; 2156 } 2157 LastOffset = Offset; 2158 LastBytes = Bytes; 2159 LastOpcode = LSMOpcode; 2160 if (++NumMove == 8) // FIXME: Tune this limit. 2161 break; 2162 } 2163 2164 if (NumMove <= 1) 2165 Ops.pop_back(); 2166 else { 2167 SmallPtrSet<MachineInstr*, 4> MemOps; 2168 SmallSet<unsigned, 4> MemRegs; 2169 for (int i = NumMove-1; i >= 0; --i) { 2170 MemOps.insert(Ops[i]); 2171 MemRegs.insert(Ops[i]->getOperand(0).getReg()); 2172 } 2173 2174 // Be conservative, if the instructions are too far apart, don't 2175 // move them. We want to limit the increase of register pressure. 2176 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this. 2177 if (DoMove) 2178 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp, 2179 MemOps, MemRegs, TRI); 2180 if (!DoMove) { 2181 for (unsigned i = 0; i != NumMove; ++i) 2182 Ops.pop_back(); 2183 } else { 2184 // This is the new location for the loads / stores. 2185 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp; 2186 while (InsertPos != MBB->end() 2187 && (MemOps.count(InsertPos) || InsertPos->isDebugValue())) 2188 ++InsertPos; 2189 2190 // If we are moving a pair of loads / stores, see if it makes sense 2191 // to try to allocate a pair of registers that can form register pairs. 2192 MachineInstr *Op0 = Ops.back(); 2193 MachineInstr *Op1 = Ops[Ops.size()-2]; 2194 unsigned FirstReg = 0, SecondReg = 0; 2195 unsigned BaseReg = 0, PredReg = 0; 2196 ARMCC::CondCodes Pred = ARMCC::AL; 2197 bool isT2 = false; 2198 unsigned NewOpc = 0; 2199 int Offset = 0; 2200 DebugLoc dl; 2201 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc, 2202 FirstReg, SecondReg, BaseReg, 2203 Offset, PredReg, Pred, isT2)) { 2204 Ops.pop_back(); 2205 Ops.pop_back(); 2206 2207 const MCInstrDesc &MCID = TII->get(NewOpc); 2208 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF); 2209 MRI->constrainRegClass(FirstReg, TRC); 2210 MRI->constrainRegClass(SecondReg, TRC); 2211 2212 // Form the pair instruction. 2213 if (isLd) { 2214 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) 2215 .addReg(FirstReg, RegState::Define) 2216 .addReg(SecondReg, RegState::Define) 2217 .addReg(BaseReg); 2218 // FIXME: We're converting from LDRi12 to an insn that still 2219 // uses addrmode2, so we need an explicit offset reg. It should 2220 // always by reg0 since we're transforming LDRi12s. 2221 if (!isT2) 2222 MIB.addReg(0); 2223 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 2224 MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1)); 2225 DEBUG(dbgs() << "Formed " << *MIB << "\n"); 2226 ++NumLDRDFormed; 2227 } else { 2228 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) 2229 .addReg(FirstReg) 2230 .addReg(SecondReg) 2231 .addReg(BaseReg); 2232 // FIXME: We're converting from LDRi12 to an insn that still 2233 // uses addrmode2, so we need an explicit offset reg. It should 2234 // always by reg0 since we're transforming STRi12s. 2235 if (!isT2) 2236 MIB.addReg(0); 2237 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 2238 MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1)); 2239 DEBUG(dbgs() << "Formed " << *MIB << "\n"); 2240 ++NumSTRDFormed; 2241 } 2242 MBB->erase(Op0); 2243 MBB->erase(Op1); 2244 2245 if (!isT2) { 2246 // Add register allocation hints to form register pairs. 2247 MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg); 2248 MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg); 2249 } 2250 } else { 2251 for (unsigned i = 0; i != NumMove; ++i) { 2252 MachineInstr *Op = Ops.back(); 2253 Ops.pop_back(); 2254 MBB->splice(InsertPos, MBB, Op); 2255 } 2256 } 2257 2258 NumLdStMoved += NumMove; 2259 RetVal = true; 2260 } 2261 } 2262 } 2263 2264 return RetVal; 2265 } 2266 2267 bool 2268 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { 2269 bool RetVal = false; 2270 2271 DenseMap<MachineInstr*, unsigned> MI2LocMap; 2272 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap; 2273 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap; 2274 SmallVector<unsigned, 4> LdBases; 2275 SmallVector<unsigned, 4> StBases; 2276 2277 unsigned Loc = 0; 2278 MachineBasicBlock::iterator MBBI = MBB->begin(); 2279 MachineBasicBlock::iterator E = MBB->end(); 2280 while (MBBI != E) { 2281 for (; MBBI != E; ++MBBI) { 2282 MachineInstr *MI = MBBI; 2283 if (MI->isCall() || MI->isTerminator()) { 2284 // Stop at barriers. 2285 ++MBBI; 2286 break; 2287 } 2288 2289 if (!MI->isDebugValue()) 2290 MI2LocMap[MI] = ++Loc; 2291 2292 if (!isMemoryOp(*MI)) 2293 continue; 2294 unsigned PredReg = 0; 2295 if (getInstrPredicate(*MI, PredReg) != ARMCC::AL) 2296 continue; 2297 2298 int Opc = MI->getOpcode(); 2299 bool isLd = isLoadSingle(Opc); 2300 unsigned Base = MI->getOperand(1).getReg(); 2301 int Offset = getMemoryOpOffset(MI); 2302 2303 bool StopHere = false; 2304 if (isLd) { 2305 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI = 2306 Base2LdsMap.find(Base); 2307 if (BI != Base2LdsMap.end()) { 2308 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { 2309 if (Offset == getMemoryOpOffset(BI->second[i])) { 2310 StopHere = true; 2311 break; 2312 } 2313 } 2314 if (!StopHere) 2315 BI->second.push_back(MI); 2316 } else { 2317 Base2LdsMap[Base].push_back(MI); 2318 LdBases.push_back(Base); 2319 } 2320 } else { 2321 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI = 2322 Base2StsMap.find(Base); 2323 if (BI != Base2StsMap.end()) { 2324 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { 2325 if (Offset == getMemoryOpOffset(BI->second[i])) { 2326 StopHere = true; 2327 break; 2328 } 2329 } 2330 if (!StopHere) 2331 BI->second.push_back(MI); 2332 } else { 2333 Base2StsMap[Base].push_back(MI); 2334 StBases.push_back(Base); 2335 } 2336 } 2337 2338 if (StopHere) { 2339 // Found a duplicate (a base+offset combination that's seen earlier). 2340 // Backtrack. 2341 --Loc; 2342 break; 2343 } 2344 } 2345 2346 // Re-schedule loads. 2347 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) { 2348 unsigned Base = LdBases[i]; 2349 SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base]; 2350 if (Lds.size() > 1) 2351 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap); 2352 } 2353 2354 // Re-schedule stores. 2355 for (unsigned i = 0, e = StBases.size(); i != e; ++i) { 2356 unsigned Base = StBases[i]; 2357 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base]; 2358 if (Sts.size() > 1) 2359 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap); 2360 } 2361 2362 if (MBBI != E) { 2363 Base2LdsMap.clear(); 2364 Base2StsMap.clear(); 2365 LdBases.clear(); 2366 StBases.clear(); 2367 } 2368 } 2369 2370 return RetVal; 2371 } 2372 2373 2374 /// Returns an instance of the load / store optimization pass. 2375 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) { 2376 if (PreAlloc) 2377 return new ARMPreAllocLoadStoreOpt(); 2378 return new ARMLoadStoreOpt(); 2379 } 2380