1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file This file contains a pass that performs load / store related peephole 11 /// optimizations. This pass should be run after register allocation. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ARM.h" 16 #include "ARMBaseInstrInfo.h" 17 #include "ARMBaseRegisterInfo.h" 18 #include "ARMISelLowering.h" 19 #include "ARMMachineFunctionInfo.h" 20 #include "ARMSubtarget.h" 21 #include "MCTargetDesc/ARMAddressingModes.h" 22 #include "ThumbRegisterInfo.h" 23 #include "llvm/ADT/DenseMap.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/ADT/SmallPtrSet.h" 26 #include "llvm/ADT/SmallSet.h" 27 #include "llvm/ADT/SmallVector.h" 28 #include "llvm/ADT/Statistic.h" 29 #include "llvm/CodeGen/MachineBasicBlock.h" 30 #include "llvm/CodeGen/MachineFunctionPass.h" 31 #include "llvm/CodeGen/MachineInstr.h" 32 #include "llvm/CodeGen/MachineInstrBuilder.h" 33 #include "llvm/CodeGen/MachineRegisterInfo.h" 34 #include "llvm/CodeGen/RegisterClassInfo.h" 35 #include "llvm/CodeGen/SelectionDAGNodes.h" 36 #include "llvm/CodeGen/LivePhysRegs.h" 37 #include "llvm/IR/DataLayout.h" 38 #include "llvm/IR/DerivedTypes.h" 39 #include "llvm/IR/Function.h" 40 #include "llvm/Support/Allocator.h" 41 #include "llvm/Support/Debug.h" 42 #include "llvm/Support/ErrorHandling.h" 43 #include "llvm/Support/raw_ostream.h" 44 #include "llvm/Target/TargetInstrInfo.h" 45 #include "llvm/Target/TargetMachine.h" 46 #include "llvm/Target/TargetRegisterInfo.h" 47 using namespace llvm; 48 49 #define DEBUG_TYPE "arm-ldst-opt" 50 51 STATISTIC(NumLDMGened , "Number of ldm instructions generated"); 52 
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");

/// This switch disables formation of double/multi instructions that could
/// potentially lead to (new) alignment traps even with CCR.UNALIGN_TRP
/// disabled. This can be used to create libraries that are robust even when
/// users provoke undefined behaviour by supplying misaligned pointers.
/// \see mayCombineMisaligned()
static cl::opt<bool>
AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden,
    cl::init(false), cl::desc("Be more conservative in ARM load/store opt"));

namespace llvm {
void initializeARMLoadStoreOptPass(PassRegistry &);
}

#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"

namespace {
  /// Post-register-allocation pass that combines load / store instructions
  /// to form ldm / stm instructions.
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {
      initializeARMLoadStoreOptPass(*PassRegistry::getPassRegistry());
    }

    // Cached per-function state, set up in runOnMachineFunction.
    const MachineFunction *MF;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    const TargetLowering *TL;
    ARMFunctionInfo *AFI;
    // Lazily-computed liveness used to find scratch registers; see
    // moveLiveRegsBefore() / findFreeReg().
    LivePhysRegs LiveRegs;
    RegisterClassInfo RegClassInfo;
    MachineBasicBlock::const_iterator LiveRegPos;
    bool LiveRegsValid;
    bool RegClassInfoValid;
    bool isThumb1, isThumb2;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return ARM_LOAD_STORE_OPT_NAME;
    }

  private:
    /// A set of load/store MachineInstrs with same base register sorted by
    /// offset.
    struct MemOpQueueEntry {
      MachineInstr *MI;
      int Offset;        ///< Load/Store offset.
      unsigned Position; ///< Position as counted from end of basic block.
      MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)
          : MI(MI), Offset(Offset), Position(Position) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;

    /// A set of MachineInstrs that fulfill (nearly all) conditions to get
    /// merged into a LDM/STM.
    struct MergeCandidate {
      /// List of instructions ordered by load/store offset.
      SmallVector<MachineInstr*, 4> Instrs;
      /// Index in Instrs of the instruction being latest in the schedule.
      unsigned LatestMIIdx;
      /// Index in Instrs of the instruction being earliest in the schedule.
      unsigned EarliestMIIdx;
      /// Index into the basic block where the merged instruction will be
      /// inserted. (See MemOpQueueEntry.Position)
      unsigned InsertPos;
      /// Whether the instructions can be merged into a ldm/stm instruction.
      bool CanMergeToLSMulti;
      /// Whether the instructions can be merged into a ldrd/strd instruction.
      bool CanMergeToLSDouble;
    };
    SpecificBumpPtrAllocator<MergeCandidate> Allocator;
    SmallVector<const MergeCandidate*,4> Candidates;
    SmallVector<MachineInstr*,4> MergeBaseCandidates;

    void moveLiveRegsBefore(const MachineBasicBlock &MBB,
                            MachineBasicBlock::const_iterator Before);
    unsigned findFreeReg(const TargetRegisterClass &RegClass);
    void UpdateBaseRegUses(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           DebugLoc DL, unsigned Base, unsigned WordOffset,
                           ARMCC::CondCodes Pred, unsigned PredReg);
    MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
        MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
        bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
        DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs);
    MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
        MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
        bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
        DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const;
    void FormCandidates(const MemOpQueue &MemOps);
    MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineInstr *MI);
    bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
    bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
    bool CombineMovBx(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

INITIALIZE_PASS(ARMLoadStoreOpt, "arm-load-store-opt", ARM_LOAD_STORE_OPT_NAME, false, false)

/// Return true if \p MI has a def of CPSR that is not dead, i.e. the flags it
/// sets are consumed later. Such an instruction cannot be folded away or have
/// a flag-setting SUBS placed after it.
static bool definesCPSR(const MachineInstr *MI) {
  for (const auto &MO : MI->operands()) {
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has live CPSR def, then it's not safe to fold it
      // into load / store.
      return true;
  }

  return false;
}

/// Decode the immediate offset operand of a load/store into a signed byte
/// offset, taking the addressing-mode encoding of the opcode into account.
static int getMemoryOpOffset(const MachineInstr *MI) {
  unsigned Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  // The offset immediate is the last operand before the predicate operands.
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  // These opcodes carry a plain byte offset in the immediate field.
  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  // Thumb1 immediate offsets are scaled by 4
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
      Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
    return OffField * 4;

  // Remaining opcodes use AM3 (LDRD/STRD) or AM5 (VFP) encodings, which pack
  // a magnitude and an add/sub flag into the immediate.
  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
    : ARM_AM::getAM5Offset(OffField) * 4;
  ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
    : ARM_AM::getAM5Op(OffField);

  if (Op == ARM_AM::sub)
    return -Offset;

  return Offset;
}

/// Return the base-register operand of a single load/store (operand 1).
static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
  return MI.getOperand(1);
}

/// Return the transferred-register operand of a single load/store (operand 0).
static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
  return MI.getOperand(0);
}

/// Map a single load/store opcode plus a desired addressing submode to the
/// corresponding load/store-multiple opcode, bumping the statistics counters.
/// Returns 0 when the combination only exists in a writeback (_UPD) form.
static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
  case ARM::tLDRspi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
  case ARM::tSTRspi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}

/// Return the addressing submode (ia/ib/da/db) encoded in a load/store
/// multiple opcode.
static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

/// True for the Thumb1 32-bit immediate-offset loads handled by this pass.
static bool isT1i32Load(unsigned Opc) {
  return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
}

/// True for the Thumb2 32-bit immediate-offset loads handled by this pass.
static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

/// True for any (ARM/Thumb1/Thumb2) 32-bit integer load handled by this pass.
static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
}

/// True for the Thumb1 32-bit immediate-offset stores handled by this pass.
static bool isT1i32Store(unsigned Opc) {
  return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
}

/// True for the Thumb2 32-bit immediate-offset stores handled by this pass.
static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

/// True for any (ARM/Thumb1/Thumb2) 32-bit integer store handled by this pass.
static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}

/// True for any single (non-multiple) load: integer or VFP.
static bool isLoadSingle(unsigned Opc) {
  return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
}

/// Number of immediate-field units per word (4 bytes) for a Thumb1 opcode:
/// word accesses encode the offset in words (scale 1), halfword accesses in
/// halfwords (scale 2), byte accesses in bytes (scale 4). Used to convert a
/// word-based writeback delta into the opcode's immediate units.
static unsigned getImmScale(unsigned Opc) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
    return 1;
  case ARM::tLDRHi:
  case ARM::tSTRHi:
    return 2;
  case ARM::tLDRBi:
  case ARM::tSTRBi:
    return 4;
  }
}

/// Number of bytes transferred by \p MI, or 0 if it is not a load/store this
/// pass understands. For multiples, the size is derived from the number of
/// register-list operands beyond the fixed operands.
static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
void
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   DebugLoc DL, unsigned Base,
                                   unsigned WordOffset,
                                   ARMCC::CondCodes Pred, unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");
  // Start updating any instructions with immediate offsets. Insert a SUB before
  // the first non-updateable instruction (if any). WordOffset is the number of
  // 4-byte words by which the base has been advanced by writeback.
  for (; MBBI != MBB.end(); ++MBBI) {
    bool InsertSub = false;
    unsigned Opc = MBBI->getOpcode();

    if (MBBI->readsRegister(Base)) {
      int Offset;
      bool IsLoad =
        Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
      bool IsStore =
        Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

      if (IsLoad || IsStore) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);

        // If storing the base register, it needs to be reset first.
        unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();

        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
          MO.setImm(Offset);
        else
          InsertSub = true;

      } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                 !definesCPSR(MBBI)) {
        // SUBS/ADDS using this register, with a dead def of the CPSR.
        // Merge it with the update; if the merged offset is too large,
        // insert a new sub instead.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4 ;
        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
          // Offset == 0.
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }

      } else {
        // Can't update the instruction.
        InsertSub = true;
      }

    } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
      // Since SUBS sets the condition flags, we can't place the base reset
      // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.
      InsertSub = true;
    }

    if (InsertSub) {
      // An instruction above couldn't be updated, so insert a sub.
      AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
        .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
      return;
    }

    if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
      // Register got killed. Stop updating.
      return;
  }

  // End of block was reached.
  if (MBB.succ_size() > 0) {
    // FIXME: Because of a bug, live registers are sometimes missing from
    // the successor blocks' live-in sets. This means we can't trust that
    // information and *always* have to reset at the end of a block.
    // See PR21029.
    if (MBBI != MBB.end()) --MBBI;
    AddDefaultT1CC(
      BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
      .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
  }
}

/// Return the first register of class \p RegClass that is not in \p Regs.
unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
  // Lazily initialize allocation-order information the first time a scratch
  // register is needed in this function.
  if (!RegClassInfoValid) {
    RegClassInfo.runOnMachineFunction(*MF);
    RegClassInfoValid = true;
  }

  for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
    if (!LiveRegs.contains(Reg))
      return Reg;
  return 0;
}

/// Compute live registers just before instruction \p Before (in normal schedule
/// direction). Computes backwards so multiple queries in the same block must
/// come in reverse order.
void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
                                         MachineBasicBlock::const_iterator Before) {
  // Initialize if we never queried in this block.
  if (!LiveRegsValid) {
    LiveRegs.init(TRI);
    LiveRegs.addLiveOuts(&MBB, true);
    LiveRegPos = MBB.end();
    LiveRegsValid = true;
  }
  // Move backward just before the "Before" position.
  while (LiveRegPos != Before) {
    --LiveRegPos;
    LiveRegs.stepBackward(*LiveRegPos);
  }
}

/// Return true if \p Reg appears as the register component of any
/// (register, kill-flag) pair in \p Regs.
static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
                        unsigned Reg) {
  for (const std::pair<unsigned, bool> &R : Regs)
    if (R.first == Reg)
      return true;
  return false;
}

/// Create and insert a LDM or STM with Base as base register and registers in
/// Regs as the register operands that would be loaded / stored. It returns
/// true if the transformation is done.
MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
    MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
    bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
    DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) {
  unsigned NumRegs = Regs.size();
  assert(NumRegs > 1);

  // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
  // Compute liveness information for that register to make the decision.
  bool SafeToClobberCPSR = !isThumb1 ||
    (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
     MachineBasicBlock::LQR_Dead);

  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.

  // Exception: If the base register is in the input reglist, Thumb1 LDM is
  // non-writeback.
  // It's also not possible to merge an STR of the base register in Thumb1.
  if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) {
    assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
    if (Opcode == ARM::tLDRi) {
      Writeback = false;
    } else if (Opcode == ARM::tSTRi) {
      return nullptr;
    }
  }

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;

  if (Offset == 4 && haveIBAndDA) {
    Mode = ARM_AM::ib;
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
    Mode = ARM_AM::da;
  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
    // Check if this is a supported opcode before inserting instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return nullptr;

    // On Thumb1, it's not worth materializing a new base register without
    // clobbering the CPSR (i.e. not using ADDS/SUBS).
    if (!SafeToClobberCPSR)
      return nullptr;

    unsigned NewBase;
    if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination registers
      // as the new base. Will no longer be writeback in Thumb1.
      NewBase = Regs[NumRegs-1].first;
      Writeback = false;
    } else {
      // Find a free register that we can use as scratch register.
      moveLiveRegsBefore(MBB, InsertBefore);
      // The merged instruction does not exist yet but will use several Regs if
      // it is a Store.
      if (!isLoadSingle(Opcode))
        for (const std::pair<unsigned, bool> &R : Regs)
          LiveRegs.addReg(R.first);

      NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
      if (NewBase == 0)
        return nullptr;
    }

    int BaseOpc =
      isThumb2 ? ARM::t2ADDri :
      (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
      (isThumb1 && Offset < 8) ? ARM::tADDi3 :
      isThumb1 ? ARM::tADDi8 : ARM::ADDri;

    if (Offset < 0) {
      Offset = - Offset;
      BaseOpc =
        isThumb2 ? ARM::t2SUBri :
        (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
        isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
    }

    if (!TL->isLegalAddImmediate(Offset))
      // FIXME: Try add with register operand?
      return nullptr; // Probably not worth it then.

    // We can only append a kill flag to the add/sub input if the value is not
    // used in the register list of the stm as well.
    bool KillOldBase = BaseKill &&
      (!isi32Store(Opcode) || !ContainsReg(Regs, Base));

    if (isThumb1) {
      // Thumb1: depending on immediate size, use either
      //   ADDS NewBase, Base, #imm3
      // or
      //   MOV  NewBase, Base
      //   ADDS NewBase, #imm8.
      if (Base != NewBase &&
          (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
        // Need to insert a MOV to the new base first.
        if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
            !STI->hasV6Ops()) {
          // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
          if (Pred != ARMCC::AL)
            return nullptr;
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase));
        } else
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase))
            .addImm(Pred).addReg(PredReg);

        // The following ADDS/SUBS becomes an update.
        Base = NewBase;
        KillOldBase = true;
      }
      if (BaseOpc == ARM::tADDrSPi) {
        assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
        BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
          .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4)
          .addImm(Pred).addReg(PredReg);
      } else
        AddDefaultT1CC(
          BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true)
          .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
          .addImm(Pred).addReg(PredReg);
    } else {
      BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
        .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
        .addImm(Pred).addReg(PredReg).addReg(0);
    }
    Base = NewBase;
    BaseKill = true; // New base is always killed straight away.
  }

  bool isDef = isLoadSingle(Opcode);

  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  // base register writeback.
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode)
    return nullptr;

  // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
  // - There is no writeback (LDM of base register),
  // - the base register is killed by the merged instruction,
  // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
  //   to reset the base register.
  // Otherwise, don't merge.
  // It's safe to return here since the code to materialize a new base register
  // above is also conditional on SafeToClobberCPSR.
  if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
    return nullptr;

  MachineInstrBuilder MIB;

  if (Writeback) {
    assert(isThumb1 && "expected Writeback only inThumb1");
    if (Opcode == ARM::tLDMIA) {
      assert(!(ContainsReg(Regs, Base)) && "Thumb1 can't LDM ! with Base in Regs");
      // Update tLDMIA with writeback if necessary.
      Opcode = ARM::tLDMIA_UPD;
    }

    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));

    // Thumb1: we might need to set base writeback when building the MI.
    MIB.addReg(Base, getDefRegState(true))
       .addReg(Base, getKillRegState(BaseKill));

    // The base isn't dead after a merged instruction with writeback.
    // Insert a sub instruction after the newly formed instruction to reset.
    if (!BaseKill)
      UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);

  } else {
    // No writeback, simply build the MachineInstr.
    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
    MIB.addReg(Base, getKillRegState(BaseKill));
  }

  MIB.addImm(Pred).addReg(PredReg);

  for (const std::pair<unsigned, bool> &R : Regs)
    MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));

  return MIB.getInstr();
}

/// Create and insert a t2LDRDi8/t2STRDi8 transferring the two registers in
/// \p Regs (which must be exactly two) from/to [Base + Offset].
MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB,
    MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
    bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
    DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const {
  bool IsLoad = isi32Load(Opcode);
  assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
  unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;

  assert(Regs.size() == 2);
  MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
                                    TII->get(LoadStoreOpcode));
  if (IsLoad) {
    MIB.addReg(Regs[0].first, RegState::Define)
       .addReg(Regs[1].first, RegState::Define);
  } else {
    MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
       .addReg(Regs[1].first, getKillRegState(Regs[1].second));
  }
  MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  return MIB.getInstr();
}

/// Call MergeOps and update MemOps and merges accordingly on success.
MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
  const MachineInstr *First = Cand.Instrs.front();
  unsigned Opcode = First->getOpcode();
  bool IsLoad = isLoadSingle(Opcode);
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 4> ImpDefs;
  DenseSet<unsigned> KilledRegs;
  DenseSet<unsigned> UsedRegs;
  // Determine list of registers and list of implicit super-register defs.
  for (const MachineInstr *MI : Cand.Instrs) {
    const MachineOperand &MO = getLoadStoreRegOp(*MI);
    unsigned Reg = MO.getReg();
    bool IsKill = MO.isKill();
    if (IsKill)
      KilledRegs.insert(Reg);
    Regs.push_back(std::make_pair(Reg, IsKill));
    UsedRegs.insert(Reg);

    if (IsLoad) {
      // Collect any implicit defs of super-registers, after merging we can't
      // be sure anymore that we properly preserved these live ranges and must
      // removed these implicit operands.
      for (const MachineOperand &MO : MI->implicit_operands()) {
        if (!MO.isReg() || !MO.isDef() || MO.isDead())
          continue;
        assert(MO.isImplicit());
        unsigned DefReg = MO.getReg();

        if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end())
          continue;
        // We can ignore cases where the super-reg is read and written.
        if (MI->readsRegister(DefReg))
          continue;
        ImpDefs.push_back(DefReg);
      }
    }
  }

  // Attempt the merge.
  typedef MachineBasicBlock::iterator iterator;
  MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
  iterator InsertBefore = std::next(iterator(LatestMI));
  MachineBasicBlock &MBB = *LatestMI->getParent();
  unsigned Offset = getMemoryOpOffset(First);
  unsigned Base = getLoadStoreBaseOp(*First).getReg();
  bool BaseKill = LatestMI->killsRegister(Base);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(*First, PredReg);
  DebugLoc DL = First->getDebugLoc();
  MachineInstr *Merged = nullptr;
  if (Cand.CanMergeToLSDouble)
    Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
                                   Opcode, Pred, PredReg, DL, Regs);
  if (!Merged && Cand.CanMergeToLSMulti)
    Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
                                  Opcode, Pred, PredReg, DL, Regs);
  if (!Merged)
    return nullptr;

  // Determine earliest instruction that will get removed. We then keep an
  // iterator just above it so the following erases don't invalidated it.
  iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
  bool EarliestAtBegin = false;
  if (EarliestI == MBB.begin()) {
    EarliestAtBegin = true;
  } else {
    EarliestI = std::prev(EarliestI);
  }

  // Remove instructions which have been merged.
  for (MachineInstr *MI : Cand.Instrs)
    MBB.erase(MI);

  // Determine range between the earliest removed instruction and the new one.
  if (EarliestAtBegin)
    EarliestI = MBB.begin();
  else
    EarliestI = std::next(EarliestI);
  auto FixupRange = make_range(EarliestI, iterator(Merged));

  if (isLoadSingle(Opcode)) {
    // If the previous loads defined a super-reg, then we have to mark earlier
    // operands undef; Replicate the super-reg def on the merged instruction.
    for (MachineInstr &MI : FixupRange) {
      for (unsigned &ImpDefReg : ImpDefs) {
        for (MachineOperand &MO : MI.implicit_operands()) {
          if (!MO.isReg() || MO.getReg() != ImpDefReg)
            continue;
          if (MO.readsReg())
            MO.setIsUndef();
          else if (MO.isDef())
            ImpDefReg = 0;
        }
      }
    }

    MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
    for (unsigned ImpDef : ImpDefs)
      MIB.addReg(ImpDef, RegState::ImplicitDefine);
  } else {
    // Remove kill flags: We are possibly storing the values later now.
    assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
    for (MachineInstr &MI : FixupRange) {
      for (MachineOperand &MO : MI.uses()) {
        if (!MO.isReg() || !MO.isKill())
          continue;
        if (UsedRegs.count(MO.getReg()))
          MO.setIsKill(false);
      }
    }
    assert(ImpDefs.empty());
  }

  return Merged;
}

/// Return true if \p Offset is encodable by t2LDRDi8/t2STRDi8: a multiple of
/// 4 with magnitude below 1024 (8-bit immediate scaled by 4).
static bool isValidLSDoubleOffset(int Offset) {
  unsigned Value = abs(Offset);
  // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
  // multiplied by 4.
  return (Value % 4) == 0 && Value < 1024;
}

/// Return true for loads/stores that can be combined to a double/multi
/// operation without increasing the requirements for alignment.
static bool mayCombineMisaligned(const TargetSubtargetInfo &STI,
                                 const MachineInstr &MI) {
  // vldr/vstr trap on misaligned pointers anyway, forming vldm makes no
  // difference.
  unsigned Opcode = MI.getOpcode();
  if (!isi32Load(Opcode) && !isi32Store(Opcode))
    return true;

  // Stack pointer alignment is out of the programmers control so we can trust
  // SP-relative loads/stores.
  if (getLoadStoreBaseOp(MI).getReg() == ARM::SP &&
      STI.getFrameLowering()->getTransientStackAlignment() >= 4)
    return true;
  return false;
}

/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
/// Partition \p MemOps (same base, same opcode, same predicate, sorted by
/// offset) into chains of consecutive-offset ops and record each chain as a
/// MergeCandidate, noting whether it may become an LDM/STM-style multiple
/// and/or a t2LDRD/t2STRD pair.
void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
  const MachineInstr *FirstMI = MemOps[0].MI;
  unsigned Opcode = FirstMI->getOpcode();
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  unsigned Size = getLSMultipleTransferSize(FirstMI);

  unsigned SIndex = 0;
  unsigned EIndex = MemOps.size();
  do {
    // Look at the first instruction.
    const MachineInstr *MI = MemOps[SIndex].MI;
    int Offset = MemOps[SIndex].Offset;
    const MachineOperand &PMO = getLoadStoreRegOp(*MI);
    unsigned PReg = PMO.getReg();
    // Undef regs get UINT_MAX so any following register "descends" and the
    // ascending-order check below rejects merging past them.
    unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
    unsigned Latest = SIndex;
    unsigned Earliest = SIndex;
    unsigned Count = 1;
    bool CanMergeToLSDouble =
      STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    if (STI->isCortexM3() && isi32Load(Opcode) &&
        PReg == getLoadStoreBaseOp(*MI).getReg())
      CanMergeToLSDouble = false;

    bool CanMergeToLSMulti = true;
    // On Swift, avoid vldm/vstm starting with an odd register number, as that
    // needs more uops than single vldrs.
    if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
      CanMergeToLSMulti = false;

    // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
    // deprecated; LDM to PC is fine but cannot happen here.
    if (PReg == ARM::SP || PReg == ARM::PC)
      CanMergeToLSMulti = CanMergeToLSDouble = false;

    // Should we be conservative?
    if (AssumeMisalignedLoadStores && !mayCombineMisaligned(*STI, *MI))
      CanMergeToLSMulti = CanMergeToLSDouble = false;

    // Merge following instructions where possible.
    for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
      int NewOffset = MemOps[I].Offset;
      // Chain requires strictly consecutive memory offsets.
      if (NewOffset != Offset + (int)Size)
        break;
      const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
      unsigned Reg = MO.getReg();
      if (Reg == ARM::SP || Reg == ARM::PC)
        break;

      // See if the current load/store may be part of a multi load/store.
      unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
      bool PartOfLSMulti = CanMergeToLSMulti;
      if (PartOfLSMulti) {
        // Register numbers must be in ascending order.
        if (RegNum <= PRegNum)
          PartOfLSMulti = false;
        // For VFP / NEON load/store multiples, the registers must be
        // consecutive and within the limit on the number of registers per
        // instruction.
        else if (!isNotVFP && RegNum != PRegNum+1)
          PartOfLSMulti = false;
      }
      // See if the current load/store may be part of a double load/store.
      // A double transfers exactly two registers, so only the second op
      // (Count == 1) may extend it.
      bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;

      if (!PartOfLSMulti && !PartOfLSDouble)
        break;
      CanMergeToLSMulti &= PartOfLSMulti;
      CanMergeToLSDouble &= PartOfLSDouble;
      // Track MemOp with latest and earliest position (Positions are
      // counted in reverse).
      unsigned Position = MemOps[I].Position;
      if (Position < MemOps[Latest].Position)
        Latest = I;
      else if (Position > MemOps[Earliest].Position)
        Earliest = I;
      // Prepare for next MemOp.
      Offset += Size;
      PRegNum = RegNum;
    }

    // Form a candidate from the Ops collected so far.
    MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
    for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
      Candidate->Instrs.push_back(MemOps[C].MI);
    Candidate->LatestMIIdx = Latest - SIndex;
    Candidate->EarliestMIIdx = Earliest - SIndex;
    Candidate->InsertPos = MemOps[Latest].Position;
    // A single instruction has nothing to merge with; the candidate is still
    // recorded so MergeBaseUpdateLoadStore can try a writeback fold later.
    if (Count == 1)
      CanMergeToLSMulti = CanMergeToLSDouble = false;
    Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
    Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
    Candidates.push_back(Candidate);
    // Continue after the chain.
    SIndex += Count;
  } while (SIndex < EIndex);
}

/// Map a non-writeback load/store-multiple opcode plus an addressing submode
/// to the corresponding base-updating (writeback) opcode.
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    // Thumb2 only has IA and DB forms.
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}

/// Check if the given instruction increments or decrements a register and
/// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
/// generated by the instruction are possibly read as well.
1117 static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg, 1118 ARMCC::CondCodes Pred, unsigned PredReg) { 1119 bool CheckCPSRDef; 1120 int Scale; 1121 switch (MI.getOpcode()) { 1122 case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break; 1123 case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break; 1124 case ARM::t2SUBri: 1125 case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break; 1126 case ARM::t2ADDri: 1127 case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break; 1128 case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break; 1129 case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break; 1130 default: return 0; 1131 } 1132 1133 unsigned MIPredReg; 1134 if (MI.getOperand(0).getReg() != Reg || 1135 MI.getOperand(1).getReg() != Reg || 1136 getInstrPredicate(MI, MIPredReg) != Pred || 1137 MIPredReg != PredReg) 1138 return 0; 1139 1140 if (CheckCPSRDef && definesCPSR(&MI)) 1141 return 0; 1142 return MI.getOperand(2).getImm() * Scale; 1143 } 1144 1145 /// Searches for an increment or decrement of \p Reg before \p MBBI. 1146 static MachineBasicBlock::iterator 1147 findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg, 1148 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) { 1149 Offset = 0; 1150 MachineBasicBlock &MBB = *MBBI->getParent(); 1151 MachineBasicBlock::iterator BeginMBBI = MBB.begin(); 1152 MachineBasicBlock::iterator EndMBBI = MBB.end(); 1153 if (MBBI == BeginMBBI) 1154 return EndMBBI; 1155 1156 // Skip debug values. 1157 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI); 1158 while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI) 1159 --PrevMBBI; 1160 1161 Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg); 1162 return Offset == 0 ? EndMBBI : PrevMBBI; 1163 } 1164 1165 /// Searches for a increment or decrement of \p Reg after \p MBBI. 
static MachineBasicBlock::iterator
findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
                ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
  Offset = 0;
  MachineBasicBlock &MBB = *MBBI->getParent();
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
  // Skip debug values.
  while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
    ++NextMBBI;
  if (NextMBBI == EndMBBI)
    return EndMBBI;

  // Offset stays 0 (and end() is returned) unless the next real instruction
  // is a matching predicated inc/dec of Reg.
  Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
  return Offset == 0 ? EndMBBI : NextMBBI;
}

/// Fold preceding/trailing inc/dec of base register into the
/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;

  const MachineOperand &BaseOP = MI->getOperand(0);
  unsigned Base = BaseOP.getReg();
  bool BaseKill = BaseOP.isKill();
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  int Bytes = getLSMultipleTransferSize(MI);
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator MBBI(MI);
  int Offset;
  // First try to fold an inc/dec that executes BEFORE the memory op: that
  // corresponds to flipping the submode (ia->db, ib->da) with writeback.
  MachineBasicBlock::iterator MergeInstr
    = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
  if (Mode == ARM_AM::ia && Offset == -Bytes) {
    Mode = ARM_AM::db;
  } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
    Mode = ARM_AM::da;
  } else {
    // Otherwise fold an inc/dec AFTER the op: the submode stays the same and
    // the update amount must match the transfer size exactly.
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
        ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes))
      return false;
  }
  MBB.erase(MergeInstr);

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

/// Map a plain load/store opcode to its pre-indexed (writeback-before)
/// counterpart. VLDR/VSTR have no indexed form, so they become single-register
/// writeback VLDM/VSTM instead.
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// Map a plain load/store opcode to its post-indexed (writeback-after)
/// counterpart; VLDR/VSTR again fall back to writeback VLDM/VSTM.
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// Fold preceding/trailing inc/dec of base register into the
/// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
  if (isThumb1) return false;

  unsigned Base = getLoadStoreBaseOp(*MI).getReg();
  bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  // Only ops with a zero immediate offset can absorb a base update; a
  // non-zero offset would change the effective address.
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  int Bytes = getLSMultipleTransferSize(MI);
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator MBBI(MI);
  int Offset;
  // An inc/dec before the op becomes pre-indexed; one after becomes
  // post-indexed. AM5 (VLDR/VSTR) lowers to writeback VLDM/VSTM, which only
  // models "subtract before" / "add after", hence the isAM5 exclusions.
  MachineBasicBlock::iterator MergeInstr
    = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  unsigned NewOpc;
  if (!isAM5 && Offset == Bytes) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
  } else if (Offset == -Bytes) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (Offset == Bytes) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
    } else if (!isAM5 && Offset == -Bytes) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
    } else
      return false;
  }
  MBB.erase(MergeInstr);

  ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;

  bool isLd = isLoadSingle(Opcode);
  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        // Post-indexed AM2 forms encode direction+amount in a packed imm.
        int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
      }
    } else {
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigal zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
    } else {
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  // Remove the original (non-writeback) load/store.
  MBB.erase(MBBI);

  return true;
}

/// Try to fold a neighbouring inc/dec of the base register into a t2LDRDi8 /
/// t2STRDi8 by rewriting it as the pre- or post-indexed t2LDRD/t2STRD form.
bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
  unsigned Opcode = MI.getOpcode();
  assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
         "Must have t2STRDi8 or t2LDRDi8");
  // Only a zero immediate offset can be absorbed into writeback.
  if (MI.getOperand(3).getImm() != 0)
    return false;

  // Behaviour for writeback is undefined if base register is the same as one
  // of the others.
  const MachineOperand &BaseOp = MI.getOperand(2);
  unsigned Base = BaseOp.getReg();
  const MachineOperand &Reg0Op = MI.getOperand(0);
  const MachineOperand &Reg1Op = MI.getOperand(1);
  if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
    return false;

  unsigned PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  MachineBasicBlock::iterator MBBI(MI);
  MachineBasicBlock &MBB = *MI.getParent();
  int Offset;
  // An update of exactly +/-8 (two 32-bit registers) before the op selects
  // the pre-indexed form; after the op, the post-indexed form.
  MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
                                                            PredReg, Offset);
  unsigned NewOpc;
  if (Offset == 8 || Offset == -8) {
    NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (Offset == 8 || Offset == -8) {
      NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
    } else
      return false;
  }
  MBB.erase(MergeInstr);

  DebugLoc DL = MI.getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  // Operand order differs between the load and store writeback forms: loads
  // define the data regs first, stores define the writeback base first.
  if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
    MIB.addOperand(Reg0Op).addOperand(Reg1Op)
      .addReg(BaseOp.getReg(), RegState::Define);
  } else {
    assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
    MIB.addReg(BaseOp.getReg(), RegState::Define)
      .addOperand(Reg0Op).addOperand(Reg1Op);
  }
  MIB.addReg(BaseOp.getReg(), RegState::Kill)
    .addImm(Offset).addImm(Pred).addReg(PredReg);
  assert(TII->get(Opcode).getNumOperands() == 6 &&
         TII->get(NewOpc).getNumOperands() == 7 &&
         "Unexpected number of operands in Opcode specification.");

  // Transfer implicit operands.
  for (const MachineOperand &MO : MI.implicit_operands())
    MIB.addOperand(MO);
  MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());

  MBB.erase(MBBI);
  return true;
}

/// Returns true if instruction is a memory operation that this pass is capable
/// of operating on.
static bool isMemoryOp(const MachineInstr &MI) {
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  case ARM::VLDRS:
  case ARM::VSTRS:
  case ARM::VLDRD:
  case ARM::VSTRD:
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    break;
  default:
    return false;
  }
  // Reject forms whose address operand is not a plain register (e.g. a
  // frame index or constant pool reference).
  if (!MI.getOperand(1).isReg())
    return false;

  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand &MMO = **MI.memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO.isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO.getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just want
  // to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI.getOperand(1).isUndef())
    return false;

  return true;
}

/// Emit a single LDR or STR (opcode \p NewOpc) of \p Reg at [\p BaseReg +
/// \p Offset] before \p MBBI, propagating the given kill/dead/undef operand
/// flags and predicate. \p isDef selects load (define Reg) vs store (use Reg).
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc DL, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

/// Rewrite an LDRD/STRD that uses an illegal register pair (non-consecutive,
/// odd-first, or hitting the Cortex-M3 602117 erratum) into an equivalent
/// LDM/STM or a pair of single loads/stores. Returns true and advances
/// \p MBBI past the replacement if a rewrite happened.
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
    return false;

  const MachineOperand &BaseOp = MI->getOperand(2);
  unsigned BaseReg = BaseOp.getReg();
  unsigned EvenReg = MI->getOperand(0).getReg();
  unsigned OddReg = MI->getOperand(1).getReg();
  unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
  unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);

  // ARM errata 602117: LDRD with base in list may result in incorrect base
  // register when interrupted or faulted.
  bool Errata602117 = EvenReg == BaseReg &&
    (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
  // ARM LDRD/STRD needs consecutive registers.
  bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
    (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);

  if (!Errata602117 && !NonConsecutiveRegs)
    return false;

  bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
  bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
  // For loads the result regs carry dead flags; for stores, kill flags.
  bool EvenDeadKill = isLd ?
    MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
  bool EvenUndef = MI->getOperand(0).isUndef();
  bool OddDeadKill = isLd ?
    MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
  bool OddUndef = MI->getOperand(1).isUndef();
  bool BaseKill = BaseOp.isKill();
  bool BaseUndef = BaseOp.isUndef();
  // t2LDRDi8 has no separate offset-register operand; only ARM-mode LDRD/STRD
  // carry one at operand 3.
  bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
  bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
  int OffImm = getMemoryOpOffset(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);

  if (OddRegNum > EvenRegNum && OffImm == 0) {
    // Ascending register numbers and no offset. It's safe to change it to a
    // ldm or stm.
    unsigned NewOpc = (isLd)
      ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
      : (isT2 ? ARM::t2STMIA : ARM::STMIA);
    if (isLd) {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
        .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
      ++NumLDRD2LDM;
    } else {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg,
                getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
        .addReg(OddReg,
                getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
      ++NumSTRD2STM;
    }
  } else {
    // Split into two instructions.
    unsigned NewOpc = (isLd)
      ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
    // so adjust and use t2LDRi12 here for that.
    unsigned NewOpc2 = (isLd)
      ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    DebugLoc dl = MBBI->getDebugLoc();
    // If this is a load and base register is killed, it may have been
    // re-defed by the load, make sure the first load does not clobber it.
    if (isLd &&
        (BaseKill || OffKill) &&
        (TRI->regsOverlap(EvenReg, BaseReg))) {
      assert(!TRI->regsOverlap(OddReg, BaseReg));
      // Load the odd (non-overlapping) register first so the base survives
      // for the second load.
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, false,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, false,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    } else {
      if (OddReg == EvenReg && EvenDeadKill) {
        // If the two source operands are the same, the kill marker is
        // probably on the first one. e.g.
        // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
        EvenDeadKill = false;
        OddDeadKill = true;
      }
      // Never kill the base register in the first instruction.
      if (EvenReg == BaseReg)
        EvenDeadKill = false;
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, EvenUndef,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, OddUndef,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    }
    if (isLd)
      ++NumLDRD2LDR;
    else
      ++NumSTRD2STR;
  }

  MBBI = MBB.erase(MBBI);
  return true;
}

/// An optimization pass to turn multiple LDR / STR ops of the same base and
/// incrementing offset into LDM / STM ops.
/// Scan \p MBB backwards collecting chains of loads/stores with the same
/// base, opcode and predicate into MemOps, form merge candidates from each
/// chain, then (below) merge them and fold base updates. Returns true if any
/// change was made.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  unsigned CurrOpc = ~0u;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned Position = 0;
  assert(Candidates.size() == 0);
  assert(MergeBaseCandidates.size() == 0);
  LiveRegsValid = false;

  // Iterate from the end so Position counts up as we move towards the top of
  // the block (positions are "counted in reverse").
  for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
       I = MBBI) {
    // The instruction in front of the iterator is the one we look at.
    MBBI = std::prev(I);
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;
    ++Position;

    if (isMemoryOp(*MBBI)) {
      unsigned Opcode = MBBI->getOpcode();
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(*MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      if (CurrBase == 0) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrPred = Pred;
        MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
        continue;
      }
      // Note: No need to match PredReg in the next if.
      if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
        // Watch out for:
        // r4 := ldr [r0, #8]
        // r4 := ldr [r0, #4]
        // or
        // r0 := ldr [r0]
        // If a load overrides the base register or a register loaded by
        // another load in our chain, we cannot take this instruction.
        bool Overlap = false;
        if (isLoadSingle(Opcode)) {
          Overlap = (Base == Reg);
          if (!Overlap) {
            for (const MemOpQueueEntry &E : MemOps) {
              if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
                Overlap = true;
                break;
              }
            }
          }
        }

        if (!Overlap) {
          // Check offset and sort memory operation into the current chain.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
            continue;
          } else {
            MemOpQueue::iterator MI, ME;
            for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
              if (Offset < MI->Offset) {
                // Found a place to insert.
                break;
              }
              if (Offset == MI->Offset) {
                // Collision, abort.
                MI = ME;
                break;
              }
            }
            if (MI != MemOps.end()) {
              MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position));
              continue;
            }
          }
        }
      }

      // Don't advance the iterator; The op will start a new chain next.
      MBBI = I;
      --Position;
      // Fallthrough to look into existing chain.
    } else if (MBBI->isDebugValue()) {
      // Debug values neither break nor join a chain.
      continue;
    } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
               MBBI->getOpcode() == ARM::t2STRDi8) {
      // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions
      // remember them because we may still be able to merge add/sub into them.
      MergeBaseCandidates.push_back(MBBI);
    }


    // If we are here then the chain is broken; Extract candidates for a merge.
    if (MemOps.size() > 0) {
      FormCandidates(MemOps);
      // Reset for the next chain.
      CurrBase = 0;
      CurrOpc = ~0u;
      CurrPred = ARMCC::AL;
      MemOps.clear();
    }
  }
  if (MemOps.size() > 0)
    FormCandidates(MemOps);

  // Sort candidates so they get processed from end to begin of the basic
  // block later; This is necessary for liveness calculation.
  auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
    return M0->InsertPos < M1->InsertPos;
  };
  std::sort(Candidates.begin(), Candidates.end(), LessThan);

  // Go through list of candidates and merge.
  bool Changed = false;
  for (const MergeCandidate *Candidate : Candidates) {
    if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
      MachineInstr *Merged = MergeOpsUpdate(*Candidate);
      // Merge preceding/trailing base inc/dec into the merged op.
      if (Merged) {
        Changed = true;
        unsigned Opcode = Merged->getOpcode();
        if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
          MergeBaseUpdateLSDouble(*Merged);
        else
          MergeBaseUpdateLSMultiple(Merged);
      } else {
        // The multi-merge failed; still try to fold a base update into each
        // individual load/store of the candidate.
        for (MachineInstr *MI : Candidate->Instrs) {
          if (MergeBaseUpdateLoadStore(MI))
            Changed = true;
        }
      }
    } else {
      assert(Candidate->Instrs.size() == 1);
      if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
        Changed = true;
    }
  }
  Candidates.clear();
  // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
  for (MachineInstr *MI : MergeBaseCandidates)
    MergeBaseUpdateLSDouble(*MI);
  MergeBaseCandidates.clear();

  return Changed;
}

/// If this is an exit BB, try merging the return ops ("bx lr" and "mov pc,
/// lr") into the preceding stack restore so it directly restores the value of
/// LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  // Thumb1 LDM doesn't allow high registers.
  if (isThumb1) return false;
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineBasicBlock::iterator PrevI = std::prev(MBBI);
    // Ignore any DBG_VALUE instructions.
    while (PrevI->isDebugValue() && PrevI != MBB.begin())
      --PrevI;
    MachineInstr *PrevMI = PrevI;
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      // LR must be the last register in the transfer list for the rewrite
      // LR -> PC to be an equivalent return.
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), *MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

/// Combine a preceding "mov lr, rX" with a terminating "bx lr" into a single
/// "bx rX", dropping the dead copy into LR (Thumb1 tail-return cleanup).
bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  if (MBBI == MBB.begin() || MBBI == MBB.end() ||
      MBBI->getOpcode() != ARM::tBX_RET)
    return false;

  MachineBasicBlock::iterator Prev = MBBI;
  --Prev;
  if (Prev->getOpcode() != ARM::tMOVr || !Prev->definesRegister(ARM::LR))
    return false;

  // The killed use of tMOVr is the register whose value is being returned.
  for (auto Use : Prev->uses())
    if (Use.isKill()) {
      AddDefaultPred(BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
                         .addReg(Use.getReg(), RegState::Kill))
          .copyImplicitOps(*MBBI);
      MBB.erase(MBBI);
      MBB.erase(Prev);
      return true;
    }

  llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  MF = &Fn;
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TL = STI->getTargetLowering();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();

  RegClassInfoValid = false;
  isThumb2 =
AFI->isThumb2Function(); 1894 isThumb1 = AFI->isThumbFunction() && !isThumb2; 1895 1896 bool Modified = false; 1897 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; 1898 ++MFI) { 1899 MachineBasicBlock &MBB = *MFI; 1900 Modified |= LoadStoreMultipleOpti(MBB); 1901 if (STI->hasV5TOps()) 1902 Modified |= MergeReturnIntoLDM(MBB); 1903 if (isThumb1) 1904 Modified |= CombineMovBx(MBB); 1905 } 1906 1907 Allocator.DestroyAll(); 1908 return Modified; 1909 } 1910 1911 namespace llvm { 1912 void initializeARMPreAllocLoadStoreOptPass(PassRegistry &); 1913 } 1914 1915 #define ARM_PREALLOC_LOAD_STORE_OPT_NAME \ 1916 "ARM pre- register allocation load / store optimization pass" 1917 1918 namespace { 1919 /// Pre- register allocation pass that move load / stores from consecutive 1920 /// locations close to make it more likely they will be combined later. 1921 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{ 1922 static char ID; 1923 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) { 1924 initializeARMPreAllocLoadStoreOptPass(*PassRegistry::getPassRegistry()); 1925 } 1926 1927 const DataLayout *TD; 1928 const TargetInstrInfo *TII; 1929 const TargetRegisterInfo *TRI; 1930 const ARMSubtarget *STI; 1931 MachineRegisterInfo *MRI; 1932 MachineFunction *MF; 1933 1934 bool runOnMachineFunction(MachineFunction &Fn) override; 1935 1936 const char *getPassName() const override { 1937 return ARM_PREALLOC_LOAD_STORE_OPT_NAME; 1938 } 1939 1940 private: 1941 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, 1942 unsigned &NewOpc, unsigned &EvenReg, 1943 unsigned &OddReg, unsigned &BaseReg, 1944 int &Offset, 1945 unsigned &PredReg, ARMCC::CondCodes &Pred, 1946 bool &isT2); 1947 bool RescheduleOps(MachineBasicBlock *MBB, 1948 SmallVectorImpl<MachineInstr *> &Ops, 1949 unsigned Base, bool isLd, 1950 DenseMap<MachineInstr*, unsigned> &MI2LocMap); 1951 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB); 1952 }; 1953 char 
ARMPreAllocLoadStoreOpt::ID = 0; 1954 } 1955 1956 INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-load-store-opt", 1957 ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false) 1958 1959 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { 1960 if (AssumeMisalignedLoadStores) 1961 return false; 1962 1963 TD = &Fn.getDataLayout(); 1964 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget()); 1965 TII = STI->getInstrInfo(); 1966 TRI = STI->getRegisterInfo(); 1967 MRI = &Fn.getRegInfo(); 1968 MF = &Fn; 1969 1970 bool Modified = false; 1971 for (MachineBasicBlock &MFI : Fn) 1972 Modified |= RescheduleLoadStoreInstrs(&MFI); 1973 1974 return Modified; 1975 } 1976 1977 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, 1978 MachineBasicBlock::iterator I, 1979 MachineBasicBlock::iterator E, 1980 SmallPtrSetImpl<MachineInstr*> &MemOps, 1981 SmallSet<unsigned, 4> &MemRegs, 1982 const TargetRegisterInfo *TRI) { 1983 // Are there stores / loads / calls between them? 1984 // FIXME: This is overly conservative. We should make use of alias information 1985 // some day. 1986 SmallSet<unsigned, 4> AddedRegPressure; 1987 while (++I != E) { 1988 if (I->isDebugValue() || MemOps.count(&*I)) 1989 continue; 1990 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects()) 1991 return false; 1992 if (isLd && I->mayStore()) 1993 return false; 1994 if (!isLd) { 1995 if (I->mayLoad()) 1996 return false; 1997 // It's not safe to move the first 'str' down. 
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      // Any def overlapping the base register makes the move unsafe.
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      // Track registers that would become live across the moved range.
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}

/// Check whether the adjacent loads / stores Op0 (lower address) and Op1 can
/// be combined into a single LDRD/STRD. On success, fill in the new opcode,
/// both transfer registers, the base register, the encoded immediate offset,
/// the predicate, and whether the result is a Thumb2 instruction.
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl, unsigned &NewOpc,
                                          unsigned &FirstReg,
                                          unsigned &SecondReg,
                                          unsigned &BaseReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12) {
    NewOpc = ARM::LDRD;
  } else if (Opcode == ARM::STRi12) {
    NewOpc = ARM::STRD;
  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else {
    return false;
  }

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  // At the moment, we ignore the memoryoperand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 need 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    // ARM mode encodes the offset as addrmode3: sign plus magnitude.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  FirstReg = Op0->getOperand(0).getReg();
  SecondReg = Op1->getOperand(0).getReg();
  // A pair with identical transfer registers is rejected.
  if (FirstReg == SecondReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(*Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

/// Try to reschedule the loads (isLd) or stores in Ops, which all use the
/// same base register, so that consecutive-offset accesses become adjacent
/// and may be merged later (a pair may be turned into LDRD/STRD directly
/// here). MI2LocMap records each instruction's original position in the
/// block. Returns true if anything was moved or rewritten.
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVectorImpl<MachineInstr *> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
              int LOffset = getMemoryOpOffset(LHS);
              int ROffset = getMemoryOpOffset(RHS);
              // Duplicate offsets were filtered out by the caller.
              assert(LHS == RHS || LOffset != ROffset);
              return LOffset > ROffset;
            });

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    // Walk Ops from lowest offset (back of the vector) upwards, growing a run
    // of same-opcode, same-size, strictly consecutive accesses; track the
    // first/last original positions of the run.
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      // Require equal-sized accesses at back-to-back offsets (no gaps).
      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores. Loads gather at
        // the earliest position, stores at the latest.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned FirstReg = 0, SecondReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             FirstReg, SecondReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(FirstReg, TRC);
          MRI->constrainRegClass(SecondReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg, RegState::Define)
              .addReg(SecondReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always by reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1));
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg)
              .addReg(SecondReg)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always by reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1));
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          // The pair instruction replaces the two singles.
          MBB->erase(Op0);
          MBB->erase(Op1);

          if (!isT2) {
            // Add register allocation hints to form register pairs.
            MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
            MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
          }
        } else {
          // No pairing opportunity; just splice the instructions next to
          // each other at the chosen insertion point.
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

/// Scan the block and collect, per base register, the unpredicated loads and
/// stores with distinct offsets; then run RescheduleOps on each group.
/// Scanning restarts (and the collected groups are flushed) at calls,
/// terminators, and when a duplicate base+offset pair is seen.
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      // Number each (non-debug) instruction so RescheduleOps can reason
      // about relative positions.
      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(*MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(*MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isLoadSingle(Opc);
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      // Start a fresh window after a barrier or duplicate.
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// Returns an instance of the load / store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}