1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file This file contains a pass that performs load / store related peephole 11 /// optimizations. This pass should be run after register allocation. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ARM.h" 16 #include "ARMBaseInstrInfo.h" 17 #include "ARMBaseRegisterInfo.h" 18 #include "ARMISelLowering.h" 19 #include "ARMMachineFunctionInfo.h" 20 #include "ARMSubtarget.h" 21 #include "MCTargetDesc/ARMAddressingModes.h" 22 #include "ThumbRegisterInfo.h" 23 #include "llvm/ADT/DenseMap.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/ADT/SmallPtrSet.h" 26 #include "llvm/ADT/SmallSet.h" 27 #include "llvm/ADT/SmallVector.h" 28 #include "llvm/ADT/Statistic.h" 29 #include "llvm/CodeGen/MachineBasicBlock.h" 30 #include "llvm/CodeGen/MachineFunctionPass.h" 31 #include "llvm/CodeGen/MachineInstr.h" 32 #include "llvm/CodeGen/MachineInstrBuilder.h" 33 #include "llvm/CodeGen/MachineRegisterInfo.h" 34 #include "llvm/CodeGen/RegisterClassInfo.h" 35 #include "llvm/CodeGen/SelectionDAGNodes.h" 36 #include "llvm/CodeGen/LivePhysRegs.h" 37 #include "llvm/IR/DataLayout.h" 38 #include "llvm/IR/DerivedTypes.h" 39 #include "llvm/IR/Function.h" 40 #include "llvm/Support/Allocator.h" 41 #include "llvm/Support/Debug.h" 42 #include "llvm/Support/ErrorHandling.h" 43 #include "llvm/Support/raw_ostream.h" 44 #include "llvm/Target/TargetInstrInfo.h" 45 #include "llvm/Target/TargetMachine.h" 46 #include "llvm/Target/TargetRegisterInfo.h" 47 using namespace llvm; 48 49 #define DEBUG_TYPE "arm-ldst-opt" 50 51 STATISTIC(NumLDMGened , "Number of ldm instructions generated"); 52 STATISTIC(NumSTMGened , "Number of stm instructions generated"); 53 STATISTIC(NumVLDMGened, "Number of vldm instructions generated"); 54 STATISTIC(NumVSTMGened, "Number of vstm instructions generated"); 55 STATISTIC(NumLdStMoved, "Number of load / store instructions moved"); 56 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation"); 57 STATISTIC(NumSTRDFormed,"Number of strd created before allocation"); 58 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm"); 59 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm"); 60 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's"); 61 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's"); 62 63 /// This switch disables formation of double/multi instructions that could 64 /// potentially lead to (new) alignment traps even with CCR.UNALIGN_TRP 65 /// disabled. This can be used to create libraries that are robust even when 66 /// users provoke undefined behaviour by supplying misaligned pointers. 67 /// \see mayCombineMisaligned() 68 static cl::opt<bool> 69 AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden, 70 cl::init(false), cl::desc("Be more conservative in ARM load/store opt")); 71 72 namespace llvm { 73 void initializeARMLoadStoreOptPass(PassRegistry &); 74 } 75 76 #define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass" 77 78 namespace { 79 /// Post- register allocation pass the combine load / store instructions to 80 /// form ldm / stm instructions. 81 struct ARMLoadStoreOpt : public MachineFunctionPass { 82 static char ID; 83 ARMLoadStoreOpt() : MachineFunctionPass(ID) { 84 initializeARMLoadStoreOptPass(*PassRegistry::getPassRegistry()); 85 } 86 87 const MachineFunction *MF; 88 const TargetInstrInfo *TII; 89 const TargetRegisterInfo *TRI; 90 const ARMSubtarget *STI; 91 const TargetLowering *TL; 92 ARMFunctionInfo *AFI; 93 LivePhysRegs LiveRegs; 94 RegisterClassInfo RegClassInfo; 95 MachineBasicBlock::const_iterator LiveRegPos; 96 bool LiveRegsValid; 97 bool RegClassInfoValid; 98 bool isThumb1, isThumb2; 99 100 bool runOnMachineFunction(MachineFunction &Fn) override; 101 102 MachineFunctionProperties getRequiredProperties() const override { 103 return MachineFunctionProperties().set( 104 MachineFunctionProperties::Property::AllVRegsAllocated); 105 } 106 107 const char *getPassName() const override { 108 return ARM_LOAD_STORE_OPT_NAME; 109 } 110 111 private: 112 /// A set of load/store MachineInstrs with same base register sorted by 113 /// offset. 114 struct MemOpQueueEntry { 115 MachineInstr *MI; 116 int Offset; ///< Load/Store offset. 117 unsigned Position; ///< Position as counted from end of basic block. 118 MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position) 119 : MI(MI), Offset(Offset), Position(Position) {} 120 }; 121 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; 122 123 /// A set of MachineInstrs that fulfill (nearly all) conditions to get 124 /// merged into a LDM/STM. 125 struct MergeCandidate { 126 /// List of instructions ordered by load/store offset. 127 SmallVector<MachineInstr*, 4> Instrs; 128 /// Index in Instrs of the instruction being latest in the schedule. 129 unsigned LatestMIIdx; 130 /// Index in Instrs of the instruction being earliest in the schedule. 131 unsigned EarliestMIIdx; 132 /// Index into the basic block where the merged instruction will be 133 /// inserted. (See MemOpQueueEntry.Position) 134 unsigned InsertPos; 135 /// Whether the instructions can be merged into a ldm/stm instruction. 136 bool CanMergeToLSMulti; 137 /// Whether the instructions can be merged into a ldrd/strd instruction. 138 bool CanMergeToLSDouble; 139 }; 140 SpecificBumpPtrAllocator<MergeCandidate> Allocator; 141 SmallVector<const MergeCandidate*,4> Candidates; 142 SmallVector<MachineInstr*,4> MergeBaseCandidates; 143 144 void moveLiveRegsBefore(const MachineBasicBlock &MBB, 145 MachineBasicBlock::const_iterator Before); 146 unsigned findFreeReg(const TargetRegisterClass &RegClass); 147 void UpdateBaseRegUses(MachineBasicBlock &MBB, 148 MachineBasicBlock::iterator MBBI, 149 DebugLoc DL, unsigned Base, unsigned WordOffset, 150 ARMCC::CondCodes Pred, unsigned PredReg); 151 MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB, 152 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, 153 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, 154 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs); 155 MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB, 156 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, 157 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, 158 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const; 159 void FormCandidates(const MemOpQueue &MemOps); 160 MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand); 161 bool FixInvalidRegPairOp(MachineBasicBlock &MBB, 162 MachineBasicBlock::iterator &MBBI); 163 bool MergeBaseUpdateLoadStore(MachineInstr *MI); 164 bool MergeBaseUpdateLSMultiple(MachineInstr *MI); 165 bool MergeBaseUpdateLSDouble(MachineInstr &MI) const; 166 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); 167 bool MergeReturnIntoLDM(MachineBasicBlock &MBB); 168 bool CombineMovBx(MachineBasicBlock &MBB); 169 }; 170 char ARMLoadStoreOpt::ID = 0; 171 } 172 173 INITIALIZE_PASS(ARMLoadStoreOpt, "arm-load-store-opt", ARM_LOAD_STORE_OPT_NAME, false, false) 174 175 static bool definesCPSR(const MachineInstr *MI) { 176 for (const auto &MO : MI->operands()) { 177 if (!MO.isReg()) 178 continue; 179 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead()) 180 // If the instruction has live CPSR def, then it's not safe to fold it 181 // into load / store. 182 return true; 183 } 184 185 return false; 186 } 187 188 static int getMemoryOpOffset(const MachineInstr *MI) { 189 unsigned Opcode = MI->getOpcode(); 190 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD; 191 unsigned NumOperands = MI->getDesc().getNumOperands(); 192 unsigned OffField = MI->getOperand(NumOperands-3).getImm(); 193 194 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 || 195 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 || 196 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 || 197 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12) 198 return OffField; 199 200 // Thumb1 immediate offsets are scaled by 4 201 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi || 202 Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) 203 return OffField * 4; 204 205 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField) 206 : ARM_AM::getAM5Offset(OffField) * 4; 207 ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField) 208 : ARM_AM::getAM5Op(OffField); 209 210 if (Op == ARM_AM::sub) 211 return -Offset; 212 213 return Offset; 214 } 215 216 static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) { 217 return MI.getOperand(1); 218 } 219 220 static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) { 221 return MI.getOperand(0); 222 } 223 224 static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) { 225 switch (Opcode) { 226 default: llvm_unreachable("Unhandled opcode!"); 227 case ARM::LDRi12: 228 ++NumLDMGened; 229 switch (Mode) { 230 default: llvm_unreachable("Unhandled submode!"); 231 case ARM_AM::ia: return ARM::LDMIA; 232 case ARM_AM::da: return ARM::LDMDA; 233 case ARM_AM::db: return ARM::LDMDB; 234 case ARM_AM::ib: return ARM::LDMIB; 235 } 236 case ARM::STRi12: 237 ++NumSTMGened; 238 switch (Mode) { 239 default: llvm_unreachable("Unhandled submode!"); 240 case ARM_AM::ia: return ARM::STMIA; 241 case ARM_AM::da: return ARM::STMDA; 242 case ARM_AM::db: return ARM::STMDB; 243 case ARM_AM::ib: return ARM::STMIB; 244 } 245 case ARM::tLDRi: 246 case ARM::tLDRspi: 247 // tLDMIA is writeback-only - unless the base register is in the input 248 // reglist. 249 ++NumLDMGened; 250 switch (Mode) { 251 default: llvm_unreachable("Unhandled submode!"); 252 case ARM_AM::ia: return ARM::tLDMIA; 253 } 254 case ARM::tSTRi: 255 case ARM::tSTRspi: 256 // There is no non-writeback tSTMIA either. 257 ++NumSTMGened; 258 switch (Mode) { 259 default: llvm_unreachable("Unhandled submode!"); 260 case ARM_AM::ia: return ARM::tSTMIA_UPD; 261 } 262 case ARM::t2LDRi8: 263 case ARM::t2LDRi12: 264 ++NumLDMGened; 265 switch (Mode) { 266 default: llvm_unreachable("Unhandled submode!"); 267 case ARM_AM::ia: return ARM::t2LDMIA; 268 case ARM_AM::db: return ARM::t2LDMDB; 269 } 270 case ARM::t2STRi8: 271 case ARM::t2STRi12: 272 ++NumSTMGened; 273 switch (Mode) { 274 default: llvm_unreachable("Unhandled submode!"); 275 case ARM_AM::ia: return ARM::t2STMIA; 276 case ARM_AM::db: return ARM::t2STMDB; 277 } 278 case ARM::VLDRS: 279 ++NumVLDMGened; 280 switch (Mode) { 281 default: llvm_unreachable("Unhandled submode!"); 282 case ARM_AM::ia: return ARM::VLDMSIA; 283 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists. 284 } 285 case ARM::VSTRS: 286 ++NumVSTMGened; 287 switch (Mode) { 288 default: llvm_unreachable("Unhandled submode!"); 289 case ARM_AM::ia: return ARM::VSTMSIA; 290 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists. 291 } 292 case ARM::VLDRD: 293 ++NumVLDMGened; 294 switch (Mode) { 295 default: llvm_unreachable("Unhandled submode!"); 296 case ARM_AM::ia: return ARM::VLDMDIA; 297 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists. 298 } 299 case ARM::VSTRD: 300 ++NumVSTMGened; 301 switch (Mode) { 302 default: llvm_unreachable("Unhandled submode!"); 303 case ARM_AM::ia: return ARM::VSTMDIA; 304 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists. 305 } 306 } 307 } 308 309 static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) { 310 switch (Opcode) { 311 default: llvm_unreachable("Unhandled opcode!"); 312 case ARM::LDMIA_RET: 313 case ARM::LDMIA: 314 case ARM::LDMIA_UPD: 315 case ARM::STMIA: 316 case ARM::STMIA_UPD: 317 case ARM::tLDMIA: 318 case ARM::tLDMIA_UPD: 319 case ARM::tSTMIA_UPD: 320 case ARM::t2LDMIA_RET: 321 case ARM::t2LDMIA: 322 case ARM::t2LDMIA_UPD: 323 case ARM::t2STMIA: 324 case ARM::t2STMIA_UPD: 325 case ARM::VLDMSIA: 326 case ARM::VLDMSIA_UPD: 327 case ARM::VSTMSIA: 328 case ARM::VSTMSIA_UPD: 329 case ARM::VLDMDIA: 330 case ARM::VLDMDIA_UPD: 331 case ARM::VSTMDIA: 332 case ARM::VSTMDIA_UPD: 333 return ARM_AM::ia; 334 335 case ARM::LDMDA: 336 case ARM::LDMDA_UPD: 337 case ARM::STMDA: 338 case ARM::STMDA_UPD: 339 return ARM_AM::da; 340 341 case ARM::LDMDB: 342 case ARM::LDMDB_UPD: 343 case ARM::STMDB: 344 case ARM::STMDB_UPD: 345 case ARM::t2LDMDB: 346 case ARM::t2LDMDB_UPD: 347 case ARM::t2STMDB: 348 case ARM::t2STMDB_UPD: 349 case ARM::VLDMSDB_UPD: 350 case ARM::VSTMSDB_UPD: 351 case ARM::VLDMDDB_UPD: 352 case ARM::VSTMDDB_UPD: 353 return ARM_AM::db; 354 355 case ARM::LDMIB: 356 case ARM::LDMIB_UPD: 357 case ARM::STMIB: 358 case ARM::STMIB_UPD: 359 return ARM_AM::ib; 360 } 361 } 362 363 static bool isT1i32Load(unsigned Opc) { 364 return Opc == ARM::tLDRi || Opc == ARM::tLDRspi; 365 } 366 367 static bool isT2i32Load(unsigned Opc) { 368 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8; 369 } 370 371 static bool isi32Load(unsigned Opc) { 372 return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ; 373 } 374 375 static bool isT1i32Store(unsigned Opc) { 376 return Opc == ARM::tSTRi || Opc == ARM::tSTRspi; 377 } 378 379 static bool isT2i32Store(unsigned Opc) { 380 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8; 381 } 382 383 static bool isi32Store(unsigned Opc) { 384 return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc); 385 } 386 387 static bool isLoadSingle(unsigned Opc) { 388 return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD; 389 } 390 391 static unsigned getImmScale(unsigned Opc) { 392 switch (Opc) { 393 default: llvm_unreachable("Unhandled opcode!"); 394 case ARM::tLDRi: 395 case ARM::tSTRi: 396 case ARM::tLDRspi: 397 case ARM::tSTRspi: 398 return 1; 399 case ARM::tLDRHi: 400 case ARM::tSTRHi: 401 return 2; 402 case ARM::tLDRBi: 403 case ARM::tSTRBi: 404 return 4; 405 } 406 } 407 408 static unsigned getLSMultipleTransferSize(const MachineInstr *MI) { 409 switch (MI->getOpcode()) { 410 default: return 0; 411 case ARM::LDRi12: 412 case ARM::STRi12: 413 case ARM::tLDRi: 414 case ARM::tSTRi: 415 case ARM::tLDRspi: 416 case ARM::tSTRspi: 417 case ARM::t2LDRi8: 418 case ARM::t2LDRi12: 419 case ARM::t2STRi8: 420 case ARM::t2STRi12: 421 case ARM::VLDRS: 422 case ARM::VSTRS: 423 return 4; 424 case ARM::VLDRD: 425 case ARM::VSTRD: 426 return 8; 427 case ARM::LDMIA: 428 case ARM::LDMDA: 429 case ARM::LDMDB: 430 case ARM::LDMIB: 431 case ARM::STMIA: 432 case ARM::STMDA: 433 case ARM::STMDB: 434 case ARM::STMIB: 435 case ARM::tLDMIA: 436 case ARM::tLDMIA_UPD: 437 case ARM::tSTMIA_UPD: 438 case ARM::t2LDMIA: 439 case ARM::t2LDMDB: 440 case ARM::t2STMIA: 441 case ARM::t2STMDB: 442 case ARM::VLDMSIA: 443 case ARM::VSTMSIA: 444 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4; 445 case ARM::VLDMDIA: 446 case ARM::VSTMDIA: 447 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8; 448 } 449 } 450 451 /// Update future uses of the base register with the offset introduced 452 /// due to writeback. This function only works on Thumb1. 453 void 454 ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, 455 MachineBasicBlock::iterator MBBI, 456 DebugLoc DL, unsigned Base, 457 unsigned WordOffset, 458 ARMCC::CondCodes Pred, unsigned PredReg) { 459 assert(isThumb1 && "Can only update base register uses for Thumb1!"); 460 // Start updating any instructions with immediate offsets. Insert a SUB before 461 // the first non-updateable instruction (if any). 462 for (; MBBI != MBB.end(); ++MBBI) { 463 bool InsertSub = false; 464 unsigned Opc = MBBI->getOpcode(); 465 466 if (MBBI->readsRegister(Base)) { 467 int Offset; 468 bool IsLoad = 469 Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi; 470 bool IsStore = 471 Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi; 472 473 if (IsLoad || IsStore) { 474 // Loads and stores with immediate offsets can be updated, but only if 475 // the new offset isn't negative. 476 // The MachineOperand containing the offset immediate is the last one 477 // before predicates. 478 MachineOperand &MO = 479 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); 480 // The offsets are scaled by 1, 2 or 4 depending on the Opcode. 481 Offset = MO.getImm() - WordOffset * getImmScale(Opc); 482 483 // If storing the base register, it needs to be reset first. 484 unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg(); 485 486 if (Offset >= 0 && !(IsStore && InstrSrcReg == Base)) 487 MO.setImm(Offset); 488 else 489 InsertSub = true; 490 491 } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) && 492 !definesCPSR(MBBI)) { 493 // SUBS/ADDS using this register, with a dead def of the CPSR. 494 // Merge it with the update; if the merged offset is too large, 495 // insert a new sub instead. 496 MachineOperand &MO = 497 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); 498 Offset = (Opc == ARM::tSUBi8) ? 499 MO.getImm() + WordOffset * 4 : 500 MO.getImm() - WordOffset * 4 ; 501 if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) { 502 // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if 503 // Offset == 0. 504 MO.setImm(Offset); 505 // The base register has now been reset, so exit early. 506 return; 507 } else { 508 InsertSub = true; 509 } 510 511 } else { 512 // Can't update the instruction. 513 InsertSub = true; 514 } 515 516 } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) { 517 // Since SUBS sets the condition flags, we can't place the base reset 518 // after an instruction that has a live CPSR def. 519 // The base register might also contain an argument for a function call. 520 InsertSub = true; 521 } 522 523 if (InsertSub) { 524 // An instruction above couldn't be updated, so insert a sub. 525 AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true) 526 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg); 527 return; 528 } 529 530 if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base)) 531 // Register got killed. Stop updating. 532 return; 533 } 534 535 // End of block was reached. 536 if (MBB.succ_size() > 0) { 537 // FIXME: Because of a bug, live registers are sometimes missing from 538 // the successor blocks' live-in sets. This means we can't trust that 539 // information and *always* have to reset at the end of a block. 540 // See PR21029. 541 if (MBBI != MBB.end()) --MBBI; 542 AddDefaultT1CC( 543 BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true) 544 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg); 545 } 546 } 547 548 /// Return the first register of class \p RegClass that is not in \p Regs. 549 unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) { 550 if (!RegClassInfoValid) { 551 RegClassInfo.runOnMachineFunction(*MF); 552 RegClassInfoValid = true; 553 } 554 555 for (unsigned Reg : RegClassInfo.getOrder(&RegClass)) 556 if (!LiveRegs.contains(Reg)) 557 return Reg; 558 return 0; 559 } 560 561 /// Compute live registers just before instruction \p Before (in normal schedule 562 /// direction). Computes backwards so multiple queries in the same block must 563 /// come in reverse order. 564 void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB, 565 MachineBasicBlock::const_iterator Before) { 566 // Initialize if we never queried in this block. 567 if (!LiveRegsValid) { 568 LiveRegs.init(TRI); 569 LiveRegs.addLiveOuts(MBB); 570 LiveRegPos = MBB.end(); 571 LiveRegsValid = true; 572 } 573 // Move backward just before the "Before" position. 574 while (LiveRegPos != Before) { 575 --LiveRegPos; 576 LiveRegs.stepBackward(*LiveRegPos); 577 } 578 } 579 580 static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs, 581 unsigned Reg) { 582 for (const std::pair<unsigned, bool> &R : Regs) 583 if (R.first == Reg) 584 return true; 585 return false; 586 } 587 588 /// Create and insert a LDM or STM with Base as base register and registers in 589 /// Regs as the register operands that would be loaded / stored. It returns 590 /// true if the transformation is done. 591 MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB, 592 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, 593 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, 594 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) { 595 unsigned NumRegs = Regs.size(); 596 assert(NumRegs > 1); 597 598 // For Thumb1 targets, it might be necessary to clobber the CPSR to merge. 599 // Compute liveness information for that register to make the decision. 600 bool SafeToClobberCPSR = !isThumb1 || 601 (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) == 602 MachineBasicBlock::LQR_Dead); 603 604 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback. 605 606 // Exception: If the base register is in the input reglist, Thumb1 LDM is 607 // non-writeback. 608 // It's also not possible to merge an STR of the base register in Thumb1. 609 if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) { 610 assert(Base != ARM::SP && "Thumb1 does not allow SP in register list"); 611 if (Opcode == ARM::tLDRi) { 612 Writeback = false; 613 } else if (Opcode == ARM::tSTRi) { 614 return nullptr; 615 } 616 } 617 618 ARM_AM::AMSubMode Mode = ARM_AM::ia; 619 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA. 620 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); 621 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1; 622 623 if (Offset == 4 && haveIBAndDA) { 624 Mode = ARM_AM::ib; 625 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) { 626 Mode = ARM_AM::da; 627 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) { 628 // VLDM/VSTM do not support DB mode without also updating the base reg. 629 Mode = ARM_AM::db; 630 } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) { 631 // Check if this is a supported opcode before inserting instructions to 632 // calculate a new base register. 633 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr; 634 635 // If starting offset isn't zero, insert a MI to materialize a new base. 636 // But only do so if it is cost effective, i.e. merging more than two 637 // loads / stores. 638 if (NumRegs <= 2) 639 return nullptr; 640 641 // On Thumb1, it's not worth materializing a new base register without 642 // clobbering the CPSR (i.e. not using ADDS/SUBS). 643 if (!SafeToClobberCPSR) 644 return nullptr; 645 646 unsigned NewBase; 647 if (isi32Load(Opcode)) { 648 // If it is a load, then just use one of the destination registers 649 // as the new base. Will no longer be writeback in Thumb1. 650 NewBase = Regs[NumRegs-1].first; 651 Writeback = false; 652 } else { 653 // Find a free register that we can use as scratch register. 654 moveLiveRegsBefore(MBB, InsertBefore); 655 // The merged instruction does not exist yet but will use several Regs if 656 // it is a Store. 657 if (!isLoadSingle(Opcode)) 658 for (const std::pair<unsigned, bool> &R : Regs) 659 LiveRegs.addReg(R.first); 660 661 NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass); 662 if (NewBase == 0) 663 return nullptr; 664 } 665 666 int BaseOpc = 667 isThumb2 ? ARM::t2ADDri : 668 (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi : 669 (isThumb1 && Offset < 8) ? ARM::tADDi3 : 670 isThumb1 ? ARM::tADDi8 : ARM::ADDri; 671 672 if (Offset < 0) { 673 Offset = - Offset; 674 BaseOpc = 675 isThumb2 ? ARM::t2SUBri : 676 (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 : 677 isThumb1 ? ARM::tSUBi8 : ARM::SUBri; 678 } 679 680 if (!TL->isLegalAddImmediate(Offset)) 681 // FIXME: Try add with register operand? 682 return nullptr; // Probably not worth it then. 683 684 // We can only append a kill flag to the add/sub input if the value is not 685 // used in the register list of the stm as well. 686 bool KillOldBase = BaseKill && 687 (!isi32Store(Opcode) || !ContainsReg(Regs, Base)); 688 689 if (isThumb1) { 690 // Thumb1: depending on immediate size, use either 691 // ADDS NewBase, Base, #imm3 692 // or 693 // MOV NewBase, Base 694 // ADDS NewBase, #imm8. 695 if (Base != NewBase && 696 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) { 697 // Need to insert a MOV to the new base first. 698 if (isARMLowRegister(NewBase) && isARMLowRegister(Base) && 699 !STI->hasV6Ops()) { 700 // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr 701 if (Pred != ARMCC::AL) 702 return nullptr; 703 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase) 704 .addReg(Base, getKillRegState(KillOldBase)); 705 } else 706 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase) 707 .addReg(Base, getKillRegState(KillOldBase)) 708 .addImm(Pred).addReg(PredReg); 709 710 // The following ADDS/SUBS becomes an update. 711 Base = NewBase; 712 KillOldBase = true; 713 } 714 if (BaseOpc == ARM::tADDrSPi) { 715 assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4"); 716 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase) 717 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4) 718 .addImm(Pred).addReg(PredReg); 719 } else 720 AddDefaultT1CC( 721 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true) 722 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset) 723 .addImm(Pred).addReg(PredReg); 724 } else { 725 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase) 726 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset) 727 .addImm(Pred).addReg(PredReg).addReg(0); 728 } 729 Base = NewBase; 730 BaseKill = true; // New base is always killed straight away. 731 } 732 733 bool isDef = isLoadSingle(Opcode); 734 735 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with 736 // base register writeback. 737 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); 738 if (!Opcode) 739 return nullptr; 740 741 // Check if a Thumb1 LDM/STM merge is safe. This is the case if: 742 // - There is no writeback (LDM of base register), 743 // - the base register is killed by the merged instruction, 744 // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS 745 // to reset the base register. 746 // Otherwise, don't merge. 747 // It's safe to return here since the code to materialize a new base register 748 // above is also conditional on SafeToClobberCPSR. 749 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill) 750 return nullptr; 751 752 MachineInstrBuilder MIB; 753 754 if (Writeback) { 755 assert(isThumb1 && "expected Writeback only inThumb1"); 756 if (Opcode == ARM::tLDMIA) { 757 assert(!(ContainsReg(Regs, Base)) && "Thumb1 can't LDM ! with Base in Regs"); 758 // Update tLDMIA with writeback if necessary. 759 Opcode = ARM::tLDMIA_UPD; 760 } 761 762 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode)); 763 764 // Thumb1: we might need to set base writeback when building the MI. 765 MIB.addReg(Base, getDefRegState(true)) 766 .addReg(Base, getKillRegState(BaseKill)); 767 768 // The base isn't dead after a merged instruction with writeback. 769 // Insert a sub instruction after the newly formed instruction to reset. 770 if (!BaseKill) 771 UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg); 772 773 } else { 774 // No writeback, simply build the MachineInstr. 775 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode)); 776 MIB.addReg(Base, getKillRegState(BaseKill)); 777 } 778 779 MIB.addImm(Pred).addReg(PredReg); 780 781 for (const std::pair<unsigned, bool> &R : Regs) 782 MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second)); 783 784 return MIB.getInstr(); 785 } 786 787 MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB, 788 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, 789 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, 790 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const { 791 bool IsLoad = isi32Load(Opcode); 792 assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store"); 793 unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8; 794 795 assert(Regs.size() == 2); 796 MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL, 797 TII->get(LoadStoreOpcode)); 798 if (IsLoad) { 799 MIB.addReg(Regs[0].first, RegState::Define) 800 .addReg(Regs[1].first, RegState::Define); 801 } else { 802 MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second)) 803 .addReg(Regs[1].first, getKillRegState(Regs[1].second)); 804 } 805 MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 806 return MIB.getInstr(); 807 } 808 809 /// Call MergeOps and update MemOps and merges accordingly on success. 810 MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) { 811 const MachineInstr *First = Cand.Instrs.front(); 812 unsigned Opcode = First->getOpcode(); 813 bool IsLoad = isLoadSingle(Opcode); 814 SmallVector<std::pair<unsigned, bool>, 8> Regs; 815 SmallVector<unsigned, 4> ImpDefs; 816 DenseSet<unsigned> KilledRegs; 817 DenseSet<unsigned> UsedRegs; 818 // Determine list of registers and list of implicit super-register defs. 819 for (const MachineInstr *MI : Cand.Instrs) { 820 const MachineOperand &MO = getLoadStoreRegOp(*MI); 821 unsigned Reg = MO.getReg(); 822 bool IsKill = MO.isKill(); 823 if (IsKill) 824 KilledRegs.insert(Reg); 825 Regs.push_back(std::make_pair(Reg, IsKill)); 826 UsedRegs.insert(Reg); 827 828 if (IsLoad) { 829 // Collect any implicit defs of super-registers, after merging we can't 830 // be sure anymore that we properly preserved these live ranges and must 831 // removed these implicit operands. 832 for (const MachineOperand &MO : MI->implicit_operands()) { 833 if (!MO.isReg() || !MO.isDef() || MO.isDead()) 834 continue; 835 assert(MO.isImplicit()); 836 unsigned DefReg = MO.getReg(); 837 838 if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end()) 839 continue; 840 // We can ignore cases where the super-reg is read and written. 841 if (MI->readsRegister(DefReg)) 842 continue; 843 ImpDefs.push_back(DefReg); 844 } 845 } 846 } 847 848 // Attempt the merge. 849 typedef MachineBasicBlock::iterator iterator; 850 MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx]; 851 iterator InsertBefore = std::next(iterator(LatestMI)); 852 MachineBasicBlock &MBB = *LatestMI->getParent(); 853 unsigned Offset = getMemoryOpOffset(First); 854 unsigned Base = getLoadStoreBaseOp(*First).getReg(); 855 bool BaseKill = LatestMI->killsRegister(Base); 856 unsigned PredReg = 0; 857 ARMCC::CondCodes Pred = getInstrPredicate(*First, PredReg); 858 DebugLoc DL = First->getDebugLoc(); 859 MachineInstr *Merged = nullptr; 860 if (Cand.CanMergeToLSDouble) 861 Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill, 862 Opcode, Pred, PredReg, DL, Regs); 863 if (!Merged && Cand.CanMergeToLSMulti) 864 Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill, 865 Opcode, Pred, PredReg, DL, Regs); 866 if (!Merged) 867 return nullptr; 868 869 // Determine earliest instruction that will get removed. We then keep an 870 // iterator just above it so the following erases don't invalidated it. 871 iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]); 872 bool EarliestAtBegin = false; 873 if (EarliestI == MBB.begin()) { 874 EarliestAtBegin = true; 875 } else { 876 EarliestI = std::prev(EarliestI); 877 } 878 879 // Remove instructions which have been merged. 880 for (MachineInstr *MI : Cand.Instrs) 881 MBB.erase(MI); 882 883 // Determine range between the earliest removed instruction and the new one. 884 if (EarliestAtBegin) 885 EarliestI = MBB.begin(); 886 else 887 EarliestI = std::next(EarliestI); 888 auto FixupRange = make_range(EarliestI, iterator(Merged)); 889 890 if (isLoadSingle(Opcode)) { 891 // If the previous loads defined a super-reg, then we have to mark earlier 892 // operands undef; Replicate the super-reg def on the merged instruction. 893 for (MachineInstr &MI : FixupRange) { 894 for (unsigned &ImpDefReg : ImpDefs) { 895 for (MachineOperand &MO : MI.implicit_operands()) { 896 if (!MO.isReg() || MO.getReg() != ImpDefReg) 897 continue; 898 if (MO.readsReg()) 899 MO.setIsUndef(); 900 else if (MO.isDef()) 901 ImpDefReg = 0; 902 } 903 } 904 } 905 906 MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged); 907 for (unsigned ImpDef : ImpDefs) 908 MIB.addReg(ImpDef, RegState::ImplicitDefine); 909 } else { 910 // Remove kill flags: We are possibly storing the values later now. 911 assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD); 912 for (MachineInstr &MI : FixupRange) { 913 for (MachineOperand &MO : MI.uses()) { 914 if (!MO.isReg() || !MO.isKill()) 915 continue; 916 if (UsedRegs.count(MO.getReg())) 917 MO.setIsKill(false); 918 } 919 } 920 assert(ImpDefs.empty()); 921 } 922 923 return Merged; 924 } 925 926 static bool isValidLSDoubleOffset(int Offset) { 927 unsigned Value = abs(Offset); 928 // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally 929 // multiplied by 4. 930 return (Value % 4) == 0 && Value < 1024; 931 } 932 933 /// Return true for loads/stores that can be combined to a double/multi 934 /// operation without increasing the requirements for alignment. 935 static bool mayCombineMisaligned(const TargetSubtargetInfo &STI, 936 const MachineInstr &MI) { 937 // vldr/vstr trap on misaligned pointers anyway, forming vldm makes no 938 // difference. 939 unsigned Opcode = MI.getOpcode(); 940 if (!isi32Load(Opcode) && !isi32Store(Opcode)) 941 return true; 942 943 // Stack pointer alignment is out of the programmers control so we can trust 944 // SP-relative loads/stores. 945 if (getLoadStoreBaseOp(MI).getReg() == ARM::SP && 946 STI.getFrameLowering()->getTransientStackAlignment() >= 4) 947 return true; 948 return false; 949 } 950 951 /// Find candidates for load/store multiple merge in list of MemOpQueueEntries. 952 void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) { 953 const MachineInstr *FirstMI = MemOps[0].MI; 954 unsigned Opcode = FirstMI->getOpcode(); 955 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); 956 unsigned Size = getLSMultipleTransferSize(FirstMI); 957 958 unsigned SIndex = 0; 959 unsigned EIndex = MemOps.size(); 960 do { 961 // Look at the first instruction. 962 const MachineInstr *MI = MemOps[SIndex].MI; 963 int Offset = MemOps[SIndex].Offset; 964 const MachineOperand &PMO = getLoadStoreRegOp(*MI); 965 unsigned PReg = PMO.getReg(); 966 unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg); 967 unsigned Latest = SIndex; 968 unsigned Earliest = SIndex; 969 unsigned Count = 1; 970 bool CanMergeToLSDouble = 971 STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset); 972 // ARM errata 602117: LDRD with base in list may result in incorrect base 973 // register when interrupted or faulted. 974 if (STI->isCortexM3() && isi32Load(Opcode) && 975 PReg == getLoadStoreBaseOp(*MI).getReg()) 976 CanMergeToLSDouble = false; 977 978 bool CanMergeToLSMulti = true; 979 // On swift vldm/vstm starting with an odd register number as that needs 980 // more uops than single vldrs. 981 if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1) 982 CanMergeToLSMulti = false; 983 984 // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it 985 // deprecated; LDM to PC is fine but cannot happen here. 986 if (PReg == ARM::SP || PReg == ARM::PC) 987 CanMergeToLSMulti = CanMergeToLSDouble = false; 988 989 // Should we be conservative? 990 if (AssumeMisalignedLoadStores && !mayCombineMisaligned(*STI, *MI)) 991 CanMergeToLSMulti = CanMergeToLSDouble = false; 992 993 // Merge following instructions where possible. 994 for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) { 995 int NewOffset = MemOps[I].Offset; 996 if (NewOffset != Offset + (int)Size) 997 break; 998 const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI); 999 unsigned Reg = MO.getReg(); 1000 if (Reg == ARM::SP || Reg == ARM::PC) 1001 break; 1002 1003 // See if the current load/store may be part of a multi load/store. 1004 unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg); 1005 bool PartOfLSMulti = CanMergeToLSMulti; 1006 if (PartOfLSMulti) { 1007 // Register numbers must be in ascending order. 1008 if (RegNum <= PRegNum) 1009 PartOfLSMulti = false; 1010 // For VFP / NEON load/store multiples, the registers must be 1011 // consecutive and within the limit on the number of registers per 1012 // instruction. 1013 else if (!isNotVFP && RegNum != PRegNum+1) 1014 PartOfLSMulti = false; 1015 } 1016 // See if the current load/store may be part of a double load/store. 1017 bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1; 1018 1019 if (!PartOfLSMulti && !PartOfLSDouble) 1020 break; 1021 CanMergeToLSMulti &= PartOfLSMulti; 1022 CanMergeToLSDouble &= PartOfLSDouble; 1023 // Track MemOp with latest and earliest position (Positions are 1024 // counted in reverse). 1025 unsigned Position = MemOps[I].Position; 1026 if (Position < MemOps[Latest].Position) 1027 Latest = I; 1028 else if (Position > MemOps[Earliest].Position) 1029 Earliest = I; 1030 // Prepare for next MemOp. 1031 Offset += Size; 1032 PRegNum = RegNum; 1033 } 1034 1035 // Form a candidate from the Ops collected so far. 1036 MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate; 1037 for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C) 1038 Candidate->Instrs.push_back(MemOps[C].MI); 1039 Candidate->LatestMIIdx = Latest - SIndex; 1040 Candidate->EarliestMIIdx = Earliest - SIndex; 1041 Candidate->InsertPos = MemOps[Latest].Position; 1042 if (Count == 1) 1043 CanMergeToLSMulti = CanMergeToLSDouble = false; 1044 Candidate->CanMergeToLSMulti = CanMergeToLSMulti; 1045 Candidate->CanMergeToLSDouble = CanMergeToLSDouble; 1046 Candidates.push_back(Candidate); 1047 // Continue after the chain. 1048 SIndex += Count; 1049 } while (SIndex < EIndex); 1050 } 1051 1052 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, 1053 ARM_AM::AMSubMode Mode) { 1054 switch (Opc) { 1055 default: llvm_unreachable("Unhandled opcode!"); 1056 case ARM::LDMIA: 1057 case ARM::LDMDA: 1058 case ARM::LDMDB: 1059 case ARM::LDMIB: 1060 switch (Mode) { 1061 default: llvm_unreachable("Unhandled submode!"); 1062 case ARM_AM::ia: return ARM::LDMIA_UPD; 1063 case ARM_AM::ib: return ARM::LDMIB_UPD; 1064 case ARM_AM::da: return ARM::LDMDA_UPD; 1065 case ARM_AM::db: return ARM::LDMDB_UPD; 1066 } 1067 case ARM::STMIA: 1068 case ARM::STMDA: 1069 case ARM::STMDB: 1070 case ARM::STMIB: 1071 switch (Mode) { 1072 default: llvm_unreachable("Unhandled submode!"); 1073 case ARM_AM::ia: return ARM::STMIA_UPD; 1074 case ARM_AM::ib: return ARM::STMIB_UPD; 1075 case ARM_AM::da: return ARM::STMDA_UPD; 1076 case ARM_AM::db: return ARM::STMDB_UPD; 1077 } 1078 case ARM::t2LDMIA: 1079 case ARM::t2LDMDB: 1080 switch (Mode) { 1081 default: llvm_unreachable("Unhandled submode!"); 1082 case ARM_AM::ia: return ARM::t2LDMIA_UPD; 1083 case ARM_AM::db: return ARM::t2LDMDB_UPD; 1084 } 1085 case ARM::t2STMIA: 1086 case ARM::t2STMDB: 1087 switch (Mode) { 1088 default: llvm_unreachable("Unhandled submode!"); 1089 case ARM_AM::ia: return ARM::t2STMIA_UPD; 1090 case ARM_AM::db: return ARM::t2STMDB_UPD; 1091 } 1092 case ARM::VLDMSIA: 1093 switch (Mode) { 1094 default: llvm_unreachable("Unhandled submode!"); 1095 case ARM_AM::ia: return ARM::VLDMSIA_UPD; 1096 case ARM_AM::db: return ARM::VLDMSDB_UPD; 1097 } 1098 case ARM::VLDMDIA: 1099 switch (Mode) { 1100 default: llvm_unreachable("Unhandled submode!"); 1101 case ARM_AM::ia: return ARM::VLDMDIA_UPD; 1102 case ARM_AM::db: return ARM::VLDMDDB_UPD; 1103 } 1104 case ARM::VSTMSIA: 1105 switch (Mode) { 1106 default: llvm_unreachable("Unhandled submode!"); 1107 case ARM_AM::ia: return ARM::VSTMSIA_UPD; 1108 case ARM_AM::db: return ARM::VSTMSDB_UPD; 1109 } 1110 case ARM::VSTMDIA: 1111 switch (Mode) { 1112 default: llvm_unreachable("Unhandled submode!"); 1113 case ARM_AM::ia: return ARM::VSTMDIA_UPD; 1114 case ARM_AM::db: return ARM::VSTMDDB_UPD; 1115 } 1116 } 1117 } 1118 1119 /// Check if the given instruction increments or decrements a register and 1120 /// return the amount it is incremented/decremented. Returns 0 if the CPSR flags 1121 /// generated by the instruction are possibly read as well. 1122 static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg, 1123 ARMCC::CondCodes Pred, unsigned PredReg) { 1124 bool CheckCPSRDef; 1125 int Scale; 1126 switch (MI.getOpcode()) { 1127 case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break; 1128 case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break; 1129 case ARM::t2SUBri: 1130 case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break; 1131 case ARM::t2ADDri: 1132 case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break; 1133 case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break; 1134 case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break; 1135 default: return 0; 1136 } 1137 1138 unsigned MIPredReg; 1139 if (MI.getOperand(0).getReg() != Reg || 1140 MI.getOperand(1).getReg() != Reg || 1141 getInstrPredicate(MI, MIPredReg) != Pred || 1142 MIPredReg != PredReg) 1143 return 0; 1144 1145 if (CheckCPSRDef && definesCPSR(&MI)) 1146 return 0; 1147 return MI.getOperand(2).getImm() * Scale; 1148 } 1149 1150 /// Searches for an increment or decrement of \p Reg before \p MBBI. 1151 static MachineBasicBlock::iterator 1152 findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg, 1153 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) { 1154 Offset = 0; 1155 MachineBasicBlock &MBB = *MBBI->getParent(); 1156 MachineBasicBlock::iterator BeginMBBI = MBB.begin(); 1157 MachineBasicBlock::iterator EndMBBI = MBB.end(); 1158 if (MBBI == BeginMBBI) 1159 return EndMBBI; 1160 1161 // Skip debug values. 1162 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI); 1163 while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI) 1164 --PrevMBBI; 1165 1166 Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg); 1167 return Offset == 0 ? EndMBBI : PrevMBBI; 1168 } 1169 1170 /// Searches for a increment or decrement of \p Reg after \p MBBI. 1171 static MachineBasicBlock::iterator 1172 findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg, 1173 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) { 1174 Offset = 0; 1175 MachineBasicBlock &MBB = *MBBI->getParent(); 1176 MachineBasicBlock::iterator EndMBBI = MBB.end(); 1177 MachineBasicBlock::iterator NextMBBI = std::next(MBBI); 1178 // Skip debug values. 1179 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) 1180 ++NextMBBI; 1181 if (NextMBBI == EndMBBI) 1182 return EndMBBI; 1183 1184 Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg); 1185 return Offset == 0 ? EndMBBI : NextMBBI; 1186 } 1187 1188 /// Fold proceeding/trailing inc/dec of base register into the 1189 /// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible: 1190 /// 1191 /// stmia rn, <ra, rb, rc> 1192 /// rn := rn + 4 * 3; 1193 /// => 1194 /// stmia rn!, <ra, rb, rc> 1195 /// 1196 /// rn := rn - 4 * 3; 1197 /// ldmia rn, <ra, rb, rc> 1198 /// => 1199 /// ldmdb rn!, <ra, rb, rc> 1200 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) { 1201 // Thumb1 is already using updating loads/stores. 1202 if (isThumb1) return false; 1203 1204 const MachineOperand &BaseOP = MI->getOperand(0); 1205 unsigned Base = BaseOP.getReg(); 1206 bool BaseKill = BaseOP.isKill(); 1207 unsigned PredReg = 0; 1208 ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg); 1209 unsigned Opcode = MI->getOpcode(); 1210 DebugLoc DL = MI->getDebugLoc(); 1211 1212 // Can't use an updating ld/st if the base register is also a dest 1213 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined. 1214 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i) 1215 if (MI->getOperand(i).getReg() == Base) 1216 return false; 1217 1218 int Bytes = getLSMultipleTransferSize(MI); 1219 MachineBasicBlock &MBB = *MI->getParent(); 1220 MachineBasicBlock::iterator MBBI(MI); 1221 int Offset; 1222 MachineBasicBlock::iterator MergeInstr 1223 = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset); 1224 ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode); 1225 if (Mode == ARM_AM::ia && Offset == -Bytes) { 1226 Mode = ARM_AM::db; 1227 } else if (Mode == ARM_AM::ib && Offset == -Bytes) { 1228 Mode = ARM_AM::da; 1229 } else { 1230 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset); 1231 if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) && 1232 ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) { 1233 1234 // We couldn't find an inc/dec to merge. But if the base is dead, we 1235 // can still change to a writeback form as that will save us 2 bytes 1236 // of code size. It can create WAW hazards though, so only do it if 1237 // we're minimizing code size. 1238 if (!MBB.getParent()->getFunction()->optForMinSize() || !BaseKill) 1239 return false; 1240 1241 bool HighRegsUsed = false; 1242 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i) 1243 if (MI->getOperand(i).getReg() >= ARM::R8) { 1244 HighRegsUsed = true; 1245 break; 1246 } 1247 1248 if (!HighRegsUsed) 1249 MergeInstr = MBB.end(); 1250 else 1251 return false; 1252 } 1253 } 1254 if (MergeInstr != MBB.end()) 1255 MBB.erase(MergeInstr); 1256 1257 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode); 1258 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc)) 1259 .addReg(Base, getDefRegState(true)) // WB base register 1260 .addReg(Base, getKillRegState(BaseKill)) 1261 .addImm(Pred).addReg(PredReg); 1262 1263 // Transfer the rest of operands. 1264 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum) 1265 MIB.addOperand(MI->getOperand(OpNum)); 1266 1267 // Transfer memoperands. 1268 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); 1269 1270 MBB.erase(MBBI); 1271 return true; 1272 } 1273 1274 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc, 1275 ARM_AM::AddrOpc Mode) { 1276 switch (Opc) { 1277 case ARM::LDRi12: 1278 return ARM::LDR_PRE_IMM; 1279 case ARM::STRi12: 1280 return ARM::STR_PRE_IMM; 1281 case ARM::VLDRS: 1282 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD; 1283 case ARM::VLDRD: 1284 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD; 1285 case ARM::VSTRS: 1286 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD; 1287 case ARM::VSTRD: 1288 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD; 1289 case ARM::t2LDRi8: 1290 case ARM::t2LDRi12: 1291 return ARM::t2LDR_PRE; 1292 case ARM::t2STRi8: 1293 case ARM::t2STRi12: 1294 return ARM::t2STR_PRE; 1295 default: llvm_unreachable("Unhandled opcode!"); 1296 } 1297 } 1298 1299 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, 1300 ARM_AM::AddrOpc Mode) { 1301 switch (Opc) { 1302 case ARM::LDRi12: 1303 return ARM::LDR_POST_IMM; 1304 case ARM::STRi12: 1305 return ARM::STR_POST_IMM; 1306 case ARM::VLDRS: 1307 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD; 1308 case ARM::VLDRD: 1309 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD; 1310 case ARM::VSTRS: 1311 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD; 1312 case ARM::VSTRD: 1313 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD; 1314 case ARM::t2LDRi8: 1315 case ARM::t2LDRi12: 1316 return ARM::t2LDR_POST; 1317 case ARM::t2STRi8: 1318 case ARM::t2STRi12: 1319 return ARM::t2STR_POST; 1320 default: llvm_unreachable("Unhandled opcode!"); 1321 } 1322 } 1323 1324 /// Fold proceeding/trailing inc/dec of base register into the 1325 /// LDR/STR/FLD{D|S}/FST{D|S} op when possible: 1326 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { 1327 // Thumb1 doesn't have updating LDR/STR. 1328 // FIXME: Use LDM/STM with single register instead. 1329 if (isThumb1) return false; 1330 1331 unsigned Base = getLoadStoreBaseOp(*MI).getReg(); 1332 bool BaseKill = getLoadStoreBaseOp(*MI).isKill(); 1333 unsigned Opcode = MI->getOpcode(); 1334 DebugLoc DL = MI->getDebugLoc(); 1335 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS || 1336 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS); 1337 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12); 1338 if (isi32Load(Opcode) || isi32Store(Opcode)) 1339 if (MI->getOperand(2).getImm() != 0) 1340 return false; 1341 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0) 1342 return false; 1343 1344 // Can't do the merge if the destination register is the same as the would-be 1345 // writeback register. 1346 if (MI->getOperand(0).getReg() == Base) 1347 return false; 1348 1349 unsigned PredReg = 0; 1350 ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg); 1351 int Bytes = getLSMultipleTransferSize(MI); 1352 MachineBasicBlock &MBB = *MI->getParent(); 1353 MachineBasicBlock::iterator MBBI(MI); 1354 int Offset; 1355 MachineBasicBlock::iterator MergeInstr 1356 = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset); 1357 unsigned NewOpc; 1358 if (!isAM5 && Offset == Bytes) { 1359 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add); 1360 } else if (Offset == -Bytes) { 1361 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub); 1362 } else { 1363 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset); 1364 if (Offset == Bytes) { 1365 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add); 1366 } else if (!isAM5 && Offset == -Bytes) { 1367 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub); 1368 } else 1369 return false; 1370 } 1371 MBB.erase(MergeInstr); 1372 1373 ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add; 1374 1375 bool isLd = isLoadSingle(Opcode); 1376 if (isAM5) { 1377 // VLDM[SD]_UPD, VSTM[SD]_UPD 1378 // (There are no base-updating versions of VLDR/VSTR instructions, but the 1379 // updating load/store-multiple instructions can be used with only one 1380 // register.) 1381 MachineOperand &MO = MI->getOperand(0); 1382 BuildMI(MBB, MBBI, DL, TII->get(NewOpc)) 1383 .addReg(Base, getDefRegState(true)) // WB base register 1384 .addReg(Base, getKillRegState(isLd ? BaseKill : false)) 1385 .addImm(Pred).addReg(PredReg) 1386 .addReg(MO.getReg(), (isLd ? getDefRegState(true) : 1387 getKillRegState(MO.isKill()))); 1388 } else if (isLd) { 1389 if (isAM2) { 1390 // LDR_PRE, LDR_POST 1391 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) { 1392 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) 1393 .addReg(Base, RegState::Define) 1394 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 1395 } else { 1396 int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); 1397 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) 1398 .addReg(Base, RegState::Define) 1399 .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg); 1400 } 1401 } else { 1402 // t2LDR_PRE, t2LDR_POST 1403 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) 1404 .addReg(Base, RegState::Define) 1405 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 1406 } 1407 } else { 1408 MachineOperand &MO = MI->getOperand(0); 1409 // FIXME: post-indexed stores use am2offset_imm, which still encodes 1410 // the vestigal zero-reg offset register. When that's fixed, this clause 1411 // can be removed entirely. 1412 if (isAM2 && NewOpc == ARM::STR_POST_IMM) { 1413 int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); 1414 // STR_PRE, STR_POST 1415 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base) 1416 .addReg(MO.getReg(), getKillRegState(MO.isKill())) 1417 .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg); 1418 } else { 1419 // t2STR_PRE, t2STR_POST 1420 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base) 1421 .addReg(MO.getReg(), getKillRegState(MO.isKill())) 1422 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 1423 } 1424 } 1425 MBB.erase(MBBI); 1426 1427 return true; 1428 } 1429 1430 bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const { 1431 unsigned Opcode = MI.getOpcode(); 1432 assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) && 1433 "Must have t2STRDi8 or t2LDRDi8"); 1434 if (MI.getOperand(3).getImm() != 0) 1435 return false; 1436 1437 // Behaviour for writeback is undefined if base register is the same as one 1438 // of the others. 1439 const MachineOperand &BaseOp = MI.getOperand(2); 1440 unsigned Base = BaseOp.getReg(); 1441 const MachineOperand &Reg0Op = MI.getOperand(0); 1442 const MachineOperand &Reg1Op = MI.getOperand(1); 1443 if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base) 1444 return false; 1445 1446 unsigned PredReg; 1447 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 1448 MachineBasicBlock::iterator MBBI(MI); 1449 MachineBasicBlock &MBB = *MI.getParent(); 1450 int Offset; 1451 MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred, 1452 PredReg, Offset); 1453 unsigned NewOpc; 1454 if (Offset == 8 || Offset == -8) { 1455 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE; 1456 } else { 1457 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset); 1458 if (Offset == 8 || Offset == -8) { 1459 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST; 1460 } else 1461 return false; 1462 } 1463 MBB.erase(MergeInstr); 1464 1465 DebugLoc DL = MI.getDebugLoc(); 1466 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc)); 1467 if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) { 1468 MIB.addOperand(Reg0Op).addOperand(Reg1Op) 1469 .addReg(BaseOp.getReg(), RegState::Define); 1470 } else { 1471 assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST); 1472 MIB.addReg(BaseOp.getReg(), RegState::Define) 1473 .addOperand(Reg0Op).addOperand(Reg1Op); 1474 } 1475 MIB.addReg(BaseOp.getReg(), RegState::Kill) 1476 .addImm(Offset).addImm(Pred).addReg(PredReg); 1477 assert(TII->get(Opcode).getNumOperands() == 6 && 1478 TII->get(NewOpc).getNumOperands() == 7 && 1479 "Unexpected number of operands in Opcode specification."); 1480 1481 // Transfer implicit operands. 1482 for (const MachineOperand &MO : MI.implicit_operands()) 1483 MIB.addOperand(MO); 1484 MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); 1485 1486 MBB.erase(MBBI); 1487 return true; 1488 } 1489 1490 /// Returns true if instruction is a memory operation that this pass is capable 1491 /// of operating on. 1492 static bool isMemoryOp(const MachineInstr &MI) { 1493 unsigned Opcode = MI.getOpcode(); 1494 switch (Opcode) { 1495 case ARM::VLDRS: 1496 case ARM::VSTRS: 1497 case ARM::VLDRD: 1498 case ARM::VSTRD: 1499 case ARM::LDRi12: 1500 case ARM::STRi12: 1501 case ARM::tLDRi: 1502 case ARM::tSTRi: 1503 case ARM::tLDRspi: 1504 case ARM::tSTRspi: 1505 case ARM::t2LDRi8: 1506 case ARM::t2LDRi12: 1507 case ARM::t2STRi8: 1508 case ARM::t2STRi12: 1509 break; 1510 default: 1511 return false; 1512 } 1513 if (!MI.getOperand(1).isReg()) 1514 return false; 1515 1516 // When no memory operands are present, conservatively assume unaligned, 1517 // volatile, unfoldable. 1518 if (!MI.hasOneMemOperand()) 1519 return false; 1520 1521 const MachineMemOperand &MMO = **MI.memoperands_begin(); 1522 1523 // Don't touch volatile memory accesses - we may be changing their order. 1524 if (MMO.isVolatile()) 1525 return false; 1526 1527 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is 1528 // not. 1529 if (MMO.getAlignment() < 4) 1530 return false; 1531 1532 // str <undef> could probably be eliminated entirely, but for now we just want 1533 // to avoid making a mess of it. 1534 // FIXME: Use str <undef> as a wildcard to enable better stm folding. 1535 if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef()) 1536 return false; 1537 1538 // Likewise don't mess with references to undefined addresses. 1539 if (MI.getOperand(1).isUndef()) 1540 return false; 1541 1542 return true; 1543 } 1544 1545 static void InsertLDR_STR(MachineBasicBlock &MBB, 1546 MachineBasicBlock::iterator &MBBI, 1547 int Offset, bool isDef, 1548 DebugLoc DL, unsigned NewOpc, 1549 unsigned Reg, bool RegDeadKill, bool RegUndef, 1550 unsigned BaseReg, bool BaseKill, bool BaseUndef, 1551 bool OffKill, bool OffUndef, 1552 ARMCC::CondCodes Pred, unsigned PredReg, 1553 const TargetInstrInfo *TII, bool isT2) { 1554 if (isDef) { 1555 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), 1556 TII->get(NewOpc)) 1557 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill)) 1558 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef)); 1559 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 1560 } else { 1561 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), 1562 TII->get(NewOpc)) 1563 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef)) 1564 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef)); 1565 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 1566 } 1567 } 1568 1569 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, 1570 MachineBasicBlock::iterator &MBBI) { 1571 MachineInstr *MI = &*MBBI; 1572 unsigned Opcode = MI->getOpcode(); 1573 if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8) 1574 return false; 1575 1576 const MachineOperand &BaseOp = MI->getOperand(2); 1577 unsigned BaseReg = BaseOp.getReg(); 1578 unsigned EvenReg = MI->getOperand(0).getReg(); 1579 unsigned OddReg = MI->getOperand(1).getReg(); 1580 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false); 1581 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false); 1582 1583 // ARM errata 602117: LDRD with base in list may result in incorrect base 1584 // register when interrupted or faulted. 1585 bool Errata602117 = EvenReg == BaseReg && 1586 (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3(); 1587 // ARM LDRD/STRD needs consecutive registers. 1588 bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) && 1589 (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum); 1590 1591 if (!Errata602117 && !NonConsecutiveRegs) 1592 return false; 1593 1594 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8; 1595 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8; 1596 bool EvenDeadKill = isLd ? 1597 MI->getOperand(0).isDead() : MI->getOperand(0).isKill(); 1598 bool EvenUndef = MI->getOperand(0).isUndef(); 1599 bool OddDeadKill = isLd ? 1600 MI->getOperand(1).isDead() : MI->getOperand(1).isKill(); 1601 bool OddUndef = MI->getOperand(1).isUndef(); 1602 bool BaseKill = BaseOp.isKill(); 1603 bool BaseUndef = BaseOp.isUndef(); 1604 bool OffKill = isT2 ? false : MI->getOperand(3).isKill(); 1605 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef(); 1606 int OffImm = getMemoryOpOffset(MI); 1607 unsigned PredReg = 0; 1608 ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg); 1609 1610 if (OddRegNum > EvenRegNum && OffImm == 0) { 1611 // Ascending register numbers and no offset. It's safe to change it to a 1612 // ldm or stm. 1613 unsigned NewOpc = (isLd) 1614 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA) 1615 : (isT2 ? ARM::t2STMIA : ARM::STMIA); 1616 if (isLd) { 1617 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) 1618 .addReg(BaseReg, getKillRegState(BaseKill)) 1619 .addImm(Pred).addReg(PredReg) 1620 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill)) 1621 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill)); 1622 ++NumLDRD2LDM; 1623 } else { 1624 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) 1625 .addReg(BaseReg, getKillRegState(BaseKill)) 1626 .addImm(Pred).addReg(PredReg) 1627 .addReg(EvenReg, 1628 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef)) 1629 .addReg(OddReg, 1630 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef)); 1631 ++NumSTRD2STM; 1632 } 1633 } else { 1634 // Split into two instructions. 1635 unsigned NewOpc = (isLd) 1636 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) 1637 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); 1638 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset, 1639 // so adjust and use t2LDRi12 here for that. 1640 unsigned NewOpc2 = (isLd) 1641 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) 1642 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); 1643 DebugLoc dl = MBBI->getDebugLoc(); 1644 // If this is a load and base register is killed, it may have been 1645 // re-defed by the load, make sure the first load does not clobber it. 1646 if (isLd && 1647 (BaseKill || OffKill) && 1648 (TRI->regsOverlap(EvenReg, BaseReg))) { 1649 assert(!TRI->regsOverlap(OddReg, BaseReg)); 1650 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, 1651 OddReg, OddDeadKill, false, 1652 BaseReg, false, BaseUndef, false, OffUndef, 1653 Pred, PredReg, TII, isT2); 1654 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, 1655 EvenReg, EvenDeadKill, false, 1656 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, 1657 Pred, PredReg, TII, isT2); 1658 } else { 1659 if (OddReg == EvenReg && EvenDeadKill) { 1660 // If the two source operands are the same, the kill marker is 1661 // probably on the first one. e.g. 1662 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0 1663 EvenDeadKill = false; 1664 OddDeadKill = true; 1665 } 1666 // Never kill the base register in the first instruction. 1667 if (EvenReg == BaseReg) 1668 EvenDeadKill = false; 1669 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, 1670 EvenReg, EvenDeadKill, EvenUndef, 1671 BaseReg, false, BaseUndef, false, OffUndef, 1672 Pred, PredReg, TII, isT2); 1673 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, 1674 OddReg, OddDeadKill, OddUndef, 1675 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, 1676 Pred, PredReg, TII, isT2); 1677 } 1678 if (isLd) 1679 ++NumLDRD2LDR; 1680 else 1681 ++NumSTRD2STR; 1682 } 1683 1684 MBBI = MBB.erase(MBBI); 1685 return true; 1686 } 1687 1688 /// An optimization pass to turn multiple LDR / STR ops of the same base and 1689 /// incrementing offset into LDM / STM ops. 1690 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { 1691 MemOpQueue MemOps; 1692 unsigned CurrBase = 0; 1693 unsigned CurrOpc = ~0u; 1694 ARMCC::CondCodes CurrPred = ARMCC::AL; 1695 unsigned Position = 0; 1696 assert(Candidates.size() == 0); 1697 assert(MergeBaseCandidates.size() == 0); 1698 LiveRegsValid = false; 1699 1700 for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin(); 1701 I = MBBI) { 1702 // The instruction in front of the iterator is the one we look at. 1703 MBBI = std::prev(I); 1704 if (FixInvalidRegPairOp(MBB, MBBI)) 1705 continue; 1706 ++Position; 1707 1708 if (isMemoryOp(*MBBI)) { 1709 unsigned Opcode = MBBI->getOpcode(); 1710 const MachineOperand &MO = MBBI->getOperand(0); 1711 unsigned Reg = MO.getReg(); 1712 unsigned Base = getLoadStoreBaseOp(*MBBI).getReg(); 1713 unsigned PredReg = 0; 1714 ARMCC::CondCodes Pred = getInstrPredicate(*MBBI, PredReg); 1715 int Offset = getMemoryOpOffset(MBBI); 1716 if (CurrBase == 0) { 1717 // Start of a new chain. 1718 CurrBase = Base; 1719 CurrOpc = Opcode; 1720 CurrPred = Pred; 1721 MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position)); 1722 continue; 1723 } 1724 // Note: No need to match PredReg in the next if. 1725 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) { 1726 // Watch out for: 1727 // r4 := ldr [r0, #8] 1728 // r4 := ldr [r0, #4] 1729 // or 1730 // r0 := ldr [r0] 1731 // If a load overrides the base register or a register loaded by 1732 // another load in our chain, we cannot take this instruction. 1733 bool Overlap = false; 1734 if (isLoadSingle(Opcode)) { 1735 Overlap = (Base == Reg); 1736 if (!Overlap) { 1737 for (const MemOpQueueEntry &E : MemOps) { 1738 if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) { 1739 Overlap = true; 1740 break; 1741 } 1742 } 1743 } 1744 } 1745 1746 if (!Overlap) { 1747 // Check offset and sort memory operation into the current chain. 1748 if (Offset > MemOps.back().Offset) { 1749 MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position)); 1750 continue; 1751 } else { 1752 MemOpQueue::iterator MI, ME; 1753 for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) { 1754 if (Offset < MI->Offset) { 1755 // Found a place to insert. 1756 break; 1757 } 1758 if (Offset == MI->Offset) { 1759 // Collision, abort. 1760 MI = ME; 1761 break; 1762 } 1763 } 1764 if (MI != MemOps.end()) { 1765 MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position)); 1766 continue; 1767 } 1768 } 1769 } 1770 } 1771 1772 // Don't advance the iterator; The op will start a new chain next. 1773 MBBI = I; 1774 --Position; 1775 // Fallthrough to look into existing chain. 1776 } else if (MBBI->isDebugValue()) { 1777 continue; 1778 } else if (MBBI->getOpcode() == ARM::t2LDRDi8 || 1779 MBBI->getOpcode() == ARM::t2STRDi8) { 1780 // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions 1781 // remember them because we may still be able to merge add/sub into them. 1782 MergeBaseCandidates.push_back(MBBI); 1783 } 1784 1785 1786 // If we are here then the chain is broken; Extract candidates for a merge. 1787 if (MemOps.size() > 0) { 1788 FormCandidates(MemOps); 1789 // Reset for the next chain. 1790 CurrBase = 0; 1791 CurrOpc = ~0u; 1792 CurrPred = ARMCC::AL; 1793 MemOps.clear(); 1794 } 1795 } 1796 if (MemOps.size() > 0) 1797 FormCandidates(MemOps); 1798 1799 // Sort candidates so they get processed from end to begin of the basic 1800 // block later; This is necessary for liveness calculation. 1801 auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) { 1802 return M0->InsertPos < M1->InsertPos; 1803 }; 1804 std::sort(Candidates.begin(), Candidates.end(), LessThan); 1805 1806 // Go through list of candidates and merge. 1807 bool Changed = false; 1808 for (const MergeCandidate *Candidate : Candidates) { 1809 if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) { 1810 MachineInstr *Merged = MergeOpsUpdate(*Candidate); 1811 // Merge preceding/trailing base inc/dec into the merged op. 1812 if (Merged) { 1813 Changed = true; 1814 unsigned Opcode = Merged->getOpcode(); 1815 if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8) 1816 MergeBaseUpdateLSDouble(*Merged); 1817 else 1818 MergeBaseUpdateLSMultiple(Merged); 1819 } else { 1820 for (MachineInstr *MI : Candidate->Instrs) { 1821 if (MergeBaseUpdateLoadStore(MI)) 1822 Changed = true; 1823 } 1824 } 1825 } else { 1826 assert(Candidate->Instrs.size() == 1); 1827 if (MergeBaseUpdateLoadStore(Candidate->Instrs.front())) 1828 Changed = true; 1829 } 1830 } 1831 Candidates.clear(); 1832 // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt. 1833 for (MachineInstr *MI : MergeBaseCandidates) 1834 MergeBaseUpdateLSDouble(*MI); 1835 MergeBaseCandidates.clear(); 1836 1837 return Changed; 1838 } 1839 1840 /// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr") 1841 /// into the preceding stack restore so it directly restore the value of LR 1842 /// into pc. 1843 /// ldmfd sp!, {..., lr} 1844 /// bx lr 1845 /// or 1846 /// ldmfd sp!, {..., lr} 1847 /// mov pc, lr 1848 /// => 1849 /// ldmfd sp!, {..., pc} 1850 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { 1851 // Thumb1 LDM doesn't allow high registers. 1852 if (isThumb1) return false; 1853 if (MBB.empty()) return false; 1854 1855 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); 1856 if (MBBI != MBB.begin() && 1857 (MBBI->getOpcode() == ARM::BX_RET || 1858 MBBI->getOpcode() == ARM::tBX_RET || 1859 MBBI->getOpcode() == ARM::MOVPCLR)) { 1860 MachineBasicBlock::iterator PrevI = std::prev(MBBI); 1861 // Ignore any DBG_VALUE instructions. 1862 while (PrevI->isDebugValue() && PrevI != MBB.begin()) 1863 --PrevI; 1864 MachineInstr *PrevMI = PrevI; 1865 unsigned Opcode = PrevMI->getOpcode(); 1866 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD || 1867 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD || 1868 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) { 1869 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1); 1870 if (MO.getReg() != ARM::LR) 1871 return false; 1872 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET); 1873 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) || 1874 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!"); 1875 PrevMI->setDesc(TII->get(NewOpc)); 1876 MO.setReg(ARM::PC); 1877 PrevMI->copyImplicitOps(*MBB.getParent(), *MBBI); 1878 MBB.erase(MBBI); 1879 return true; 1880 } 1881 } 1882 return false; 1883 } 1884 1885 bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) { 1886 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1887 if (MBBI == MBB.begin() || MBBI == MBB.end() || 1888 MBBI->getOpcode() != ARM::tBX_RET) 1889 return false; 1890 1891 MachineBasicBlock::iterator Prev = MBBI; 1892 --Prev; 1893 if (Prev->getOpcode() != ARM::tMOVr || !Prev->definesRegister(ARM::LR)) 1894 return false; 1895 1896 for (auto Use : Prev->uses()) 1897 if (Use.isKill()) { 1898 AddDefaultPred(BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX)) 1899 .addReg(Use.getReg(), RegState::Kill)) 1900 .copyImplicitOps(*MBBI); 1901 MBB.erase(MBBI); 1902 MBB.erase(Prev); 1903 return true; 1904 } 1905 1906 llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?"); 1907 } 1908 1909 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { 1910 if (skipFunction(*Fn.getFunction())) 1911 return false; 1912 1913 MF = &Fn; 1914 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget()); 1915 TL = STI->getTargetLowering(); 1916 AFI = Fn.getInfo<ARMFunctionInfo>(); 1917 TII = STI->getInstrInfo(); 1918 TRI = STI->getRegisterInfo(); 1919 1920 RegClassInfoValid = false; 1921 isThumb2 = AFI->isThumb2Function(); 1922 isThumb1 = AFI->isThumbFunction() && !isThumb2; 1923 1924 bool Modified = false; 1925 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; 1926 ++MFI) { 1927 MachineBasicBlock &MBB = *MFI; 1928 Modified |= LoadStoreMultipleOpti(MBB); 1929 if (STI->hasV5TOps()) 1930 Modified |= MergeReturnIntoLDM(MBB); 1931 if (isThumb1) 1932 Modified |= CombineMovBx(MBB); 1933 } 1934 1935 Allocator.DestroyAll(); 1936 return Modified; 1937 } 1938 1939 namespace llvm { 1940 void initializeARMPreAllocLoadStoreOptPass(PassRegistry &); 1941 } 1942 1943 #define ARM_PREALLOC_LOAD_STORE_OPT_NAME \ 1944 "ARM pre- register allocation load / store optimization pass" 1945 1946 namespace { 1947 /// Pre- register allocation pass that move load / stores from consecutive 1948 /// locations close to make it more likely they will be combined later. 1949 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{ 1950 static char ID; 1951 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) { 1952 initializeARMPreAllocLoadStoreOptPass(*PassRegistry::getPassRegistry()); 1953 } 1954 1955 const DataLayout *TD; 1956 const TargetInstrInfo *TII; 1957 const TargetRegisterInfo *TRI; 1958 const ARMSubtarget *STI; 1959 MachineRegisterInfo *MRI; 1960 MachineFunction *MF; 1961 1962 bool runOnMachineFunction(MachineFunction &Fn) override; 1963 1964 const char *getPassName() const override { 1965 return ARM_PREALLOC_LOAD_STORE_OPT_NAME; 1966 } 1967 1968 private: 1969 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, 1970 unsigned &NewOpc, unsigned &EvenReg, 1971 unsigned &OddReg, unsigned &BaseReg, 1972 int &Offset, 1973 unsigned &PredReg, ARMCC::CondCodes &Pred, 1974 bool &isT2); 1975 bool RescheduleOps(MachineBasicBlock *MBB, 1976 SmallVectorImpl<MachineInstr *> &Ops, 1977 unsigned Base, bool isLd, 1978 DenseMap<MachineInstr*, unsigned> &MI2LocMap); 1979 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB); 1980 }; 1981 char ARMPreAllocLoadStoreOpt::ID = 0; 1982 } 1983 1984 INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-load-store-opt", 1985 ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false) 1986 1987 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { 1988 if (AssumeMisalignedLoadStores || skipFunction(*Fn.getFunction())) 1989 return false; 1990 1991 TD = &Fn.getDataLayout(); 1992 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget()); 1993 TII = STI->getInstrInfo(); 1994 TRI = STI->getRegisterInfo(); 1995 MRI = &Fn.getRegInfo(); 1996 MF = &Fn; 1997 1998 bool Modified = false; 1999 for (MachineBasicBlock &MFI : Fn) 2000 Modified |= RescheduleLoadStoreInstrs(&MFI); 2001 2002 return Modified; 2003 } 2004 2005 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, 2006 MachineBasicBlock::iterator I, 2007 MachineBasicBlock::iterator E, 2008 SmallPtrSetImpl<MachineInstr*> &MemOps, 2009 SmallSet<unsigned, 4> &MemRegs, 2010 const TargetRegisterInfo *TRI) { 2011 // Are there stores / loads / calls between them? 2012 // FIXME: This is overly conservative. We should make use of alias information 2013 // some day. 2014 SmallSet<unsigned, 4> AddedRegPressure; 2015 while (++I != E) { 2016 if (I->isDebugValue() || MemOps.count(&*I)) 2017 continue; 2018 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects()) 2019 return false; 2020 if (isLd && I->mayStore()) 2021 return false; 2022 if (!isLd) { 2023 if (I->mayLoad()) 2024 return false; 2025 // It's not safe to move the first 'str' down. 2026 // str r1, [r0] 2027 // strh r5, [r0] 2028 // str r4, [r0, #+4] 2029 if (I->mayStore()) 2030 return false; 2031 } 2032 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) { 2033 MachineOperand &MO = I->getOperand(j); 2034 if (!MO.isReg()) 2035 continue; 2036 unsigned Reg = MO.getReg(); 2037 if (MO.isDef() && TRI->regsOverlap(Reg, Base)) 2038 return false; 2039 if (Reg != Base && !MemRegs.count(Reg)) 2040 AddedRegPressure.insert(Reg); 2041 } 2042 } 2043 2044 // Estimate register pressure increase due to the transformation. 2045 if (MemRegs.size() <= 4) 2046 // Ok if we are moving small number of instructions. 2047 return true; 2048 return AddedRegPressure.size() <= MemRegs.size() * 2; 2049 } 2050 2051 bool 2052 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, 2053 DebugLoc &dl, unsigned &NewOpc, 2054 unsigned &FirstReg, 2055 unsigned &SecondReg, 2056 unsigned &BaseReg, int &Offset, 2057 unsigned &PredReg, 2058 ARMCC::CondCodes &Pred, 2059 bool &isT2) { 2060 // Make sure we're allowed to generate LDRD/STRD. 2061 if (!STI->hasV5TEOps()) 2062 return false; 2063 2064 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD 2065 unsigned Scale = 1; 2066 unsigned Opcode = Op0->getOpcode(); 2067 if (Opcode == ARM::LDRi12) { 2068 NewOpc = ARM::LDRD; 2069 } else if (Opcode == ARM::STRi12) { 2070 NewOpc = ARM::STRD; 2071 } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) { 2072 NewOpc = ARM::t2LDRDi8; 2073 Scale = 4; 2074 isT2 = true; 2075 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) { 2076 NewOpc = ARM::t2STRDi8; 2077 Scale = 4; 2078 isT2 = true; 2079 } else { 2080 return false; 2081 } 2082 2083 // Make sure the base address satisfies i64 ld / st alignment requirement. 2084 // At the moment, we ignore the memoryoperand's value. 2085 // If we want to use AliasAnalysis, we should check it accordingly. 2086 if (!Op0->hasOneMemOperand() || 2087 (*Op0->memoperands_begin())->isVolatile()) 2088 return false; 2089 2090 unsigned Align = (*Op0->memoperands_begin())->getAlignment(); 2091 const Function *Func = MF->getFunction(); 2092 unsigned ReqAlign = STI->hasV6Ops() 2093 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext())) 2094 : 8; // Pre-v6 need 8-byte align 2095 if (Align < ReqAlign) 2096 return false; 2097 2098 // Then make sure the immediate offset fits. 2099 int OffImm = getMemoryOpOffset(Op0); 2100 if (isT2) { 2101 int Limit = (1 << 8) * Scale; 2102 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1))) 2103 return false; 2104 Offset = OffImm; 2105 } else { 2106 ARM_AM::AddrOpc AddSub = ARM_AM::add; 2107 if (OffImm < 0) { 2108 AddSub = ARM_AM::sub; 2109 OffImm = - OffImm; 2110 } 2111 int Limit = (1 << 8) * Scale; 2112 if (OffImm >= Limit || (OffImm & (Scale-1))) 2113 return false; 2114 Offset = ARM_AM::getAM3Opc(AddSub, OffImm); 2115 } 2116 FirstReg = Op0->getOperand(0).getReg(); 2117 SecondReg = Op1->getOperand(0).getReg(); 2118 if (FirstReg == SecondReg) 2119 return false; 2120 BaseReg = Op0->getOperand(1).getReg(); 2121 Pred = getInstrPredicate(*Op0, PredReg); 2122 dl = Op0->getDebugLoc(); 2123 return true; 2124 } 2125 2126 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, 2127 SmallVectorImpl<MachineInstr *> &Ops, 2128 unsigned Base, bool isLd, 2129 DenseMap<MachineInstr*, unsigned> &MI2LocMap) { 2130 bool RetVal = false; 2131 2132 // Sort by offset (in reverse order). 2133 std::sort(Ops.begin(), Ops.end(), 2134 [](const MachineInstr *LHS, const MachineInstr *RHS) { 2135 int LOffset = getMemoryOpOffset(LHS); 2136 int ROffset = getMemoryOpOffset(RHS); 2137 assert(LHS == RHS || LOffset != ROffset); 2138 return LOffset > ROffset; 2139 }); 2140 2141 // The loads / stores of the same base are in order. Scan them from first to 2142 // last and check for the following: 2143 // 1. Any def of base. 2144 // 2. Any gaps. 2145 while (Ops.size() > 1) { 2146 unsigned FirstLoc = ~0U; 2147 unsigned LastLoc = 0; 2148 MachineInstr *FirstOp = nullptr; 2149 MachineInstr *LastOp = nullptr; 2150 int LastOffset = 0; 2151 unsigned LastOpcode = 0; 2152 unsigned LastBytes = 0; 2153 unsigned NumMove = 0; 2154 for (int i = Ops.size() - 1; i >= 0; --i) { 2155 MachineInstr *Op = Ops[i]; 2156 unsigned Loc = MI2LocMap[Op]; 2157 if (Loc <= FirstLoc) { 2158 FirstLoc = Loc; 2159 FirstOp = Op; 2160 } 2161 if (Loc >= LastLoc) { 2162 LastLoc = Loc; 2163 LastOp = Op; 2164 } 2165 2166 unsigned LSMOpcode 2167 = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia); 2168 if (LastOpcode && LSMOpcode != LastOpcode) 2169 break; 2170 2171 int Offset = getMemoryOpOffset(Op); 2172 unsigned Bytes = getLSMultipleTransferSize(Op); 2173 if (LastBytes) { 2174 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes)) 2175 break; 2176 } 2177 LastOffset = Offset; 2178 LastBytes = Bytes; 2179 LastOpcode = LSMOpcode; 2180 if (++NumMove == 8) // FIXME: Tune this limit. 2181 break; 2182 } 2183 2184 if (NumMove <= 1) 2185 Ops.pop_back(); 2186 else { 2187 SmallPtrSet<MachineInstr*, 4> MemOps; 2188 SmallSet<unsigned, 4> MemRegs; 2189 for (int i = NumMove-1; i >= 0; --i) { 2190 MemOps.insert(Ops[i]); 2191 MemRegs.insert(Ops[i]->getOperand(0).getReg()); 2192 } 2193 2194 // Be conservative, if the instructions are too far apart, don't 2195 // move them. We want to limit the increase of register pressure. 2196 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this. 2197 if (DoMove) 2198 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp, 2199 MemOps, MemRegs, TRI); 2200 if (!DoMove) { 2201 for (unsigned i = 0; i != NumMove; ++i) 2202 Ops.pop_back(); 2203 } else { 2204 // This is the new location for the loads / stores. 2205 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp; 2206 while (InsertPos != MBB->end() 2207 && (MemOps.count(InsertPos) || InsertPos->isDebugValue())) 2208 ++InsertPos; 2209 2210 // If we are moving a pair of loads / stores, see if it makes sense 2211 // to try to allocate a pair of registers that can form register pairs. 2212 MachineInstr *Op0 = Ops.back(); 2213 MachineInstr *Op1 = Ops[Ops.size()-2]; 2214 unsigned FirstReg = 0, SecondReg = 0; 2215 unsigned BaseReg = 0, PredReg = 0; 2216 ARMCC::CondCodes Pred = ARMCC::AL; 2217 bool isT2 = false; 2218 unsigned NewOpc = 0; 2219 int Offset = 0; 2220 DebugLoc dl; 2221 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc, 2222 FirstReg, SecondReg, BaseReg, 2223 Offset, PredReg, Pred, isT2)) { 2224 Ops.pop_back(); 2225 Ops.pop_back(); 2226 2227 const MCInstrDesc &MCID = TII->get(NewOpc); 2228 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF); 2229 MRI->constrainRegClass(FirstReg, TRC); 2230 MRI->constrainRegClass(SecondReg, TRC); 2231 2232 // Form the pair instruction. 2233 if (isLd) { 2234 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) 2235 .addReg(FirstReg, RegState::Define) 2236 .addReg(SecondReg, RegState::Define) 2237 .addReg(BaseReg); 2238 // FIXME: We're converting from LDRi12 to an insn that still 2239 // uses addrmode2, so we need an explicit offset reg. It should 2240 // always by reg0 since we're transforming LDRi12s. 2241 if (!isT2) 2242 MIB.addReg(0); 2243 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 2244 MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1)); 2245 DEBUG(dbgs() << "Formed " << *MIB << "\n"); 2246 ++NumLDRDFormed; 2247 } else { 2248 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) 2249 .addReg(FirstReg) 2250 .addReg(SecondReg) 2251 .addReg(BaseReg); 2252 // FIXME: We're converting from LDRi12 to an insn that still 2253 // uses addrmode2, so we need an explicit offset reg. It should 2254 // always by reg0 since we're transforming STRi12s. 2255 if (!isT2) 2256 MIB.addReg(0); 2257 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 2258 MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1)); 2259 DEBUG(dbgs() << "Formed " << *MIB << "\n"); 2260 ++NumSTRDFormed; 2261 } 2262 MBB->erase(Op0); 2263 MBB->erase(Op1); 2264 2265 if (!isT2) { 2266 // Add register allocation hints to form register pairs. 2267 MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg); 2268 MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg); 2269 } 2270 } else { 2271 for (unsigned i = 0; i != NumMove; ++i) { 2272 MachineInstr *Op = Ops.back(); 2273 Ops.pop_back(); 2274 MBB->splice(InsertPos, MBB, Op); 2275 } 2276 } 2277 2278 NumLdStMoved += NumMove; 2279 RetVal = true; 2280 } 2281 } 2282 } 2283 2284 return RetVal; 2285 } 2286 2287 bool 2288 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { 2289 bool RetVal = false; 2290 2291 DenseMap<MachineInstr*, unsigned> MI2LocMap; 2292 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap; 2293 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap; 2294 SmallVector<unsigned, 4> LdBases; 2295 SmallVector<unsigned, 4> StBases; 2296 2297 unsigned Loc = 0; 2298 MachineBasicBlock::iterator MBBI = MBB->begin(); 2299 MachineBasicBlock::iterator E = MBB->end(); 2300 while (MBBI != E) { 2301 for (; MBBI != E; ++MBBI) { 2302 MachineInstr *MI = MBBI; 2303 if (MI->isCall() || MI->isTerminator()) { 2304 // Stop at barriers. 2305 ++MBBI; 2306 break; 2307 } 2308 2309 if (!MI->isDebugValue()) 2310 MI2LocMap[MI] = ++Loc; 2311 2312 if (!isMemoryOp(*MI)) 2313 continue; 2314 unsigned PredReg = 0; 2315 if (getInstrPredicate(*MI, PredReg) != ARMCC::AL) 2316 continue; 2317 2318 int Opc = MI->getOpcode(); 2319 bool isLd = isLoadSingle(Opc); 2320 unsigned Base = MI->getOperand(1).getReg(); 2321 int Offset = getMemoryOpOffset(MI); 2322 2323 bool StopHere = false; 2324 if (isLd) { 2325 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI = 2326 Base2LdsMap.find(Base); 2327 if (BI != Base2LdsMap.end()) { 2328 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { 2329 if (Offset == getMemoryOpOffset(BI->second[i])) { 2330 StopHere = true; 2331 break; 2332 } 2333 } 2334 if (!StopHere) 2335 BI->second.push_back(MI); 2336 } else { 2337 Base2LdsMap[Base].push_back(MI); 2338 LdBases.push_back(Base); 2339 } 2340 } else { 2341 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI = 2342 Base2StsMap.find(Base); 2343 if (BI != Base2StsMap.end()) { 2344 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { 2345 if (Offset == getMemoryOpOffset(BI->second[i])) { 2346 StopHere = true; 2347 break; 2348 } 2349 } 2350 if (!StopHere) 2351 BI->second.push_back(MI); 2352 } else { 2353 Base2StsMap[Base].push_back(MI); 2354 StBases.push_back(Base); 2355 } 2356 } 2357 2358 if (StopHere) { 2359 // Found a duplicate (a base+offset combination that's seen earlier). 2360 // Backtrack. 2361 --Loc; 2362 break; 2363 } 2364 } 2365 2366 // Re-schedule loads. 2367 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) { 2368 unsigned Base = LdBases[i]; 2369 SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base]; 2370 if (Lds.size() > 1) 2371 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap); 2372 } 2373 2374 // Re-schedule stores. 2375 for (unsigned i = 0, e = StBases.size(); i != e; ++i) { 2376 unsigned Base = StBases[i]; 2377 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base]; 2378 if (Sts.size() > 1) 2379 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap); 2380 } 2381 2382 if (MBBI != E) { 2383 Base2LdsMap.clear(); 2384 Base2StsMap.clear(); 2385 LdBases.clear(); 2386 StBases.clear(); 2387 } 2388 } 2389 2390 return RetVal; 2391 } 2392 2393 2394 /// Returns an instance of the load / store optimization pass. 2395 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) { 2396 if (PreAlloc) 2397 return new ARMPreAllocLoadStoreOpt(); 2398 return new ARMLoadStoreOpt(); 2399 } 2400