1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file This file contains a pass that performs load / store related peephole 11 /// optimizations. This pass should be run after register allocation. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ARM.h" 16 #include "ARMBaseInstrInfo.h" 17 #include "ARMBaseRegisterInfo.h" 18 #include "ARMISelLowering.h" 19 #include "ARMMachineFunctionInfo.h" 20 #include "ARMSubtarget.h" 21 #include "MCTargetDesc/ARMAddressingModes.h" 22 #include "ThumbRegisterInfo.h" 23 #include "llvm/ADT/DenseMap.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/ADT/SmallPtrSet.h" 26 #include "llvm/ADT/SmallSet.h" 27 #include "llvm/ADT/SmallVector.h" 28 #include "llvm/ADT/Statistic.h" 29 #include "llvm/CodeGen/MachineBasicBlock.h" 30 #include "llvm/CodeGen/MachineFunctionPass.h" 31 #include "llvm/CodeGen/MachineInstr.h" 32 #include "llvm/CodeGen/MachineInstrBuilder.h" 33 #include "llvm/CodeGen/MachineRegisterInfo.h" 34 #include "llvm/CodeGen/RegisterClassInfo.h" 35 #include "llvm/CodeGen/SelectionDAGNodes.h" 36 #include "llvm/CodeGen/LivePhysRegs.h" 37 #include "llvm/IR/DataLayout.h" 38 #include "llvm/IR/DerivedTypes.h" 39 #include "llvm/IR/Function.h" 40 #include "llvm/Support/Allocator.h" 41 #include "llvm/Support/Debug.h" 42 #include "llvm/Support/ErrorHandling.h" 43 #include "llvm/Support/raw_ostream.h" 44 #include "llvm/Target/TargetInstrInfo.h" 45 #include "llvm/Target/TargetMachine.h" 46 #include "llvm/Target/TargetRegisterInfo.h" 47 using namespace llvm; 48 49 #define DEBUG_TYPE "arm-ldst-opt" 50 51 STATISTIC(NumLDMGened , "Number of ldm instructions generated"); 52 STATISTIC(NumSTMGened , "Number of stm instructions generated"); 53 STATISTIC(NumVLDMGened, "Number of vldm instructions generated"); 54 STATISTIC(NumVSTMGened, "Number of vstm instructions generated"); 55 STATISTIC(NumLdStMoved, "Number of load / store instructions moved"); 56 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation"); 57 STATISTIC(NumSTRDFormed,"Number of strd created before allocation"); 58 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm"); 59 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm"); 60 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's"); 61 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's"); 62 63 /// This switch disables formation of double/multi instructions that could 64 /// potentially lead to (new) alignment traps even with CCR.UNALIGN_TRP 65 /// disabled. This can be used to create libraries that are robust even when 66 /// users provoke undefined behaviour by supplying misaligned pointers. 67 /// \see mayCombineMisaligned() 68 static cl::opt<bool> 69 AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden, 70 cl::init(false), cl::desc("Be more conservative in ARM load/store opt")); 71 72 #define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass" 73 74 namespace { 75 /// Post- register allocation pass the combine load / store instructions to 76 /// form ldm / stm instructions. 77 struct ARMLoadStoreOpt : public MachineFunctionPass { 78 static char ID; 79 ARMLoadStoreOpt() : MachineFunctionPass(ID) {} 80 81 const MachineFunction *MF; 82 const TargetInstrInfo *TII; 83 const TargetRegisterInfo *TRI; 84 const ARMSubtarget *STI; 85 const TargetLowering *TL; 86 ARMFunctionInfo *AFI; 87 LivePhysRegs LiveRegs; 88 RegisterClassInfo RegClassInfo; 89 MachineBasicBlock::const_iterator LiveRegPos; 90 bool LiveRegsValid; 91 bool RegClassInfoValid; 92 bool isThumb1, isThumb2; 93 94 bool runOnMachineFunction(MachineFunction &Fn) override; 95 96 MachineFunctionProperties getRequiredProperties() const override { 97 return MachineFunctionProperties().set( 98 MachineFunctionProperties::Property::NoVRegs); 99 } 100 101 const char *getPassName() const override { 102 return ARM_LOAD_STORE_OPT_NAME; 103 } 104 105 private: 106 /// A set of load/store MachineInstrs with same base register sorted by 107 /// offset. 108 struct MemOpQueueEntry { 109 MachineInstr *MI; 110 int Offset; ///< Load/Store offset. 111 unsigned Position; ///< Position as counted from end of basic block. 112 MemOpQueueEntry(MachineInstr &MI, int Offset, unsigned Position) 113 : MI(&MI), Offset(Offset), Position(Position) {} 114 }; 115 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; 116 117 /// A set of MachineInstrs that fulfill (nearly all) conditions to get 118 /// merged into a LDM/STM. 119 struct MergeCandidate { 120 /// List of instructions ordered by load/store offset. 121 SmallVector<MachineInstr*, 4> Instrs; 122 /// Index in Instrs of the instruction being latest in the schedule. 123 unsigned LatestMIIdx; 124 /// Index in Instrs of the instruction being earliest in the schedule. 125 unsigned EarliestMIIdx; 126 /// Index into the basic block where the merged instruction will be 127 /// inserted. (See MemOpQueueEntry.Position) 128 unsigned InsertPos; 129 /// Whether the instructions can be merged into a ldm/stm instruction. 130 bool CanMergeToLSMulti; 131 /// Whether the instructions can be merged into a ldrd/strd instruction. 132 bool CanMergeToLSDouble; 133 }; 134 SpecificBumpPtrAllocator<MergeCandidate> Allocator; 135 SmallVector<const MergeCandidate*,4> Candidates; 136 SmallVector<MachineInstr*,4> MergeBaseCandidates; 137 138 void moveLiveRegsBefore(const MachineBasicBlock &MBB, 139 MachineBasicBlock::const_iterator Before); 140 unsigned findFreeReg(const TargetRegisterClass &RegClass); 141 void UpdateBaseRegUses(MachineBasicBlock &MBB, 142 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, 143 unsigned Base, unsigned WordOffset, 144 ARMCC::CondCodes Pred, unsigned PredReg); 145 MachineInstr *CreateLoadStoreMulti( 146 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, 147 int Offset, unsigned Base, bool BaseKill, unsigned Opcode, 148 ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL, 149 ArrayRef<std::pair<unsigned, bool>> Regs); 150 MachineInstr *CreateLoadStoreDouble( 151 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, 152 int Offset, unsigned Base, bool BaseKill, unsigned Opcode, 153 ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL, 154 ArrayRef<std::pair<unsigned, bool>> Regs) const; 155 void FormCandidates(const MemOpQueue &MemOps); 156 MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand); 157 bool FixInvalidRegPairOp(MachineBasicBlock &MBB, 158 MachineBasicBlock::iterator &MBBI); 159 bool MergeBaseUpdateLoadStore(MachineInstr *MI); 160 bool MergeBaseUpdateLSMultiple(MachineInstr *MI); 161 bool MergeBaseUpdateLSDouble(MachineInstr &MI) const; 162 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); 163 bool MergeReturnIntoLDM(MachineBasicBlock &MBB); 164 bool CombineMovBx(MachineBasicBlock &MBB); 165 }; 166 char ARMLoadStoreOpt::ID = 0; 167 } 168 169 INITIALIZE_PASS(ARMLoadStoreOpt, "arm-ldst-opt", ARM_LOAD_STORE_OPT_NAME, false, 170 false) 171 172 static bool definesCPSR(const MachineInstr &MI) { 173 for (const auto &MO : MI.operands()) { 174 if (!MO.isReg()) 175 continue; 176 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead()) 177 // If the instruction has live CPSR def, then it's not safe to fold it 178 // into load / store. 179 return true; 180 } 181 182 return false; 183 } 184 185 static int getMemoryOpOffset(const MachineInstr &MI) { 186 unsigned Opcode = MI.getOpcode(); 187 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD; 188 unsigned NumOperands = MI.getDesc().getNumOperands(); 189 unsigned OffField = MI.getOperand(NumOperands - 3).getImm(); 190 191 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 || 192 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 || 193 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 || 194 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12) 195 return OffField; 196 197 // Thumb1 immediate offsets are scaled by 4 198 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi || 199 Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) 200 return OffField * 4; 201 202 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField) 203 : ARM_AM::getAM5Offset(OffField) * 4; 204 ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField) 205 : ARM_AM::getAM5Op(OffField); 206 207 if (Op == ARM_AM::sub) 208 return -Offset; 209 210 return Offset; 211 } 212 213 static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) { 214 return MI.getOperand(1); 215 } 216 217 static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) { 218 return MI.getOperand(0); 219 } 220 221 static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) { 222 switch (Opcode) { 223 default: llvm_unreachable("Unhandled opcode!"); 224 case ARM::LDRi12: 225 ++NumLDMGened; 226 switch (Mode) { 227 default: llvm_unreachable("Unhandled submode!"); 228 case ARM_AM::ia: return ARM::LDMIA; 229 case ARM_AM::da: return ARM::LDMDA; 230 case ARM_AM::db: return ARM::LDMDB; 231 case ARM_AM::ib: return ARM::LDMIB; 232 } 233 case ARM::STRi12: 234 ++NumSTMGened; 235 switch (Mode) { 236 default: llvm_unreachable("Unhandled submode!"); 237 case ARM_AM::ia: return ARM::STMIA; 238 case ARM_AM::da: return ARM::STMDA; 239 case ARM_AM::db: return ARM::STMDB; 240 case ARM_AM::ib: return ARM::STMIB; 241 } 242 case ARM::tLDRi: 243 case ARM::tLDRspi: 244 // tLDMIA is writeback-only - unless the base register is in the input 245 // reglist. 246 ++NumLDMGened; 247 switch (Mode) { 248 default: llvm_unreachable("Unhandled submode!"); 249 case ARM_AM::ia: return ARM::tLDMIA; 250 } 251 case ARM::tSTRi: 252 case ARM::tSTRspi: 253 // There is no non-writeback tSTMIA either. 254 ++NumSTMGened; 255 switch (Mode) { 256 default: llvm_unreachable("Unhandled submode!"); 257 case ARM_AM::ia: return ARM::tSTMIA_UPD; 258 } 259 case ARM::t2LDRi8: 260 case ARM::t2LDRi12: 261 ++NumLDMGened; 262 switch (Mode) { 263 default: llvm_unreachable("Unhandled submode!"); 264 case ARM_AM::ia: return ARM::t2LDMIA; 265 case ARM_AM::db: return ARM::t2LDMDB; 266 } 267 case ARM::t2STRi8: 268 case ARM::t2STRi12: 269 ++NumSTMGened; 270 switch (Mode) { 271 default: llvm_unreachable("Unhandled submode!"); 272 case ARM_AM::ia: return ARM::t2STMIA; 273 case ARM_AM::db: return ARM::t2STMDB; 274 } 275 case ARM::VLDRS: 276 ++NumVLDMGened; 277 switch (Mode) { 278 default: llvm_unreachable("Unhandled submode!"); 279 case ARM_AM::ia: return ARM::VLDMSIA; 280 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists. 281 } 282 case ARM::VSTRS: 283 ++NumVSTMGened; 284 switch (Mode) { 285 default: llvm_unreachable("Unhandled submode!"); 286 case ARM_AM::ia: return ARM::VSTMSIA; 287 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists. 288 } 289 case ARM::VLDRD: 290 ++NumVLDMGened; 291 switch (Mode) { 292 default: llvm_unreachable("Unhandled submode!"); 293 case ARM_AM::ia: return ARM::VLDMDIA; 294 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists. 295 } 296 case ARM::VSTRD: 297 ++NumVSTMGened; 298 switch (Mode) { 299 default: llvm_unreachable("Unhandled submode!"); 300 case ARM_AM::ia: return ARM::VSTMDIA; 301 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists. 302 } 303 } 304 } 305 306 static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) { 307 switch (Opcode) { 308 default: llvm_unreachable("Unhandled opcode!"); 309 case ARM::LDMIA_RET: 310 case ARM::LDMIA: 311 case ARM::LDMIA_UPD: 312 case ARM::STMIA: 313 case ARM::STMIA_UPD: 314 case ARM::tLDMIA: 315 case ARM::tLDMIA_UPD: 316 case ARM::tSTMIA_UPD: 317 case ARM::t2LDMIA_RET: 318 case ARM::t2LDMIA: 319 case ARM::t2LDMIA_UPD: 320 case ARM::t2STMIA: 321 case ARM::t2STMIA_UPD: 322 case ARM::VLDMSIA: 323 case ARM::VLDMSIA_UPD: 324 case ARM::VSTMSIA: 325 case ARM::VSTMSIA_UPD: 326 case ARM::VLDMDIA: 327 case ARM::VLDMDIA_UPD: 328 case ARM::VSTMDIA: 329 case ARM::VSTMDIA_UPD: 330 return ARM_AM::ia; 331 332 case ARM::LDMDA: 333 case ARM::LDMDA_UPD: 334 case ARM::STMDA: 335 case ARM::STMDA_UPD: 336 return ARM_AM::da; 337 338 case ARM::LDMDB: 339 case ARM::LDMDB_UPD: 340 case ARM::STMDB: 341 case ARM::STMDB_UPD: 342 case ARM::t2LDMDB: 343 case ARM::t2LDMDB_UPD: 344 case ARM::t2STMDB: 345 case ARM::t2STMDB_UPD: 346 case ARM::VLDMSDB_UPD: 347 case ARM::VSTMSDB_UPD: 348 case ARM::VLDMDDB_UPD: 349 case ARM::VSTMDDB_UPD: 350 return ARM_AM::db; 351 352 case ARM::LDMIB: 353 case ARM::LDMIB_UPD: 354 case ARM::STMIB: 355 case ARM::STMIB_UPD: 356 return ARM_AM::ib; 357 } 358 } 359 360 static bool isT1i32Load(unsigned Opc) { 361 return Opc == ARM::tLDRi || Opc == ARM::tLDRspi; 362 } 363 364 static bool isT2i32Load(unsigned Opc) { 365 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8; 366 } 367 368 static bool isi32Load(unsigned Opc) { 369 return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ; 370 } 371 372 static bool isT1i32Store(unsigned Opc) { 373 return Opc == ARM::tSTRi || Opc == ARM::tSTRspi; 374 } 375 376 static bool isT2i32Store(unsigned Opc) { 377 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8; 378 } 379 380 static bool isi32Store(unsigned Opc) { 381 return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc); 382 } 383 384 static bool isLoadSingle(unsigned Opc) { 385 return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD; 386 } 387 388 static unsigned getImmScale(unsigned Opc) { 389 switch (Opc) { 390 default: llvm_unreachable("Unhandled opcode!"); 391 case ARM::tLDRi: 392 case ARM::tSTRi: 393 case ARM::tLDRspi: 394 case ARM::tSTRspi: 395 return 1; 396 case ARM::tLDRHi: 397 case ARM::tSTRHi: 398 return 2; 399 case ARM::tLDRBi: 400 case ARM::tSTRBi: 401 return 4; 402 } 403 } 404 405 static unsigned getLSMultipleTransferSize(const MachineInstr *MI) { 406 switch (MI->getOpcode()) { 407 default: return 0; 408 case ARM::LDRi12: 409 case ARM::STRi12: 410 case ARM::tLDRi: 411 case ARM::tSTRi: 412 case ARM::tLDRspi: 413 case ARM::tSTRspi: 414 case ARM::t2LDRi8: 415 case ARM::t2LDRi12: 416 case ARM::t2STRi8: 417 case ARM::t2STRi12: 418 case ARM::VLDRS: 419 case ARM::VSTRS: 420 return 4; 421 case ARM::VLDRD: 422 case ARM::VSTRD: 423 return 8; 424 case ARM::LDMIA: 425 case ARM::LDMDA: 426 case ARM::LDMDB: 427 case ARM::LDMIB: 428 case ARM::STMIA: 429 case ARM::STMDA: 430 case ARM::STMDB: 431 case ARM::STMIB: 432 case ARM::tLDMIA: 433 case ARM::tLDMIA_UPD: 434 case ARM::tSTMIA_UPD: 435 case ARM::t2LDMIA: 436 case ARM::t2LDMDB: 437 case ARM::t2STMIA: 438 case ARM::t2STMDB: 439 case ARM::VLDMSIA: 440 case ARM::VSTMSIA: 441 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4; 442 case ARM::VLDMDIA: 443 case ARM::VSTMDIA: 444 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8; 445 } 446 } 447 448 /// Update future uses of the base register with the offset introduced 449 /// due to writeback. This function only works on Thumb1. 450 void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, 451 MachineBasicBlock::iterator MBBI, 452 const DebugLoc &DL, unsigned Base, 453 unsigned WordOffset, 454 ARMCC::CondCodes Pred, 455 unsigned PredReg) { 456 assert(isThumb1 && "Can only update base register uses for Thumb1!"); 457 // Start updating any instructions with immediate offsets. Insert a SUB before 458 // the first non-updateable instruction (if any). 459 for (; MBBI != MBB.end(); ++MBBI) { 460 bool InsertSub = false; 461 unsigned Opc = MBBI->getOpcode(); 462 463 if (MBBI->readsRegister(Base)) { 464 int Offset; 465 bool IsLoad = 466 Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi; 467 bool IsStore = 468 Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi; 469 470 if (IsLoad || IsStore) { 471 // Loads and stores with immediate offsets can be updated, but only if 472 // the new offset isn't negative. 473 // The MachineOperand containing the offset immediate is the last one 474 // before predicates. 475 MachineOperand &MO = 476 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); 477 // The offsets are scaled by 1, 2 or 4 depending on the Opcode. 478 Offset = MO.getImm() - WordOffset * getImmScale(Opc); 479 480 // If storing the base register, it needs to be reset first. 481 unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg(); 482 483 if (Offset >= 0 && !(IsStore && InstrSrcReg == Base)) 484 MO.setImm(Offset); 485 else 486 InsertSub = true; 487 488 } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) && 489 !definesCPSR(*MBBI)) { 490 // SUBS/ADDS using this register, with a dead def of the CPSR. 491 // Merge it with the update; if the merged offset is too large, 492 // insert a new sub instead. 493 MachineOperand &MO = 494 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); 495 Offset = (Opc == ARM::tSUBi8) ? 496 MO.getImm() + WordOffset * 4 : 497 MO.getImm() - WordOffset * 4 ; 498 if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) { 499 // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if 500 // Offset == 0. 501 MO.setImm(Offset); 502 // The base register has now been reset, so exit early. 503 return; 504 } else { 505 InsertSub = true; 506 } 507 508 } else { 509 // Can't update the instruction. 510 InsertSub = true; 511 } 512 513 } else if (definesCPSR(*MBBI) || MBBI->isCall() || MBBI->isBranch()) { 514 // Since SUBS sets the condition flags, we can't place the base reset 515 // after an instruction that has a live CPSR def. 516 // The base register might also contain an argument for a function call. 517 InsertSub = true; 518 } 519 520 if (InsertSub) { 521 // An instruction above couldn't be updated, so insert a sub. 522 AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true) 523 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg); 524 return; 525 } 526 527 if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base)) 528 // Register got killed. Stop updating. 529 return; 530 } 531 532 // End of block was reached. 533 if (MBB.succ_size() > 0) { 534 // FIXME: Because of a bug, live registers are sometimes missing from 535 // the successor blocks' live-in sets. This means we can't trust that 536 // information and *always* have to reset at the end of a block. 537 // See PR21029. 538 if (MBBI != MBB.end()) --MBBI; 539 AddDefaultT1CC( 540 BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true) 541 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg); 542 } 543 } 544 545 /// Return the first register of class \p RegClass that is not in \p Regs. 546 unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) { 547 if (!RegClassInfoValid) { 548 RegClassInfo.runOnMachineFunction(*MF); 549 RegClassInfoValid = true; 550 } 551 552 for (unsigned Reg : RegClassInfo.getOrder(&RegClass)) 553 if (!LiveRegs.contains(Reg)) 554 return Reg; 555 return 0; 556 } 557 558 /// Compute live registers just before instruction \p Before (in normal schedule 559 /// direction). Computes backwards so multiple queries in the same block must 560 /// come in reverse order. 561 void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB, 562 MachineBasicBlock::const_iterator Before) { 563 // Initialize if we never queried in this block. 564 if (!LiveRegsValid) { 565 LiveRegs.init(TRI); 566 LiveRegs.addLiveOuts(MBB); 567 LiveRegPos = MBB.end(); 568 LiveRegsValid = true; 569 } 570 // Move backward just before the "Before" position. 571 while (LiveRegPos != Before) { 572 --LiveRegPos; 573 LiveRegs.stepBackward(*LiveRegPos); 574 } 575 } 576 577 static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs, 578 unsigned Reg) { 579 for (const std::pair<unsigned, bool> &R : Regs) 580 if (R.first == Reg) 581 return true; 582 return false; 583 } 584 585 /// Create and insert a LDM or STM with Base as base register and registers in 586 /// Regs as the register operands that would be loaded / stored. It returns 587 /// true if the transformation is done. 588 MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti( 589 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, 590 int Offset, unsigned Base, bool BaseKill, unsigned Opcode, 591 ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL, 592 ArrayRef<std::pair<unsigned, bool>> Regs) { 593 unsigned NumRegs = Regs.size(); 594 assert(NumRegs > 1); 595 596 // For Thumb1 targets, it might be necessary to clobber the CPSR to merge. 597 // Compute liveness information for that register to make the decision. 598 bool SafeToClobberCPSR = !isThumb1 || 599 (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) == 600 MachineBasicBlock::LQR_Dead); 601 602 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback. 603 604 // Exception: If the base register is in the input reglist, Thumb1 LDM is 605 // non-writeback. 606 // It's also not possible to merge an STR of the base register in Thumb1. 607 if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) { 608 assert(Base != ARM::SP && "Thumb1 does not allow SP in register list"); 609 if (Opcode == ARM::tLDRi) { 610 Writeback = false; 611 } else if (Opcode == ARM::tSTRi) { 612 return nullptr; 613 } 614 } 615 616 ARM_AM::AMSubMode Mode = ARM_AM::ia; 617 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA. 618 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); 619 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1; 620 621 if (Offset == 4 && haveIBAndDA) { 622 Mode = ARM_AM::ib; 623 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) { 624 Mode = ARM_AM::da; 625 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) { 626 // VLDM/VSTM do not support DB mode without also updating the base reg. 627 Mode = ARM_AM::db; 628 } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) { 629 // Check if this is a supported opcode before inserting instructions to 630 // calculate a new base register. 631 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr; 632 633 // If starting offset isn't zero, insert a MI to materialize a new base. 634 // But only do so if it is cost effective, i.e. merging more than two 635 // loads / stores. 636 if (NumRegs <= 2) 637 return nullptr; 638 639 // On Thumb1, it's not worth materializing a new base register without 640 // clobbering the CPSR (i.e. not using ADDS/SUBS). 641 if (!SafeToClobberCPSR) 642 return nullptr; 643 644 unsigned NewBase; 645 if (isi32Load(Opcode)) { 646 // If it is a load, then just use one of the destination registers 647 // as the new base. Will no longer be writeback in Thumb1. 648 NewBase = Regs[NumRegs-1].first; 649 Writeback = false; 650 } else { 651 // Find a free register that we can use as scratch register. 652 moveLiveRegsBefore(MBB, InsertBefore); 653 // The merged instruction does not exist yet but will use several Regs if 654 // it is a Store. 655 if (!isLoadSingle(Opcode)) 656 for (const std::pair<unsigned, bool> &R : Regs) 657 LiveRegs.addReg(R.first); 658 659 NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass); 660 if (NewBase == 0) 661 return nullptr; 662 } 663 664 int BaseOpc = 665 isThumb2 ? ARM::t2ADDri : 666 (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi : 667 (isThumb1 && Offset < 8) ? ARM::tADDi3 : 668 isThumb1 ? ARM::tADDi8 : ARM::ADDri; 669 670 if (Offset < 0) { 671 Offset = - Offset; 672 BaseOpc = 673 isThumb2 ? ARM::t2SUBri : 674 (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 : 675 isThumb1 ? ARM::tSUBi8 : ARM::SUBri; 676 } 677 678 if (!TL->isLegalAddImmediate(Offset)) 679 // FIXME: Try add with register operand? 680 return nullptr; // Probably not worth it then. 681 682 // We can only append a kill flag to the add/sub input if the value is not 683 // used in the register list of the stm as well. 684 bool KillOldBase = BaseKill && 685 (!isi32Store(Opcode) || !ContainsReg(Regs, Base)); 686 687 if (isThumb1) { 688 // Thumb1: depending on immediate size, use either 689 // ADDS NewBase, Base, #imm3 690 // or 691 // MOV NewBase, Base 692 // ADDS NewBase, #imm8. 693 if (Base != NewBase && 694 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) { 695 // Need to insert a MOV to the new base first. 696 if (isARMLowRegister(NewBase) && isARMLowRegister(Base) && 697 !STI->hasV6Ops()) { 698 // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr 699 if (Pred != ARMCC::AL) 700 return nullptr; 701 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase) 702 .addReg(Base, getKillRegState(KillOldBase)); 703 } else 704 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase) 705 .addReg(Base, getKillRegState(KillOldBase)) 706 .addImm(Pred).addReg(PredReg); 707 708 // The following ADDS/SUBS becomes an update. 709 Base = NewBase; 710 KillOldBase = true; 711 } 712 if (BaseOpc == ARM::tADDrSPi) { 713 assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4"); 714 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase) 715 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4) 716 .addImm(Pred).addReg(PredReg); 717 } else 718 AddDefaultT1CC( 719 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true) 720 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset) 721 .addImm(Pred).addReg(PredReg); 722 } else { 723 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase) 724 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset) 725 .addImm(Pred).addReg(PredReg).addReg(0); 726 } 727 Base = NewBase; 728 BaseKill = true; // New base is always killed straight away. 729 } 730 731 bool isDef = isLoadSingle(Opcode); 732 733 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with 734 // base register writeback. 735 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); 736 if (!Opcode) 737 return nullptr; 738 739 // Check if a Thumb1 LDM/STM merge is safe. This is the case if: 740 // - There is no writeback (LDM of base register), 741 // - the base register is killed by the merged instruction, 742 // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS 743 // to reset the base register. 744 // Otherwise, don't merge. 745 // It's safe to return here since the code to materialize a new base register 746 // above is also conditional on SafeToClobberCPSR. 747 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill) 748 return nullptr; 749 750 MachineInstrBuilder MIB; 751 752 if (Writeback) { 753 assert(isThumb1 && "expected Writeback only inThumb1"); 754 if (Opcode == ARM::tLDMIA) { 755 assert(!(ContainsReg(Regs, Base)) && "Thumb1 can't LDM ! with Base in Regs"); 756 // Update tLDMIA with writeback if necessary. 757 Opcode = ARM::tLDMIA_UPD; 758 } 759 760 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode)); 761 762 // Thumb1: we might need to set base writeback when building the MI. 763 MIB.addReg(Base, getDefRegState(true)) 764 .addReg(Base, getKillRegState(BaseKill)); 765 766 // The base isn't dead after a merged instruction with writeback. 767 // Insert a sub instruction after the newly formed instruction to reset. 768 if (!BaseKill) 769 UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg); 770 771 } else { 772 // No writeback, simply build the MachineInstr. 773 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode)); 774 MIB.addReg(Base, getKillRegState(BaseKill)); 775 } 776 777 MIB.addImm(Pred).addReg(PredReg); 778 779 for (const std::pair<unsigned, bool> &R : Regs) 780 MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second)); 781 782 return MIB.getInstr(); 783 } 784 785 MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble( 786 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, 787 int Offset, unsigned Base, bool BaseKill, unsigned Opcode, 788 ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL, 789 ArrayRef<std::pair<unsigned, bool>> Regs) const { 790 bool IsLoad = isi32Load(Opcode); 791 assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store"); 792 unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8; 793 794 assert(Regs.size() == 2); 795 MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL, 796 TII->get(LoadStoreOpcode)); 797 if (IsLoad) { 798 MIB.addReg(Regs[0].first, RegState::Define) 799 .addReg(Regs[1].first, RegState::Define); 800 } else { 801 MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second)) 802 .addReg(Regs[1].first, getKillRegState(Regs[1].second)); 803 } 804 MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 805 return MIB.getInstr(); 806 } 807 808 /// Call MergeOps and update MemOps and merges accordingly on success. 809 MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) { 810 const MachineInstr *First = Cand.Instrs.front(); 811 unsigned Opcode = First->getOpcode(); 812 bool IsLoad = isLoadSingle(Opcode); 813 SmallVector<std::pair<unsigned, bool>, 8> Regs; 814 SmallVector<unsigned, 4> ImpDefs; 815 DenseSet<unsigned> KilledRegs; 816 DenseSet<unsigned> UsedRegs; 817 // Determine list of registers and list of implicit super-register defs. 818 for (const MachineInstr *MI : Cand.Instrs) { 819 const MachineOperand &MO = getLoadStoreRegOp(*MI); 820 unsigned Reg = MO.getReg(); 821 bool IsKill = MO.isKill(); 822 if (IsKill) 823 KilledRegs.insert(Reg); 824 Regs.push_back(std::make_pair(Reg, IsKill)); 825 UsedRegs.insert(Reg); 826 827 if (IsLoad) { 828 // Collect any implicit defs of super-registers, after merging we can't 829 // be sure anymore that we properly preserved these live ranges and must 830 // removed these implicit operands. 831 for (const MachineOperand &MO : MI->implicit_operands()) { 832 if (!MO.isReg() || !MO.isDef() || MO.isDead()) 833 continue; 834 assert(MO.isImplicit()); 835 unsigned DefReg = MO.getReg(); 836 837 if (is_contained(ImpDefs, DefReg)) 838 continue; 839 // We can ignore cases where the super-reg is read and written. 840 if (MI->readsRegister(DefReg)) 841 continue; 842 ImpDefs.push_back(DefReg); 843 } 844 } 845 } 846 847 // Attempt the merge. 848 typedef MachineBasicBlock::iterator iterator; 849 MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx]; 850 iterator InsertBefore = std::next(iterator(LatestMI)); 851 MachineBasicBlock &MBB = *LatestMI->getParent(); 852 unsigned Offset = getMemoryOpOffset(*First); 853 unsigned Base = getLoadStoreBaseOp(*First).getReg(); 854 bool BaseKill = LatestMI->killsRegister(Base); 855 unsigned PredReg = 0; 856 ARMCC::CondCodes Pred = getInstrPredicate(*First, PredReg); 857 DebugLoc DL = First->getDebugLoc(); 858 MachineInstr *Merged = nullptr; 859 if (Cand.CanMergeToLSDouble) 860 Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill, 861 Opcode, Pred, PredReg, DL, Regs); 862 if (!Merged && Cand.CanMergeToLSMulti) 863 Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill, 864 Opcode, Pred, PredReg, DL, Regs); 865 if (!Merged) 866 return nullptr; 867 868 // Determine earliest instruction that will get removed. We then keep an 869 // iterator just above it so the following erases don't invalidated it. 870 iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]); 871 bool EarliestAtBegin = false; 872 if (EarliestI == MBB.begin()) { 873 EarliestAtBegin = true; 874 } else { 875 EarliestI = std::prev(EarliestI); 876 } 877 878 // Remove instructions which have been merged. 879 for (MachineInstr *MI : Cand.Instrs) 880 MBB.erase(MI); 881 882 // Determine range between the earliest removed instruction and the new one. 883 if (EarliestAtBegin) 884 EarliestI = MBB.begin(); 885 else 886 EarliestI = std::next(EarliestI); 887 auto FixupRange = make_range(EarliestI, iterator(Merged)); 888 889 if (isLoadSingle(Opcode)) { 890 // If the previous loads defined a super-reg, then we have to mark earlier 891 // operands undef; Replicate the super-reg def on the merged instruction. 892 for (MachineInstr &MI : FixupRange) { 893 for (unsigned &ImpDefReg : ImpDefs) { 894 for (MachineOperand &MO : MI.implicit_operands()) { 895 if (!MO.isReg() || MO.getReg() != ImpDefReg) 896 continue; 897 if (MO.readsReg()) 898 MO.setIsUndef(); 899 else if (MO.isDef()) 900 ImpDefReg = 0; 901 } 902 } 903 } 904 905 MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged); 906 for (unsigned ImpDef : ImpDefs) 907 MIB.addReg(ImpDef, RegState::ImplicitDefine); 908 } else { 909 // Remove kill flags: We are possibly storing the values later now. 910 assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD); 911 for (MachineInstr &MI : FixupRange) { 912 for (MachineOperand &MO : MI.uses()) { 913 if (!MO.isReg() || !MO.isKill()) 914 continue; 915 if (UsedRegs.count(MO.getReg())) 916 MO.setIsKill(false); 917 } 918 } 919 assert(ImpDefs.empty()); 920 } 921 922 return Merged; 923 } 924 925 static bool isValidLSDoubleOffset(int Offset) { 926 unsigned Value = abs(Offset); 927 // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally 928 // multiplied by 4. 929 return (Value % 4) == 0 && Value < 1024; 930 } 931 932 /// Return true for loads/stores that can be combined to a double/multi 933 /// operation without increasing the requirements for alignment. 934 static bool mayCombineMisaligned(const TargetSubtargetInfo &STI, 935 const MachineInstr &MI) { 936 // vldr/vstr trap on misaligned pointers anyway, forming vldm makes no 937 // difference. 938 unsigned Opcode = MI.getOpcode(); 939 if (!isi32Load(Opcode) && !isi32Store(Opcode)) 940 return true; 941 942 // Stack pointer alignment is out of the programmers control so we can trust 943 // SP-relative loads/stores. 944 if (getLoadStoreBaseOp(MI).getReg() == ARM::SP && 945 STI.getFrameLowering()->getTransientStackAlignment() >= 4) 946 return true; 947 return false; 948 } 949 950 /// Find candidates for load/store multiple merge in list of MemOpQueueEntries. 951 void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) { 952 const MachineInstr *FirstMI = MemOps[0].MI; 953 unsigned Opcode = FirstMI->getOpcode(); 954 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); 955 unsigned Size = getLSMultipleTransferSize(FirstMI); 956 957 unsigned SIndex = 0; 958 unsigned EIndex = MemOps.size(); 959 do { 960 // Look at the first instruction. 961 const MachineInstr *MI = MemOps[SIndex].MI; 962 int Offset = MemOps[SIndex].Offset; 963 const MachineOperand &PMO = getLoadStoreRegOp(*MI); 964 unsigned PReg = PMO.getReg(); 965 unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg); 966 unsigned Latest = SIndex; 967 unsigned Earliest = SIndex; 968 unsigned Count = 1; 969 bool CanMergeToLSDouble = 970 STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset); 971 // ARM errata 602117: LDRD with base in list may result in incorrect base 972 // register when interrupted or faulted. 973 if (STI->isCortexM3() && isi32Load(Opcode) && 974 PReg == getLoadStoreBaseOp(*MI).getReg()) 975 CanMergeToLSDouble = false; 976 977 bool CanMergeToLSMulti = true; 978 // On swift vldm/vstm starting with an odd register number as that needs 979 // more uops than single vldrs. 980 if (STI->hasSlowOddRegister() && !isNotVFP && (PRegNum % 2) == 1) 981 CanMergeToLSMulti = false; 982 983 // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it 984 // deprecated; LDM to PC is fine but cannot happen here. 985 if (PReg == ARM::SP || PReg == ARM::PC) 986 CanMergeToLSMulti = CanMergeToLSDouble = false; 987 988 // Should we be conservative? 989 if (AssumeMisalignedLoadStores && !mayCombineMisaligned(*STI, *MI)) 990 CanMergeToLSMulti = CanMergeToLSDouble = false; 991 992 // Merge following instructions where possible. 993 for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) { 994 int NewOffset = MemOps[I].Offset; 995 if (NewOffset != Offset + (int)Size) 996 break; 997 const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI); 998 unsigned Reg = MO.getReg(); 999 if (Reg == ARM::SP || Reg == ARM::PC) 1000 break; 1001 1002 // See if the current load/store may be part of a multi load/store. 1003 unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg); 1004 bool PartOfLSMulti = CanMergeToLSMulti; 1005 if (PartOfLSMulti) { 1006 // Register numbers must be in ascending order. 1007 if (RegNum <= PRegNum) 1008 PartOfLSMulti = false; 1009 // For VFP / NEON load/store multiples, the registers must be 1010 // consecutive and within the limit on the number of registers per 1011 // instruction. 1012 else if (!isNotVFP && RegNum != PRegNum+1) 1013 PartOfLSMulti = false; 1014 } 1015 // See if the current load/store may be part of a double load/store. 1016 bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1; 1017 1018 if (!PartOfLSMulti && !PartOfLSDouble) 1019 break; 1020 CanMergeToLSMulti &= PartOfLSMulti; 1021 CanMergeToLSDouble &= PartOfLSDouble; 1022 // Track MemOp with latest and earliest position (Positions are 1023 // counted in reverse). 1024 unsigned Position = MemOps[I].Position; 1025 if (Position < MemOps[Latest].Position) 1026 Latest = I; 1027 else if (Position > MemOps[Earliest].Position) 1028 Earliest = I; 1029 // Prepare for next MemOp. 1030 Offset += Size; 1031 PRegNum = RegNum; 1032 } 1033 1034 // Form a candidate from the Ops collected so far. 1035 MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate; 1036 for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C) 1037 Candidate->Instrs.push_back(MemOps[C].MI); 1038 Candidate->LatestMIIdx = Latest - SIndex; 1039 Candidate->EarliestMIIdx = Earliest - SIndex; 1040 Candidate->InsertPos = MemOps[Latest].Position; 1041 if (Count == 1) 1042 CanMergeToLSMulti = CanMergeToLSDouble = false; 1043 Candidate->CanMergeToLSMulti = CanMergeToLSMulti; 1044 Candidate->CanMergeToLSDouble = CanMergeToLSDouble; 1045 Candidates.push_back(Candidate); 1046 // Continue after the chain. 1047 SIndex += Count; 1048 } while (SIndex < EIndex); 1049 } 1050 1051 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, 1052 ARM_AM::AMSubMode Mode) { 1053 switch (Opc) { 1054 default: llvm_unreachable("Unhandled opcode!"); 1055 case ARM::LDMIA: 1056 case ARM::LDMDA: 1057 case ARM::LDMDB: 1058 case ARM::LDMIB: 1059 switch (Mode) { 1060 default: llvm_unreachable("Unhandled submode!"); 1061 case ARM_AM::ia: return ARM::LDMIA_UPD; 1062 case ARM_AM::ib: return ARM::LDMIB_UPD; 1063 case ARM_AM::da: return ARM::LDMDA_UPD; 1064 case ARM_AM::db: return ARM::LDMDB_UPD; 1065 } 1066 case ARM::STMIA: 1067 case ARM::STMDA: 1068 case ARM::STMDB: 1069 case ARM::STMIB: 1070 switch (Mode) { 1071 default: llvm_unreachable("Unhandled submode!"); 1072 case ARM_AM::ia: return ARM::STMIA_UPD; 1073 case ARM_AM::ib: return ARM::STMIB_UPD; 1074 case ARM_AM::da: return ARM::STMDA_UPD; 1075 case ARM_AM::db: return ARM::STMDB_UPD; 1076 } 1077 case ARM::t2LDMIA: 1078 case ARM::t2LDMDB: 1079 switch (Mode) { 1080 default: llvm_unreachable("Unhandled submode!"); 1081 case ARM_AM::ia: return ARM::t2LDMIA_UPD; 1082 case ARM_AM::db: return ARM::t2LDMDB_UPD; 1083 } 1084 case ARM::t2STMIA: 1085 case ARM::t2STMDB: 1086 switch (Mode) { 1087 default: llvm_unreachable("Unhandled submode!"); 1088 case ARM_AM::ia: return ARM::t2STMIA_UPD; 1089 case ARM_AM::db: return ARM::t2STMDB_UPD; 1090 } 1091 case ARM::VLDMSIA: 1092 switch (Mode) { 1093 default: llvm_unreachable("Unhandled submode!"); 1094 case ARM_AM::ia: return ARM::VLDMSIA_UPD; 1095 case ARM_AM::db: return ARM::VLDMSDB_UPD; 1096 } 1097 case ARM::VLDMDIA: 1098 switch (Mode) { 1099 default: llvm_unreachable("Unhandled submode!"); 1100 case ARM_AM::ia: return ARM::VLDMDIA_UPD; 1101 case ARM_AM::db: return ARM::VLDMDDB_UPD; 1102 } 1103 case ARM::VSTMSIA: 1104 switch (Mode) { 1105 default: llvm_unreachable("Unhandled submode!"); 1106 case ARM_AM::ia: return ARM::VSTMSIA_UPD; 1107 case ARM_AM::db: return ARM::VSTMSDB_UPD; 1108 } 1109 case ARM::VSTMDIA: 1110 switch (Mode) { 1111 default: llvm_unreachable("Unhandled submode!"); 1112 case ARM_AM::ia: return ARM::VSTMDIA_UPD; 1113 case ARM_AM::db: return ARM::VSTMDDB_UPD; 1114 } 1115 } 1116 } 1117 1118 /// Check if the given instruction increments or decrements a register and 1119 /// return the amount it is incremented/decremented. Returns 0 if the CPSR flags 1120 /// generated by the instruction are possibly read as well. 1121 static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg, 1122 ARMCC::CondCodes Pred, unsigned PredReg) { 1123 bool CheckCPSRDef; 1124 int Scale; 1125 switch (MI.getOpcode()) { 1126 case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break; 1127 case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break; 1128 case ARM::t2SUBri: 1129 case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break; 1130 case ARM::t2ADDri: 1131 case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break; 1132 case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break; 1133 case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break; 1134 default: return 0; 1135 } 1136 1137 unsigned MIPredReg; 1138 if (MI.getOperand(0).getReg() != Reg || 1139 MI.getOperand(1).getReg() != Reg || 1140 getInstrPredicate(MI, MIPredReg) != Pred || 1141 MIPredReg != PredReg) 1142 return 0; 1143 1144 if (CheckCPSRDef && definesCPSR(MI)) 1145 return 0; 1146 return MI.getOperand(2).getImm() * Scale; 1147 } 1148 1149 /// Searches for an increment or decrement of \p Reg before \p MBBI. 1150 static MachineBasicBlock::iterator 1151 findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg, 1152 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) { 1153 Offset = 0; 1154 MachineBasicBlock &MBB = *MBBI->getParent(); 1155 MachineBasicBlock::iterator BeginMBBI = MBB.begin(); 1156 MachineBasicBlock::iterator EndMBBI = MBB.end(); 1157 if (MBBI == BeginMBBI) 1158 return EndMBBI; 1159 1160 // Skip debug values. 1161 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI); 1162 while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI) 1163 --PrevMBBI; 1164 1165 Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg); 1166 return Offset == 0 ? EndMBBI : PrevMBBI; 1167 } 1168 1169 /// Searches for a increment or decrement of \p Reg after \p MBBI. 1170 static MachineBasicBlock::iterator 1171 findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg, 1172 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) { 1173 Offset = 0; 1174 MachineBasicBlock &MBB = *MBBI->getParent(); 1175 MachineBasicBlock::iterator EndMBBI = MBB.end(); 1176 MachineBasicBlock::iterator NextMBBI = std::next(MBBI); 1177 // Skip debug values. 1178 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) 1179 ++NextMBBI; 1180 if (NextMBBI == EndMBBI) 1181 return EndMBBI; 1182 1183 Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg); 1184 return Offset == 0 ? EndMBBI : NextMBBI; 1185 } 1186 1187 /// Fold proceeding/trailing inc/dec of base register into the 1188 /// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible: 1189 /// 1190 /// stmia rn, <ra, rb, rc> 1191 /// rn := rn + 4 * 3; 1192 /// => 1193 /// stmia rn!, <ra, rb, rc> 1194 /// 1195 /// rn := rn - 4 * 3; 1196 /// ldmia rn, <ra, rb, rc> 1197 /// => 1198 /// ldmdb rn!, <ra, rb, rc> 1199 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) { 1200 // Thumb1 is already using updating loads/stores. 1201 if (isThumb1) return false; 1202 1203 const MachineOperand &BaseOP = MI->getOperand(0); 1204 unsigned Base = BaseOP.getReg(); 1205 bool BaseKill = BaseOP.isKill(); 1206 unsigned PredReg = 0; 1207 ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg); 1208 unsigned Opcode = MI->getOpcode(); 1209 DebugLoc DL = MI->getDebugLoc(); 1210 1211 // Can't use an updating ld/st if the base register is also a dest 1212 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined. 1213 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i) 1214 if (MI->getOperand(i).getReg() == Base) 1215 return false; 1216 1217 int Bytes = getLSMultipleTransferSize(MI); 1218 MachineBasicBlock &MBB = *MI->getParent(); 1219 MachineBasicBlock::iterator MBBI(MI); 1220 int Offset; 1221 MachineBasicBlock::iterator MergeInstr 1222 = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset); 1223 ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode); 1224 if (Mode == ARM_AM::ia && Offset == -Bytes) { 1225 Mode = ARM_AM::db; 1226 } else if (Mode == ARM_AM::ib && Offset == -Bytes) { 1227 Mode = ARM_AM::da; 1228 } else { 1229 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset); 1230 if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) && 1231 ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) { 1232 1233 // We couldn't find an inc/dec to merge. But if the base is dead, we 1234 // can still change to a writeback form as that will save us 2 bytes 1235 // of code size. It can create WAW hazards though, so only do it if 1236 // we're minimizing code size. 1237 if (!MBB.getParent()->getFunction()->optForMinSize() || !BaseKill) 1238 return false; 1239 1240 bool HighRegsUsed = false; 1241 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i) 1242 if (MI->getOperand(i).getReg() >= ARM::R8) { 1243 HighRegsUsed = true; 1244 break; 1245 } 1246 1247 if (!HighRegsUsed) 1248 MergeInstr = MBB.end(); 1249 else 1250 return false; 1251 } 1252 } 1253 if (MergeInstr != MBB.end()) 1254 MBB.erase(MergeInstr); 1255 1256 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode); 1257 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc)) 1258 .addReg(Base, getDefRegState(true)) // WB base register 1259 .addReg(Base, getKillRegState(BaseKill)) 1260 .addImm(Pred).addReg(PredReg); 1261 1262 // Transfer the rest of operands. 1263 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum) 1264 MIB.addOperand(MI->getOperand(OpNum)); 1265 1266 // Transfer memoperands. 1267 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); 1268 1269 MBB.erase(MBBI); 1270 return true; 1271 } 1272 1273 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc, 1274 ARM_AM::AddrOpc Mode) { 1275 switch (Opc) { 1276 case ARM::LDRi12: 1277 return ARM::LDR_PRE_IMM; 1278 case ARM::STRi12: 1279 return ARM::STR_PRE_IMM; 1280 case ARM::VLDRS: 1281 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD; 1282 case ARM::VLDRD: 1283 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD; 1284 case ARM::VSTRS: 1285 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD; 1286 case ARM::VSTRD: 1287 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD; 1288 case ARM::t2LDRi8: 1289 case ARM::t2LDRi12: 1290 return ARM::t2LDR_PRE; 1291 case ARM::t2STRi8: 1292 case ARM::t2STRi12: 1293 return ARM::t2STR_PRE; 1294 default: llvm_unreachable("Unhandled opcode!"); 1295 } 1296 } 1297 1298 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, 1299 ARM_AM::AddrOpc Mode) { 1300 switch (Opc) { 1301 case ARM::LDRi12: 1302 return ARM::LDR_POST_IMM; 1303 case ARM::STRi12: 1304 return ARM::STR_POST_IMM; 1305 case ARM::VLDRS: 1306 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD; 1307 case ARM::VLDRD: 1308 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD; 1309 case ARM::VSTRS: 1310 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD; 1311 case ARM::VSTRD: 1312 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD; 1313 case ARM::t2LDRi8: 1314 case ARM::t2LDRi12: 1315 return ARM::t2LDR_POST; 1316 case ARM::t2STRi8: 1317 case ARM::t2STRi12: 1318 return ARM::t2STR_POST; 1319 default: llvm_unreachable("Unhandled opcode!"); 1320 } 1321 } 1322 1323 /// Fold proceeding/trailing inc/dec of base register into the 1324 /// LDR/STR/FLD{D|S}/FST{D|S} op when possible: 1325 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { 1326 // Thumb1 doesn't have updating LDR/STR. 1327 // FIXME: Use LDM/STM with single register instead. 1328 if (isThumb1) return false; 1329 1330 unsigned Base = getLoadStoreBaseOp(*MI).getReg(); 1331 bool BaseKill = getLoadStoreBaseOp(*MI).isKill(); 1332 unsigned Opcode = MI->getOpcode(); 1333 DebugLoc DL = MI->getDebugLoc(); 1334 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS || 1335 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS); 1336 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12); 1337 if (isi32Load(Opcode) || isi32Store(Opcode)) 1338 if (MI->getOperand(2).getImm() != 0) 1339 return false; 1340 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0) 1341 return false; 1342 1343 // Can't do the merge if the destination register is the same as the would-be 1344 // writeback register. 1345 if (MI->getOperand(0).getReg() == Base) 1346 return false; 1347 1348 unsigned PredReg = 0; 1349 ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg); 1350 int Bytes = getLSMultipleTransferSize(MI); 1351 MachineBasicBlock &MBB = *MI->getParent(); 1352 MachineBasicBlock::iterator MBBI(MI); 1353 int Offset; 1354 MachineBasicBlock::iterator MergeInstr 1355 = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset); 1356 unsigned NewOpc; 1357 if (!isAM5 && Offset == Bytes) { 1358 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add); 1359 } else if (Offset == -Bytes) { 1360 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub); 1361 } else { 1362 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset); 1363 if (Offset == Bytes) { 1364 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add); 1365 } else if (!isAM5 && Offset == -Bytes) { 1366 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub); 1367 } else 1368 return false; 1369 } 1370 MBB.erase(MergeInstr); 1371 1372 ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add; 1373 1374 bool isLd = isLoadSingle(Opcode); 1375 if (isAM5) { 1376 // VLDM[SD]_UPD, VSTM[SD]_UPD 1377 // (There are no base-updating versions of VLDR/VSTR instructions, but the 1378 // updating load/store-multiple instructions can be used with only one 1379 // register.) 1380 MachineOperand &MO = MI->getOperand(0); 1381 BuildMI(MBB, MBBI, DL, TII->get(NewOpc)) 1382 .addReg(Base, getDefRegState(true)) // WB base register 1383 .addReg(Base, getKillRegState(isLd ? BaseKill : false)) 1384 .addImm(Pred).addReg(PredReg) 1385 .addReg(MO.getReg(), (isLd ? getDefRegState(true) : 1386 getKillRegState(MO.isKill()))); 1387 } else if (isLd) { 1388 if (isAM2) { 1389 // LDR_PRE, LDR_POST 1390 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) { 1391 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) 1392 .addReg(Base, RegState::Define) 1393 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 1394 } else { 1395 int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); 1396 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) 1397 .addReg(Base, RegState::Define) 1398 .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg); 1399 } 1400 } else { 1401 // t2LDR_PRE, t2LDR_POST 1402 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) 1403 .addReg(Base, RegState::Define) 1404 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 1405 } 1406 } else { 1407 MachineOperand &MO = MI->getOperand(0); 1408 // FIXME: post-indexed stores use am2offset_imm, which still encodes 1409 // the vestigal zero-reg offset register. When that's fixed, this clause 1410 // can be removed entirely. 1411 if (isAM2 && NewOpc == ARM::STR_POST_IMM) { 1412 int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); 1413 // STR_PRE, STR_POST 1414 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base) 1415 .addReg(MO.getReg(), getKillRegState(MO.isKill())) 1416 .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg); 1417 } else { 1418 // t2STR_PRE, t2STR_POST 1419 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base) 1420 .addReg(MO.getReg(), getKillRegState(MO.isKill())) 1421 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 1422 } 1423 } 1424 MBB.erase(MBBI); 1425 1426 return true; 1427 } 1428 1429 bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const { 1430 unsigned Opcode = MI.getOpcode(); 1431 assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) && 1432 "Must have t2STRDi8 or t2LDRDi8"); 1433 if (MI.getOperand(3).getImm() != 0) 1434 return false; 1435 1436 // Behaviour for writeback is undefined if base register is the same as one 1437 // of the others. 1438 const MachineOperand &BaseOp = MI.getOperand(2); 1439 unsigned Base = BaseOp.getReg(); 1440 const MachineOperand &Reg0Op = MI.getOperand(0); 1441 const MachineOperand &Reg1Op = MI.getOperand(1); 1442 if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base) 1443 return false; 1444 1445 unsigned PredReg; 1446 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 1447 MachineBasicBlock::iterator MBBI(MI); 1448 MachineBasicBlock &MBB = *MI.getParent(); 1449 int Offset; 1450 MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred, 1451 PredReg, Offset); 1452 unsigned NewOpc; 1453 if (Offset == 8 || Offset == -8) { 1454 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE; 1455 } else { 1456 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset); 1457 if (Offset == 8 || Offset == -8) { 1458 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST; 1459 } else 1460 return false; 1461 } 1462 MBB.erase(MergeInstr); 1463 1464 DebugLoc DL = MI.getDebugLoc(); 1465 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc)); 1466 if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) { 1467 MIB.addOperand(Reg0Op).addOperand(Reg1Op) 1468 .addReg(BaseOp.getReg(), RegState::Define); 1469 } else { 1470 assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST); 1471 MIB.addReg(BaseOp.getReg(), RegState::Define) 1472 .addOperand(Reg0Op).addOperand(Reg1Op); 1473 } 1474 MIB.addReg(BaseOp.getReg(), RegState::Kill) 1475 .addImm(Offset).addImm(Pred).addReg(PredReg); 1476 assert(TII->get(Opcode).getNumOperands() == 6 && 1477 TII->get(NewOpc).getNumOperands() == 7 && 1478 "Unexpected number of operands in Opcode specification."); 1479 1480 // Transfer implicit operands. 1481 for (const MachineOperand &MO : MI.implicit_operands()) 1482 MIB.addOperand(MO); 1483 MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); 1484 1485 MBB.erase(MBBI); 1486 return true; 1487 } 1488 1489 /// Returns true if instruction is a memory operation that this pass is capable 1490 /// of operating on. 1491 static bool isMemoryOp(const MachineInstr &MI) { 1492 unsigned Opcode = MI.getOpcode(); 1493 switch (Opcode) { 1494 case ARM::VLDRS: 1495 case ARM::VSTRS: 1496 case ARM::VLDRD: 1497 case ARM::VSTRD: 1498 case ARM::LDRi12: 1499 case ARM::STRi12: 1500 case ARM::tLDRi: 1501 case ARM::tSTRi: 1502 case ARM::tLDRspi: 1503 case ARM::tSTRspi: 1504 case ARM::t2LDRi8: 1505 case ARM::t2LDRi12: 1506 case ARM::t2STRi8: 1507 case ARM::t2STRi12: 1508 break; 1509 default: 1510 return false; 1511 } 1512 if (!MI.getOperand(1).isReg()) 1513 return false; 1514 1515 // When no memory operands are present, conservatively assume unaligned, 1516 // volatile, unfoldable. 1517 if (!MI.hasOneMemOperand()) 1518 return false; 1519 1520 const MachineMemOperand &MMO = **MI.memoperands_begin(); 1521 1522 // Don't touch volatile memory accesses - we may be changing their order. 1523 if (MMO.isVolatile()) 1524 return false; 1525 1526 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is 1527 // not. 1528 if (MMO.getAlignment() < 4) 1529 return false; 1530 1531 // str <undef> could probably be eliminated entirely, but for now we just want 1532 // to avoid making a mess of it. 1533 // FIXME: Use str <undef> as a wildcard to enable better stm folding. 1534 if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef()) 1535 return false; 1536 1537 // Likewise don't mess with references to undefined addresses. 1538 if (MI.getOperand(1).isUndef()) 1539 return false; 1540 1541 return true; 1542 } 1543 1544 static void InsertLDR_STR(MachineBasicBlock &MBB, 1545 MachineBasicBlock::iterator &MBBI, int Offset, 1546 bool isDef, const DebugLoc &DL, unsigned NewOpc, 1547 unsigned Reg, bool RegDeadKill, bool RegUndef, 1548 unsigned BaseReg, bool BaseKill, bool BaseUndef, 1549 bool OffKill, bool OffUndef, ARMCC::CondCodes Pred, 1550 unsigned PredReg, const TargetInstrInfo *TII, 1551 bool isT2) { 1552 if (isDef) { 1553 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), 1554 TII->get(NewOpc)) 1555 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill)) 1556 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef)); 1557 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 1558 } else { 1559 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), 1560 TII->get(NewOpc)) 1561 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef)) 1562 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef)); 1563 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 1564 } 1565 } 1566 1567 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, 1568 MachineBasicBlock::iterator &MBBI) { 1569 MachineInstr *MI = &*MBBI; 1570 unsigned Opcode = MI->getOpcode(); 1571 if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8) 1572 return false; 1573 1574 const MachineOperand &BaseOp = MI->getOperand(2); 1575 unsigned BaseReg = BaseOp.getReg(); 1576 unsigned EvenReg = MI->getOperand(0).getReg(); 1577 unsigned OddReg = MI->getOperand(1).getReg(); 1578 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false); 1579 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false); 1580 1581 // ARM errata 602117: LDRD with base in list may result in incorrect base 1582 // register when interrupted or faulted. 1583 bool Errata602117 = EvenReg == BaseReg && 1584 (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3(); 1585 // ARM LDRD/STRD needs consecutive registers. 1586 bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) && 1587 (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum); 1588 1589 if (!Errata602117 && !NonConsecutiveRegs) 1590 return false; 1591 1592 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8; 1593 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8; 1594 bool EvenDeadKill = isLd ? 1595 MI->getOperand(0).isDead() : MI->getOperand(0).isKill(); 1596 bool EvenUndef = MI->getOperand(0).isUndef(); 1597 bool OddDeadKill = isLd ? 1598 MI->getOperand(1).isDead() : MI->getOperand(1).isKill(); 1599 bool OddUndef = MI->getOperand(1).isUndef(); 1600 bool BaseKill = BaseOp.isKill(); 1601 bool BaseUndef = BaseOp.isUndef(); 1602 bool OffKill = isT2 ? false : MI->getOperand(3).isKill(); 1603 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef(); 1604 int OffImm = getMemoryOpOffset(*MI); 1605 unsigned PredReg = 0; 1606 ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg); 1607 1608 if (OddRegNum > EvenRegNum && OffImm == 0) { 1609 // Ascending register numbers and no offset. It's safe to change it to a 1610 // ldm or stm. 1611 unsigned NewOpc = (isLd) 1612 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA) 1613 : (isT2 ? ARM::t2STMIA : ARM::STMIA); 1614 if (isLd) { 1615 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) 1616 .addReg(BaseReg, getKillRegState(BaseKill)) 1617 .addImm(Pred).addReg(PredReg) 1618 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill)) 1619 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill)); 1620 ++NumLDRD2LDM; 1621 } else { 1622 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) 1623 .addReg(BaseReg, getKillRegState(BaseKill)) 1624 .addImm(Pred).addReg(PredReg) 1625 .addReg(EvenReg, 1626 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef)) 1627 .addReg(OddReg, 1628 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef)); 1629 ++NumSTRD2STM; 1630 } 1631 } else { 1632 // Split into two instructions. 1633 unsigned NewOpc = (isLd) 1634 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) 1635 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); 1636 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset, 1637 // so adjust and use t2LDRi12 here for that. 1638 unsigned NewOpc2 = (isLd) 1639 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) 1640 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); 1641 DebugLoc dl = MBBI->getDebugLoc(); 1642 // If this is a load and base register is killed, it may have been 1643 // re-defed by the load, make sure the first load does not clobber it. 1644 if (isLd && 1645 (BaseKill || OffKill) && 1646 (TRI->regsOverlap(EvenReg, BaseReg))) { 1647 assert(!TRI->regsOverlap(OddReg, BaseReg)); 1648 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, 1649 OddReg, OddDeadKill, false, 1650 BaseReg, false, BaseUndef, false, OffUndef, 1651 Pred, PredReg, TII, isT2); 1652 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, 1653 EvenReg, EvenDeadKill, false, 1654 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, 1655 Pred, PredReg, TII, isT2); 1656 } else { 1657 if (OddReg == EvenReg && EvenDeadKill) { 1658 // If the two source operands are the same, the kill marker is 1659 // probably on the first one. e.g. 1660 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0 1661 EvenDeadKill = false; 1662 OddDeadKill = true; 1663 } 1664 // Never kill the base register in the first instruction. 1665 if (EvenReg == BaseReg) 1666 EvenDeadKill = false; 1667 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, 1668 EvenReg, EvenDeadKill, EvenUndef, 1669 BaseReg, false, BaseUndef, false, OffUndef, 1670 Pred, PredReg, TII, isT2); 1671 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, 1672 OddReg, OddDeadKill, OddUndef, 1673 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, 1674 Pred, PredReg, TII, isT2); 1675 } 1676 if (isLd) 1677 ++NumLDRD2LDR; 1678 else 1679 ++NumSTRD2STR; 1680 } 1681 1682 MBBI = MBB.erase(MBBI); 1683 return true; 1684 } 1685 1686 /// An optimization pass to turn multiple LDR / STR ops of the same base and 1687 /// incrementing offset into LDM / STM ops. 1688 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { 1689 MemOpQueue MemOps; 1690 unsigned CurrBase = 0; 1691 unsigned CurrOpc = ~0u; 1692 ARMCC::CondCodes CurrPred = ARMCC::AL; 1693 unsigned Position = 0; 1694 assert(Candidates.size() == 0); 1695 assert(MergeBaseCandidates.size() == 0); 1696 LiveRegsValid = false; 1697 1698 for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin(); 1699 I = MBBI) { 1700 // The instruction in front of the iterator is the one we look at. 1701 MBBI = std::prev(I); 1702 if (FixInvalidRegPairOp(MBB, MBBI)) 1703 continue; 1704 ++Position; 1705 1706 if (isMemoryOp(*MBBI)) { 1707 unsigned Opcode = MBBI->getOpcode(); 1708 const MachineOperand &MO = MBBI->getOperand(0); 1709 unsigned Reg = MO.getReg(); 1710 unsigned Base = getLoadStoreBaseOp(*MBBI).getReg(); 1711 unsigned PredReg = 0; 1712 ARMCC::CondCodes Pred = getInstrPredicate(*MBBI, PredReg); 1713 int Offset = getMemoryOpOffset(*MBBI); 1714 if (CurrBase == 0) { 1715 // Start of a new chain. 1716 CurrBase = Base; 1717 CurrOpc = Opcode; 1718 CurrPred = Pred; 1719 MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position)); 1720 continue; 1721 } 1722 // Note: No need to match PredReg in the next if. 1723 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) { 1724 // Watch out for: 1725 // r4 := ldr [r0, #8] 1726 // r4 := ldr [r0, #4] 1727 // or 1728 // r0 := ldr [r0] 1729 // If a load overrides the base register or a register loaded by 1730 // another load in our chain, we cannot take this instruction. 1731 bool Overlap = false; 1732 if (isLoadSingle(Opcode)) { 1733 Overlap = (Base == Reg); 1734 if (!Overlap) { 1735 for (const MemOpQueueEntry &E : MemOps) { 1736 if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) { 1737 Overlap = true; 1738 break; 1739 } 1740 } 1741 } 1742 } 1743 1744 if (!Overlap) { 1745 // Check offset and sort memory operation into the current chain. 1746 if (Offset > MemOps.back().Offset) { 1747 MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position)); 1748 continue; 1749 } else { 1750 MemOpQueue::iterator MI, ME; 1751 for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) { 1752 if (Offset < MI->Offset) { 1753 // Found a place to insert. 1754 break; 1755 } 1756 if (Offset == MI->Offset) { 1757 // Collision, abort. 1758 MI = ME; 1759 break; 1760 } 1761 } 1762 if (MI != MemOps.end()) { 1763 MemOps.insert(MI, MemOpQueueEntry(*MBBI, Offset, Position)); 1764 continue; 1765 } 1766 } 1767 } 1768 } 1769 1770 // Don't advance the iterator; The op will start a new chain next. 1771 MBBI = I; 1772 --Position; 1773 // Fallthrough to look into existing chain. 1774 } else if (MBBI->isDebugValue()) { 1775 continue; 1776 } else if (MBBI->getOpcode() == ARM::t2LDRDi8 || 1777 MBBI->getOpcode() == ARM::t2STRDi8) { 1778 // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions 1779 // remember them because we may still be able to merge add/sub into them. 1780 MergeBaseCandidates.push_back(&*MBBI); 1781 } 1782 1783 1784 // If we are here then the chain is broken; Extract candidates for a merge. 1785 if (MemOps.size() > 0) { 1786 FormCandidates(MemOps); 1787 // Reset for the next chain. 1788 CurrBase = 0; 1789 CurrOpc = ~0u; 1790 CurrPred = ARMCC::AL; 1791 MemOps.clear(); 1792 } 1793 } 1794 if (MemOps.size() > 0) 1795 FormCandidates(MemOps); 1796 1797 // Sort candidates so they get processed from end to begin of the basic 1798 // block later; This is necessary for liveness calculation. 1799 auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) { 1800 return M0->InsertPos < M1->InsertPos; 1801 }; 1802 std::sort(Candidates.begin(), Candidates.end(), LessThan); 1803 1804 // Go through list of candidates and merge. 1805 bool Changed = false; 1806 for (const MergeCandidate *Candidate : Candidates) { 1807 if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) { 1808 MachineInstr *Merged = MergeOpsUpdate(*Candidate); 1809 // Merge preceding/trailing base inc/dec into the merged op. 1810 if (Merged) { 1811 Changed = true; 1812 unsigned Opcode = Merged->getOpcode(); 1813 if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8) 1814 MergeBaseUpdateLSDouble(*Merged); 1815 else 1816 MergeBaseUpdateLSMultiple(Merged); 1817 } else { 1818 for (MachineInstr *MI : Candidate->Instrs) { 1819 if (MergeBaseUpdateLoadStore(MI)) 1820 Changed = true; 1821 } 1822 } 1823 } else { 1824 assert(Candidate->Instrs.size() == 1); 1825 if (MergeBaseUpdateLoadStore(Candidate->Instrs.front())) 1826 Changed = true; 1827 } 1828 } 1829 Candidates.clear(); 1830 // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt. 1831 for (MachineInstr *MI : MergeBaseCandidates) 1832 MergeBaseUpdateLSDouble(*MI); 1833 MergeBaseCandidates.clear(); 1834 1835 return Changed; 1836 } 1837 1838 /// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr") 1839 /// into the preceding stack restore so it directly restore the value of LR 1840 /// into pc. 1841 /// ldmfd sp!, {..., lr} 1842 /// bx lr 1843 /// or 1844 /// ldmfd sp!, {..., lr} 1845 /// mov pc, lr 1846 /// => 1847 /// ldmfd sp!, {..., pc} 1848 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { 1849 // Thumb1 LDM doesn't allow high registers. 1850 if (isThumb1) return false; 1851 if (MBB.empty()) return false; 1852 1853 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); 1854 if (MBBI != MBB.begin() && MBBI != MBB.end() && 1855 (MBBI->getOpcode() == ARM::BX_RET || 1856 MBBI->getOpcode() == ARM::tBX_RET || 1857 MBBI->getOpcode() == ARM::MOVPCLR)) { 1858 MachineBasicBlock::iterator PrevI = std::prev(MBBI); 1859 // Ignore any DBG_VALUE instructions. 1860 while (PrevI->isDebugValue() && PrevI != MBB.begin()) 1861 --PrevI; 1862 MachineInstr &PrevMI = *PrevI; 1863 unsigned Opcode = PrevMI.getOpcode(); 1864 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD || 1865 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD || 1866 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) { 1867 MachineOperand &MO = PrevMI.getOperand(PrevMI.getNumOperands() - 1); 1868 if (MO.getReg() != ARM::LR) 1869 return false; 1870 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET); 1871 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) || 1872 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!"); 1873 PrevMI.setDesc(TII->get(NewOpc)); 1874 MO.setReg(ARM::PC); 1875 PrevMI.copyImplicitOps(*MBB.getParent(), *MBBI); 1876 MBB.erase(MBBI); 1877 return true; 1878 } 1879 } 1880 return false; 1881 } 1882 1883 bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) { 1884 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1885 if (MBBI == MBB.begin() || MBBI == MBB.end() || 1886 MBBI->getOpcode() != ARM::tBX_RET) 1887 return false; 1888 1889 MachineBasicBlock::iterator Prev = MBBI; 1890 --Prev; 1891 if (Prev->getOpcode() != ARM::tMOVr || !Prev->definesRegister(ARM::LR)) 1892 return false; 1893 1894 for (auto Use : Prev->uses()) 1895 if (Use.isKill()) { 1896 AddDefaultPred(BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX)) 1897 .addReg(Use.getReg(), RegState::Kill)) 1898 .copyImplicitOps(*MBBI); 1899 MBB.erase(MBBI); 1900 MBB.erase(Prev); 1901 return true; 1902 } 1903 1904 llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?"); 1905 } 1906 1907 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { 1908 if (skipFunction(*Fn.getFunction())) 1909 return false; 1910 1911 MF = &Fn; 1912 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget()); 1913 TL = STI->getTargetLowering(); 1914 AFI = Fn.getInfo<ARMFunctionInfo>(); 1915 TII = STI->getInstrInfo(); 1916 TRI = STI->getRegisterInfo(); 1917 1918 RegClassInfoValid = false; 1919 isThumb2 = AFI->isThumb2Function(); 1920 isThumb1 = AFI->isThumbFunction() && !isThumb2; 1921 1922 bool Modified = false; 1923 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; 1924 ++MFI) { 1925 MachineBasicBlock &MBB = *MFI; 1926 Modified |= LoadStoreMultipleOpti(MBB); 1927 if (STI->hasV5TOps()) 1928 Modified |= MergeReturnIntoLDM(MBB); 1929 if (isThumb1) 1930 Modified |= CombineMovBx(MBB); 1931 } 1932 1933 Allocator.DestroyAll(); 1934 return Modified; 1935 } 1936 1937 #define ARM_PREALLOC_LOAD_STORE_OPT_NAME \ 1938 "ARM pre- register allocation load / store optimization pass" 1939 1940 namespace { 1941 /// Pre- register allocation pass that move load / stores from consecutive 1942 /// locations close to make it more likely they will be combined later. 1943 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{ 1944 static char ID; 1945 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {} 1946 1947 const DataLayout *TD; 1948 const TargetInstrInfo *TII; 1949 const TargetRegisterInfo *TRI; 1950 const ARMSubtarget *STI; 1951 MachineRegisterInfo *MRI; 1952 MachineFunction *MF; 1953 1954 bool runOnMachineFunction(MachineFunction &Fn) override; 1955 1956 const char *getPassName() const override { 1957 return ARM_PREALLOC_LOAD_STORE_OPT_NAME; 1958 } 1959 1960 private: 1961 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, 1962 unsigned &NewOpc, unsigned &EvenReg, 1963 unsigned &OddReg, unsigned &BaseReg, 1964 int &Offset, 1965 unsigned &PredReg, ARMCC::CondCodes &Pred, 1966 bool &isT2); 1967 bool RescheduleOps(MachineBasicBlock *MBB, 1968 SmallVectorImpl<MachineInstr *> &Ops, 1969 unsigned Base, bool isLd, 1970 DenseMap<MachineInstr*, unsigned> &MI2LocMap); 1971 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB); 1972 }; 1973 char ARMPreAllocLoadStoreOpt::ID = 0; 1974 } 1975 1976 INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt", 1977 ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false) 1978 1979 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { 1980 if (AssumeMisalignedLoadStores || skipFunction(*Fn.getFunction())) 1981 return false; 1982 1983 TD = &Fn.getDataLayout(); 1984 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget()); 1985 TII = STI->getInstrInfo(); 1986 TRI = STI->getRegisterInfo(); 1987 MRI = &Fn.getRegInfo(); 1988 MF = &Fn; 1989 1990 bool Modified = false; 1991 for (MachineBasicBlock &MFI : Fn) 1992 Modified |= RescheduleLoadStoreInstrs(&MFI); 1993 1994 return Modified; 1995 } 1996 1997 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, 1998 MachineBasicBlock::iterator I, 1999 MachineBasicBlock::iterator E, 2000 SmallPtrSetImpl<MachineInstr*> &MemOps, 2001 SmallSet<unsigned, 4> &MemRegs, 2002 const TargetRegisterInfo *TRI) { 2003 // Are there stores / loads / calls between them? 2004 // FIXME: This is overly conservative. We should make use of alias information 2005 // some day. 2006 SmallSet<unsigned, 4> AddedRegPressure; 2007 while (++I != E) { 2008 if (I->isDebugValue() || MemOps.count(&*I)) 2009 continue; 2010 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects()) 2011 return false; 2012 if (isLd && I->mayStore()) 2013 return false; 2014 if (!isLd) { 2015 if (I->mayLoad()) 2016 return false; 2017 // It's not safe to move the first 'str' down. 2018 // str r1, [r0] 2019 // strh r5, [r0] 2020 // str r4, [r0, #+4] 2021 if (I->mayStore()) 2022 return false; 2023 } 2024 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) { 2025 MachineOperand &MO = I->getOperand(j); 2026 if (!MO.isReg()) 2027 continue; 2028 unsigned Reg = MO.getReg(); 2029 if (MO.isDef() && TRI->regsOverlap(Reg, Base)) 2030 return false; 2031 if (Reg != Base && !MemRegs.count(Reg)) 2032 AddedRegPressure.insert(Reg); 2033 } 2034 } 2035 2036 // Estimate register pressure increase due to the transformation. 2037 if (MemRegs.size() <= 4) 2038 // Ok if we are moving small number of instructions. 2039 return true; 2040 return AddedRegPressure.size() <= MemRegs.size() * 2; 2041 } 2042 2043 bool 2044 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, 2045 DebugLoc &dl, unsigned &NewOpc, 2046 unsigned &FirstReg, 2047 unsigned &SecondReg, 2048 unsigned &BaseReg, int &Offset, 2049 unsigned &PredReg, 2050 ARMCC::CondCodes &Pred, 2051 bool &isT2) { 2052 // Make sure we're allowed to generate LDRD/STRD. 2053 if (!STI->hasV5TEOps()) 2054 return false; 2055 2056 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD 2057 unsigned Scale = 1; 2058 unsigned Opcode = Op0->getOpcode(); 2059 if (Opcode == ARM::LDRi12) { 2060 NewOpc = ARM::LDRD; 2061 } else if (Opcode == ARM::STRi12) { 2062 NewOpc = ARM::STRD; 2063 } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) { 2064 NewOpc = ARM::t2LDRDi8; 2065 Scale = 4; 2066 isT2 = true; 2067 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) { 2068 NewOpc = ARM::t2STRDi8; 2069 Scale = 4; 2070 isT2 = true; 2071 } else { 2072 return false; 2073 } 2074 2075 // Make sure the base address satisfies i64 ld / st alignment requirement. 2076 // At the moment, we ignore the memoryoperand's value. 2077 // If we want to use AliasAnalysis, we should check it accordingly. 2078 if (!Op0->hasOneMemOperand() || 2079 (*Op0->memoperands_begin())->isVolatile()) 2080 return false; 2081 2082 unsigned Align = (*Op0->memoperands_begin())->getAlignment(); 2083 const Function *Func = MF->getFunction(); 2084 unsigned ReqAlign = STI->hasV6Ops() 2085 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext())) 2086 : 8; // Pre-v6 need 8-byte align 2087 if (Align < ReqAlign) 2088 return false; 2089 2090 // Then make sure the immediate offset fits. 2091 int OffImm = getMemoryOpOffset(*Op0); 2092 if (isT2) { 2093 int Limit = (1 << 8) * Scale; 2094 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1))) 2095 return false; 2096 Offset = OffImm; 2097 } else { 2098 ARM_AM::AddrOpc AddSub = ARM_AM::add; 2099 if (OffImm < 0) { 2100 AddSub = ARM_AM::sub; 2101 OffImm = - OffImm; 2102 } 2103 int Limit = (1 << 8) * Scale; 2104 if (OffImm >= Limit || (OffImm & (Scale-1))) 2105 return false; 2106 Offset = ARM_AM::getAM3Opc(AddSub, OffImm); 2107 } 2108 FirstReg = Op0->getOperand(0).getReg(); 2109 SecondReg = Op1->getOperand(0).getReg(); 2110 if (FirstReg == SecondReg) 2111 return false; 2112 BaseReg = Op0->getOperand(1).getReg(); 2113 Pred = getInstrPredicate(*Op0, PredReg); 2114 dl = Op0->getDebugLoc(); 2115 return true; 2116 } 2117 2118 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, 2119 SmallVectorImpl<MachineInstr *> &Ops, 2120 unsigned Base, bool isLd, 2121 DenseMap<MachineInstr*, unsigned> &MI2LocMap) { 2122 bool RetVal = false; 2123 2124 // Sort by offset (in reverse order). 2125 std::sort(Ops.begin(), Ops.end(), 2126 [](const MachineInstr *LHS, const MachineInstr *RHS) { 2127 int LOffset = getMemoryOpOffset(*LHS); 2128 int ROffset = getMemoryOpOffset(*RHS); 2129 assert(LHS == RHS || LOffset != ROffset); 2130 return LOffset > ROffset; 2131 }); 2132 2133 // The loads / stores of the same base are in order. Scan them from first to 2134 // last and check for the following: 2135 // 1. Any def of base. 2136 // 2. Any gaps. 2137 while (Ops.size() > 1) { 2138 unsigned FirstLoc = ~0U; 2139 unsigned LastLoc = 0; 2140 MachineInstr *FirstOp = nullptr; 2141 MachineInstr *LastOp = nullptr; 2142 int LastOffset = 0; 2143 unsigned LastOpcode = 0; 2144 unsigned LastBytes = 0; 2145 unsigned NumMove = 0; 2146 for (int i = Ops.size() - 1; i >= 0; --i) { 2147 MachineInstr *Op = Ops[i]; 2148 unsigned Loc = MI2LocMap[Op]; 2149 if (Loc <= FirstLoc) { 2150 FirstLoc = Loc; 2151 FirstOp = Op; 2152 } 2153 if (Loc >= LastLoc) { 2154 LastLoc = Loc; 2155 LastOp = Op; 2156 } 2157 2158 unsigned LSMOpcode 2159 = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia); 2160 if (LastOpcode && LSMOpcode != LastOpcode) 2161 break; 2162 2163 int Offset = getMemoryOpOffset(*Op); 2164 unsigned Bytes = getLSMultipleTransferSize(Op); 2165 if (LastBytes) { 2166 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes)) 2167 break; 2168 } 2169 LastOffset = Offset; 2170 LastBytes = Bytes; 2171 LastOpcode = LSMOpcode; 2172 if (++NumMove == 8) // FIXME: Tune this limit. 2173 break; 2174 } 2175 2176 if (NumMove <= 1) 2177 Ops.pop_back(); 2178 else { 2179 SmallPtrSet<MachineInstr*, 4> MemOps; 2180 SmallSet<unsigned, 4> MemRegs; 2181 for (int i = NumMove-1; i >= 0; --i) { 2182 MemOps.insert(Ops[i]); 2183 MemRegs.insert(Ops[i]->getOperand(0).getReg()); 2184 } 2185 2186 // Be conservative, if the instructions are too far apart, don't 2187 // move them. We want to limit the increase of register pressure. 2188 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this. 2189 if (DoMove) 2190 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp, 2191 MemOps, MemRegs, TRI); 2192 if (!DoMove) { 2193 for (unsigned i = 0; i != NumMove; ++i) 2194 Ops.pop_back(); 2195 } else { 2196 // This is the new location for the loads / stores. 2197 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp; 2198 while (InsertPos != MBB->end() && 2199 (MemOps.count(&*InsertPos) || InsertPos->isDebugValue())) 2200 ++InsertPos; 2201 2202 // If we are moving a pair of loads / stores, see if it makes sense 2203 // to try to allocate a pair of registers that can form register pairs. 2204 MachineInstr *Op0 = Ops.back(); 2205 MachineInstr *Op1 = Ops[Ops.size()-2]; 2206 unsigned FirstReg = 0, SecondReg = 0; 2207 unsigned BaseReg = 0, PredReg = 0; 2208 ARMCC::CondCodes Pred = ARMCC::AL; 2209 bool isT2 = false; 2210 unsigned NewOpc = 0; 2211 int Offset = 0; 2212 DebugLoc dl; 2213 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc, 2214 FirstReg, SecondReg, BaseReg, 2215 Offset, PredReg, Pred, isT2)) { 2216 Ops.pop_back(); 2217 Ops.pop_back(); 2218 2219 const MCInstrDesc &MCID = TII->get(NewOpc); 2220 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF); 2221 MRI->constrainRegClass(FirstReg, TRC); 2222 MRI->constrainRegClass(SecondReg, TRC); 2223 2224 // Form the pair instruction. 2225 if (isLd) { 2226 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) 2227 .addReg(FirstReg, RegState::Define) 2228 .addReg(SecondReg, RegState::Define) 2229 .addReg(BaseReg); 2230 // FIXME: We're converting from LDRi12 to an insn that still 2231 // uses addrmode2, so we need an explicit offset reg. It should 2232 // always by reg0 since we're transforming LDRi12s. 2233 if (!isT2) 2234 MIB.addReg(0); 2235 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 2236 MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1)); 2237 DEBUG(dbgs() << "Formed " << *MIB << "\n"); 2238 ++NumLDRDFormed; 2239 } else { 2240 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) 2241 .addReg(FirstReg) 2242 .addReg(SecondReg) 2243 .addReg(BaseReg); 2244 // FIXME: We're converting from LDRi12 to an insn that still 2245 // uses addrmode2, so we need an explicit offset reg. It should 2246 // always by reg0 since we're transforming STRi12s. 2247 if (!isT2) 2248 MIB.addReg(0); 2249 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 2250 MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1)); 2251 DEBUG(dbgs() << "Formed " << *MIB << "\n"); 2252 ++NumSTRDFormed; 2253 } 2254 MBB->erase(Op0); 2255 MBB->erase(Op1); 2256 2257 if (!isT2) { 2258 // Add register allocation hints to form register pairs. 2259 MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg); 2260 MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg); 2261 } 2262 } else { 2263 for (unsigned i = 0; i != NumMove; ++i) { 2264 MachineInstr *Op = Ops.back(); 2265 Ops.pop_back(); 2266 MBB->splice(InsertPos, MBB, Op); 2267 } 2268 } 2269 2270 NumLdStMoved += NumMove; 2271 RetVal = true; 2272 } 2273 } 2274 } 2275 2276 return RetVal; 2277 } 2278 2279 bool 2280 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { 2281 bool RetVal = false; 2282 2283 DenseMap<MachineInstr*, unsigned> MI2LocMap; 2284 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap; 2285 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap; 2286 SmallVector<unsigned, 4> LdBases; 2287 SmallVector<unsigned, 4> StBases; 2288 2289 unsigned Loc = 0; 2290 MachineBasicBlock::iterator MBBI = MBB->begin(); 2291 MachineBasicBlock::iterator E = MBB->end(); 2292 while (MBBI != E) { 2293 for (; MBBI != E; ++MBBI) { 2294 MachineInstr &MI = *MBBI; 2295 if (MI.isCall() || MI.isTerminator()) { 2296 // Stop at barriers. 2297 ++MBBI; 2298 break; 2299 } 2300 2301 if (!MI.isDebugValue()) 2302 MI2LocMap[&MI] = ++Loc; 2303 2304 if (!isMemoryOp(MI)) 2305 continue; 2306 unsigned PredReg = 0; 2307 if (getInstrPredicate(MI, PredReg) != ARMCC::AL) 2308 continue; 2309 2310 int Opc = MI.getOpcode(); 2311 bool isLd = isLoadSingle(Opc); 2312 unsigned Base = MI.getOperand(1).getReg(); 2313 int Offset = getMemoryOpOffset(MI); 2314 2315 bool StopHere = false; 2316 if (isLd) { 2317 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI = 2318 Base2LdsMap.find(Base); 2319 if (BI != Base2LdsMap.end()) { 2320 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { 2321 if (Offset == getMemoryOpOffset(*BI->second[i])) { 2322 StopHere = true; 2323 break; 2324 } 2325 } 2326 if (!StopHere) 2327 BI->second.push_back(&MI); 2328 } else { 2329 Base2LdsMap[Base].push_back(&MI); 2330 LdBases.push_back(Base); 2331 } 2332 } else { 2333 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI = 2334 Base2StsMap.find(Base); 2335 if (BI != Base2StsMap.end()) { 2336 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { 2337 if (Offset == getMemoryOpOffset(*BI->second[i])) { 2338 StopHere = true; 2339 break; 2340 } 2341 } 2342 if (!StopHere) 2343 BI->second.push_back(&MI); 2344 } else { 2345 Base2StsMap[Base].push_back(&MI); 2346 StBases.push_back(Base); 2347 } 2348 } 2349 2350 if (StopHere) { 2351 // Found a duplicate (a base+offset combination that's seen earlier). 2352 // Backtrack. 2353 --Loc; 2354 break; 2355 } 2356 } 2357 2358 // Re-schedule loads. 2359 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) { 2360 unsigned Base = LdBases[i]; 2361 SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base]; 2362 if (Lds.size() > 1) 2363 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap); 2364 } 2365 2366 // Re-schedule stores. 2367 for (unsigned i = 0, e = StBases.size(); i != e; ++i) { 2368 unsigned Base = StBases[i]; 2369 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base]; 2370 if (Sts.size() > 1) 2371 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap); 2372 } 2373 2374 if (MBBI != E) { 2375 Base2LdsMap.clear(); 2376 Base2StsMap.clear(); 2377 LdBases.clear(); 2378 StBases.clear(); 2379 } 2380 } 2381 2382 return RetVal; 2383 } 2384 2385 2386 /// Returns an instance of the load / store optimization pass. 2387 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) { 2388 if (PreAlloc) 2389 return new ARMPreAllocLoadStoreOpt(); 2390 return new ARMLoadStoreOpt(); 2391 } 2392