1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file This file contains a pass that performs load / store related peephole 11 /// optimizations. This pass should be run after register allocation. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ARM.h" 16 #include "ARMBaseInstrInfo.h" 17 #include "ARMBaseRegisterInfo.h" 18 #include "ARMISelLowering.h" 19 #include "ARMMachineFunctionInfo.h" 20 #include "ARMSubtarget.h" 21 #include "MCTargetDesc/ARMAddressingModes.h" 22 #include "ThumbRegisterInfo.h" 23 #include "llvm/ADT/DenseMap.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/ADT/SmallPtrSet.h" 26 #include "llvm/ADT/SmallSet.h" 27 #include "llvm/ADT/SmallVector.h" 28 #include "llvm/ADT/Statistic.h" 29 #include "llvm/CodeGen/MachineBasicBlock.h" 30 #include "llvm/CodeGen/MachineFunctionPass.h" 31 #include "llvm/CodeGen/MachineInstr.h" 32 #include "llvm/CodeGen/MachineInstrBuilder.h" 33 #include "llvm/CodeGen/MachineRegisterInfo.h" 34 #include "llvm/CodeGen/RegisterClassInfo.h" 35 #include "llvm/CodeGen/SelectionDAGNodes.h" 36 #include "llvm/CodeGen/LivePhysRegs.h" 37 #include "llvm/IR/DataLayout.h" 38 #include "llvm/IR/DerivedTypes.h" 39 #include "llvm/IR/Function.h" 40 #include "llvm/Support/Allocator.h" 41 #include "llvm/Support/Debug.h" 42 #include "llvm/Support/ErrorHandling.h" 43 #include "llvm/Support/raw_ostream.h" 44 #include "llvm/Target/TargetInstrInfo.h" 45 #include "llvm/Target/TargetMachine.h" 46 #include "llvm/Target/TargetRegisterInfo.h" 47 using namespace llvm; 48 49 #define DEBUG_TYPE "arm-ldst-opt" 50 51 STATISTIC(NumLDMGened , "Number of ldm instructions generated"); 52 
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");

namespace llvm {
void initializeARMLoadStoreOptPass(PassRegistry &);
}

#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"

namespace {
  /// Post-register-allocation pass that combines load / store instructions
  /// to form ldm / stm instructions.
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {
      initializeARMLoadStoreOptPass(*PassRegistry::getPassRegistry());
    }

    const MachineFunction *MF;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    const TargetLowering *TL;
    ARMFunctionInfo *AFI;
    // Lazily computed liveness state; see moveLiveRegsBefore().
    LivePhysRegs LiveRegs;
    RegisterClassInfo RegClassInfo;
    // Position up to which LiveRegs has been stepped backwards.
    MachineBasicBlock::const_iterator LiveRegPos;
    bool LiveRegsValid;
    bool RegClassInfoValid;
    bool isThumb1, isThumb2;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return ARM_LOAD_STORE_OPT_NAME;
    }

  private:
    /// A set of load/store MachineInstrs with same base register sorted by
    /// offset.
    struct MemOpQueueEntry {
      MachineInstr *MI;
      int Offset;        ///< Load/Store offset.
      unsigned Position; ///< Position as counted from end of basic block.
      MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)
        : MI(MI), Offset(Offset), Position(Position) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;

    /// A set of MachineInstrs that fulfill (nearly all) conditions to get
    /// merged into a LDM/STM.
    struct MergeCandidate {
      /// List of instructions ordered by load/store offset.
      SmallVector<MachineInstr*, 4> Instrs;
      /// Index in Instrs of the instruction being latest in the schedule.
      unsigned LatestMIIdx;
      /// Index in Instrs of the instruction being earliest in the schedule.
      unsigned EarliestMIIdx;
      /// Index into the basic block where the merged instruction will be
      /// inserted. (See MemOpQueueEntry.Position)
      unsigned InsertPos;
      /// Whether the instructions can be merged into a ldm/stm instruction.
      bool CanMergeToLSMulti;
      /// Whether the instructions can be merged into a ldrd/strd instruction.
      bool CanMergeToLSDouble;
    };
    SpecificBumpPtrAllocator<MergeCandidate> Allocator;
    SmallVector<const MergeCandidate*,4> Candidates;
    SmallVector<MachineInstr*,4> MergeBaseCandidates;

    void moveLiveRegsBefore(const MachineBasicBlock &MBB,
                            MachineBasicBlock::const_iterator Before);
    unsigned findFreeReg(const TargetRegisterClass &RegClass);
    void UpdateBaseRegUses(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           DebugLoc DL, unsigned Base, unsigned WordOffset,
                           ARMCC::CondCodes Pred, unsigned PredReg);
    MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
        MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
        bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred,
        unsigned PredReg, DebugLoc DL,
        ArrayRef<std::pair<unsigned, bool>> Regs);
    MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
        MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
        bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred,
        unsigned PredReg, DebugLoc DL,
        ArrayRef<std::pair<unsigned, bool>> Regs) const;
    void FormCandidates(const MemOpQueue &MemOps);
    MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineInstr *MI);
    bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
    bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

INITIALIZE_PASS(ARMLoadStoreOpt, "arm-load-store-opt", ARM_LOAD_STORE_OPT_NAME, false, false)

/// Return true if \p MI has a live (non-dead) definition of CPSR.
static bool definesCPSR(const MachineInstr *MI) {
  for (const auto &MO : MI->operands()) {
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has live CPSR def, then it's not safe to fold it
      // into load / store.
      return true;
  }

  return false;
}

/// Decode the immediate offset (in bytes) of a load/store \p MI, handling the
/// various ARM/Thumb addressing-mode encodings.
static int getMemoryOpOffset(const MachineInstr *MI) {
  unsigned Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  // The offset immediate is the last operand before the predicate operands.
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
    return OffField;

  // Thumb1 immediate offsets are scaled by 4
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
      Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
    return OffField * 4;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
    : ARM_AM::getAM5Offset(OffField) * 4;
  ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
    : ARM_AM::getAM5Op(OffField);

  if (Op == ARM_AM::sub)
    return -Offset;

  return Offset;
}

/// Operand 1 of the single loads/stores handled here is the base register.
static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
  return MI.getOperand(1);
}

/// Operand 0 is the transferred (loaded/stored) register.
static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
  return MI.getOperand(0);
}

/// Map a single load/store opcode plus submode to the corresponding
/// load/store-multiple opcode; returns 0 when no such opcode exists.
/// Also bumps the relevant statistic counter as a side effect.
static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
  case ARM::tLDRspi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
  case ARM::tSTRspi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}

/// Return the addressing submode (ia/ib/da/db) encoded in a load/store
/// multiple opcode.
static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

static bool isT1i32Load(unsigned Opc) {
  return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
}

static bool isT1i32Store(unsigned Opc) {
  return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}

static bool isLoadSingle(unsigned Opc) {
  return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
}

/// Number of immediate units per word for the given Thumb1 opcode; used to
/// convert a word offset into the instruction's immediate units (see
/// UpdateBaseRegUses).
static unsigned getImmScale(unsigned Opc) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
    return 1;
  case ARM::tLDRHi:
  case ARM::tSTRHi:
    return 2;
  case ARM::tLDRBi:
  case ARM::tSTRBi:
    return 4;
  }
}

/// Number of bytes transferred by \p MI, or 0 for unhandled opcodes.
static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    // Extra operands beyond the fixed ones form the register list; each
    // transfers one word.
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
void
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   DebugLoc DL, unsigned Base,
                                   unsigned WordOffset,
                                   ARMCC::CondCodes Pred, unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");
  // Start updating any instructions with immediate offsets. Insert a SUB before
  // the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    bool InsertSub = false;
    unsigned Opc = MBBI->getOpcode();

    if (MBBI->readsRegister(Base)) {
      int Offset;
      bool IsLoad =
        Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
      bool IsStore =
        Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

      if (IsLoad || IsStore) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);

        // If storing the base register, it needs to be reset first.
        unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();

        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
          MO.setImm(Offset);
        else
          InsertSub = true;

      } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                 !definesCPSR(MBBI)) {
        // SUBS/ADDS using this register, with a dead def of the CPSR.
        // Merge it with the update; if the merged offset is too large,
        // insert a new sub instead.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4 ;
        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
          // Offset == 0.
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }

      } else {
        // Can't update the instruction.
        InsertSub = true;
      }

    } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
      // Since SUBS sets the condition flags, we can't place the base reset
      // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.
      InsertSub = true;
    }

    if (InsertSub) {
      // An instruction above couldn't be updated, so insert a sub.
      AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
        .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
      return;
    }

    if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
      // Register got killed. Stop updating.
      return;
  }

  // End of block was reached.
  if (MBB.succ_size() > 0) {
    // FIXME: Because of a bug, live registers are sometimes missing from
    // the successor blocks' live-in sets. This means we can't trust that
    // information and *always* have to reset at the end of a block.
    // See PR21029.
    if (MBBI != MBB.end()) --MBBI;
    AddDefaultT1CC(
      BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
      .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
  }
}

/// Return the first register of class \p RegClass that is not in \p Regs.
534 unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) { 535 if (!RegClassInfoValid) { 536 RegClassInfo.runOnMachineFunction(*MF); 537 RegClassInfoValid = true; 538 } 539 540 for (unsigned Reg : RegClassInfo.getOrder(&RegClass)) 541 if (!LiveRegs.contains(Reg)) 542 return Reg; 543 return 0; 544 } 545 546 /// Compute live registers just before instruction \p Before (in normal schedule 547 /// direction). Computes backwards so multiple queries in the same block must 548 /// come in reverse order. 549 void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB, 550 MachineBasicBlock::const_iterator Before) { 551 // Initialize if we never queried in this block. 552 if (!LiveRegsValid) { 553 LiveRegs.init(TRI); 554 LiveRegs.addLiveOuts(&MBB, true); 555 LiveRegPos = MBB.end(); 556 LiveRegsValid = true; 557 } 558 // Move backward just before the "Before" position. 559 while (LiveRegPos != Before) { 560 --LiveRegPos; 561 LiveRegs.stepBackward(*LiveRegPos); 562 } 563 } 564 565 static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs, 566 unsigned Reg) { 567 for (const std::pair<unsigned, bool> &R : Regs) 568 if (R.first == Reg) 569 return true; 570 return false; 571 } 572 573 /// Create and insert a LDM or STM with Base as base register and registers in 574 /// Regs as the register operands that would be loaded / stored. It returns 575 /// true if the transformation is done. 576 MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB, 577 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, 578 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, 579 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) { 580 unsigned NumRegs = Regs.size(); 581 assert(NumRegs > 1); 582 583 // For Thumb1 targets, it might be necessary to clobber the CPSR to merge. 584 // Compute liveness information for that register to make the decision. 
585 bool SafeToClobberCPSR = !isThumb1 || 586 (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) == 587 MachineBasicBlock::LQR_Dead); 588 589 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback. 590 591 // Exception: If the base register is in the input reglist, Thumb1 LDM is 592 // non-writeback. 593 // It's also not possible to merge an STR of the base register in Thumb1. 594 if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) { 595 assert(Base != ARM::SP && "Thumb1 does not allow SP in register list"); 596 if (Opcode == ARM::tLDRi) { 597 Writeback = false; 598 } else if (Opcode == ARM::tSTRi) { 599 return nullptr; 600 } 601 } 602 603 ARM_AM::AMSubMode Mode = ARM_AM::ia; 604 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA. 605 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); 606 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1; 607 608 if (Offset == 4 && haveIBAndDA) { 609 Mode = ARM_AM::ib; 610 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) { 611 Mode = ARM_AM::da; 612 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) { 613 // VLDM/VSTM do not support DB mode without also updating the base reg. 614 Mode = ARM_AM::db; 615 } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) { 616 // Check if this is a supported opcode before inserting instructions to 617 // calculate a new base register. 618 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr; 619 620 // If starting offset isn't zero, insert a MI to materialize a new base. 621 // But only do so if it is cost effective, i.e. merging more than two 622 // loads / stores. 623 if (NumRegs <= 2) 624 return nullptr; 625 626 // On Thumb1, it's not worth materializing a new base register without 627 // clobbering the CPSR (i.e. not using ADDS/SUBS). 
628 if (!SafeToClobberCPSR) 629 return nullptr; 630 631 unsigned NewBase; 632 if (isi32Load(Opcode)) { 633 // If it is a load, then just use one of the destination register to 634 // use as the new base. 635 NewBase = Regs[NumRegs-1].first; 636 } else { 637 // Find a free register that we can use as scratch register. 638 moveLiveRegsBefore(MBB, InsertBefore); 639 // The merged instruction does not exist yet but will use several Regs if 640 // it is a Store. 641 if (!isLoadSingle(Opcode)) 642 for (const std::pair<unsigned, bool> &R : Regs) 643 LiveRegs.addReg(R.first); 644 645 NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass); 646 if (NewBase == 0) 647 return nullptr; 648 } 649 650 int BaseOpc = 651 isThumb2 ? ARM::t2ADDri : 652 (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi : 653 (isThumb1 && Offset < 8) ? ARM::tADDi3 : 654 isThumb1 ? ARM::tADDi8 : ARM::ADDri; 655 656 if (Offset < 0) { 657 Offset = - Offset; 658 BaseOpc = 659 isThumb2 ? ARM::t2SUBri : 660 (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 : 661 isThumb1 ? ARM::tSUBi8 : ARM::SUBri; 662 } 663 664 if (!TL->isLegalAddImmediate(Offset)) 665 // FIXME: Try add with register operand? 666 return nullptr; // Probably not worth it then. 667 668 // We can only append a kill flag to the add/sub input if the value is not 669 // used in the register list of the stm as well. 670 bool KillOldBase = BaseKill && 671 (!isi32Store(Opcode) || !ContainsReg(Regs, Base)); 672 673 if (isThumb1) { 674 // Thumb1: depending on immediate size, use either 675 // ADDS NewBase, Base, #imm3 676 // or 677 // MOV NewBase, Base 678 // ADDS NewBase, #imm8. 679 if (Base != NewBase && 680 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) { 681 // Need to insert a MOV to the new base first. 
682 if (isARMLowRegister(NewBase) && isARMLowRegister(Base) && 683 !STI->hasV6Ops()) { 684 // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr 685 if (Pred != ARMCC::AL) 686 return nullptr; 687 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase) 688 .addReg(Base, getKillRegState(KillOldBase)); 689 } else 690 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase) 691 .addReg(Base, getKillRegState(KillOldBase)) 692 .addImm(Pred).addReg(PredReg); 693 694 // The following ADDS/SUBS becomes an update. 695 Base = NewBase; 696 KillOldBase = true; 697 } 698 if (BaseOpc == ARM::tADDrSPi) { 699 assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4"); 700 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase) 701 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4) 702 .addImm(Pred).addReg(PredReg); 703 } else 704 AddDefaultT1CC( 705 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true) 706 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset) 707 .addImm(Pred).addReg(PredReg); 708 } else { 709 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase) 710 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset) 711 .addImm(Pred).addReg(PredReg).addReg(0); 712 } 713 Base = NewBase; 714 BaseKill = true; // New base is always killed straight away. 715 } 716 717 bool isDef = isLoadSingle(Opcode); 718 719 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with 720 // base register writeback. 721 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); 722 if (!Opcode) 723 return nullptr; 724 725 // Check if a Thumb1 LDM/STM merge is safe. This is the case if: 726 // - There is no writeback (LDM of base register), 727 // - the base register is killed by the merged instruction, 728 // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS 729 // to reset the base register. 730 // Otherwise, don't merge. 
731 // It's safe to return here since the code to materialize a new base register 732 // above is also conditional on SafeToClobberCPSR. 733 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill) 734 return nullptr; 735 736 MachineInstrBuilder MIB; 737 738 if (Writeback) { 739 if (Opcode == ARM::tLDMIA) 740 // Update tLDMIA with writeback if necessary. 741 Opcode = ARM::tLDMIA_UPD; 742 743 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode)); 744 745 // Thumb1: we might need to set base writeback when building the MI. 746 MIB.addReg(Base, getDefRegState(true)) 747 .addReg(Base, getKillRegState(BaseKill)); 748 749 // The base isn't dead after a merged instruction with writeback. 750 // Insert a sub instruction after the newly formed instruction to reset. 751 if (!BaseKill) 752 UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg); 753 754 } else { 755 // No writeback, simply build the MachineInstr. 756 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode)); 757 MIB.addReg(Base, getKillRegState(BaseKill)); 758 } 759 760 MIB.addImm(Pred).addReg(PredReg); 761 762 for (const std::pair<unsigned, bool> &R : Regs) 763 MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second)); 764 765 return MIB.getInstr(); 766 } 767 768 MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB, 769 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, 770 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, 771 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const { 772 bool IsLoad = isi32Load(Opcode); 773 assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store"); 774 unsigned LoadStoreOpcode = IsLoad ? 
ARM::t2LDRDi8 : ARM::t2STRDi8; 775 776 assert(Regs.size() == 2); 777 MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL, 778 TII->get(LoadStoreOpcode)); 779 if (IsLoad) { 780 MIB.addReg(Regs[0].first, RegState::Define) 781 .addReg(Regs[1].first, RegState::Define); 782 } else { 783 MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second)) 784 .addReg(Regs[1].first, getKillRegState(Regs[1].second)); 785 } 786 MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 787 return MIB.getInstr(); 788 } 789 790 /// Call MergeOps and update MemOps and merges accordingly on success. 791 MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) { 792 const MachineInstr *First = Cand.Instrs.front(); 793 unsigned Opcode = First->getOpcode(); 794 bool IsLoad = isLoadSingle(Opcode); 795 SmallVector<std::pair<unsigned, bool>, 8> Regs; 796 SmallVector<unsigned, 4> ImpDefs; 797 DenseSet<unsigned> KilledRegs; 798 DenseSet<unsigned> UsedRegs; 799 // Determine list of registers and list of implicit super-register defs. 800 for (const MachineInstr *MI : Cand.Instrs) { 801 const MachineOperand &MO = getLoadStoreRegOp(*MI); 802 unsigned Reg = MO.getReg(); 803 bool IsKill = MO.isKill(); 804 if (IsKill) 805 KilledRegs.insert(Reg); 806 Regs.push_back(std::make_pair(Reg, IsKill)); 807 UsedRegs.insert(Reg); 808 809 if (IsLoad) { 810 // Collect any implicit defs of super-registers, after merging we can't 811 // be sure anymore that we properly preserved these live ranges and must 812 // removed these implicit operands. 813 for (const MachineOperand &MO : MI->implicit_operands()) { 814 if (!MO.isReg() || !MO.isDef() || MO.isDead()) 815 continue; 816 assert(MO.isImplicit()); 817 unsigned DefReg = MO.getReg(); 818 819 if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end()) 820 continue; 821 // We can ignore cases where the super-reg is read and written. 
822 if (MI->readsRegister(DefReg)) 823 continue; 824 ImpDefs.push_back(DefReg); 825 } 826 } 827 } 828 829 // Attempt the merge. 830 typedef MachineBasicBlock::iterator iterator; 831 MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx]; 832 iterator InsertBefore = std::next(iterator(LatestMI)); 833 MachineBasicBlock &MBB = *LatestMI->getParent(); 834 unsigned Offset = getMemoryOpOffset(First); 835 unsigned Base = getLoadStoreBaseOp(*First).getReg(); 836 bool BaseKill = LatestMI->killsRegister(Base); 837 unsigned PredReg = 0; 838 ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg); 839 DebugLoc DL = First->getDebugLoc(); 840 MachineInstr *Merged = nullptr; 841 if (Cand.CanMergeToLSDouble) 842 Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill, 843 Opcode, Pred, PredReg, DL, Regs); 844 if (!Merged && Cand.CanMergeToLSMulti) 845 Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill, 846 Opcode, Pred, PredReg, DL, Regs); 847 if (!Merged) 848 return nullptr; 849 850 // Determine earliest instruction that will get removed. We then keep an 851 // iterator just above it so the following erases don't invalidated it. 852 iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]); 853 bool EarliestAtBegin = false; 854 if (EarliestI == MBB.begin()) { 855 EarliestAtBegin = true; 856 } else { 857 EarliestI = std::prev(EarliestI); 858 } 859 860 // Remove instructions which have been merged. 861 for (MachineInstr *MI : Cand.Instrs) 862 MBB.erase(MI); 863 864 // Determine range between the earliest removed instruction and the new one. 865 if (EarliestAtBegin) 866 EarliestI = MBB.begin(); 867 else 868 EarliestI = std::next(EarliestI); 869 auto FixupRange = make_range(EarliestI, iterator(Merged)); 870 871 if (isLoadSingle(Opcode)) { 872 // If the previous loads defined a super-reg, then we have to mark earlier 873 // operands undef; Replicate the super-reg def on the merged instruction. 
874 for (MachineInstr &MI : FixupRange) { 875 for (unsigned &ImpDefReg : ImpDefs) { 876 for (MachineOperand &MO : MI.implicit_operands()) { 877 if (!MO.isReg() || MO.getReg() != ImpDefReg) 878 continue; 879 if (MO.readsReg()) 880 MO.setIsUndef(); 881 else if (MO.isDef()) 882 ImpDefReg = 0; 883 } 884 } 885 } 886 887 MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged); 888 for (unsigned ImpDef : ImpDefs) 889 MIB.addReg(ImpDef, RegState::ImplicitDefine); 890 } else { 891 // Remove kill flags: We are possibly storing the values later now. 892 assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD); 893 for (MachineInstr &MI : FixupRange) { 894 for (MachineOperand &MO : MI.uses()) { 895 if (!MO.isReg() || !MO.isKill()) 896 continue; 897 if (UsedRegs.count(MO.getReg())) 898 MO.setIsKill(false); 899 } 900 } 901 assert(ImpDefs.empty()); 902 } 903 904 return Merged; 905 } 906 907 static bool isValidLSDoubleOffset(int Offset) { 908 unsigned Value = abs(Offset); 909 // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally 910 // multiplied by 4. 911 return (Value % 4) == 0 && Value < 1024; 912 } 913 914 /// Find candidates for load/store multiple merge in list of MemOpQueueEntries. 915 void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) { 916 const MachineInstr *FirstMI = MemOps[0].MI; 917 unsigned Opcode = FirstMI->getOpcode(); 918 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); 919 unsigned Size = getLSMultipleTransferSize(FirstMI); 920 921 unsigned SIndex = 0; 922 unsigned EIndex = MemOps.size(); 923 do { 924 // Look at the first instruction. 925 const MachineInstr *MI = MemOps[SIndex].MI; 926 int Offset = MemOps[SIndex].Offset; 927 const MachineOperand &PMO = getLoadStoreRegOp(*MI); 928 unsigned PReg = PMO.getReg(); 929 unsigned PRegNum = PMO.isUndef() ? 
      UINT_MAX : TRI->getEncodingValue(PReg);
    unsigned Latest = SIndex;
    unsigned Earliest = SIndex;
    unsigned Count = 1;
    bool CanMergeToLSDouble =
      STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    if (STI->isCortexM3() && isi32Load(Opcode) &&
        PReg == getLoadStoreBaseOp(*MI).getReg())
      CanMergeToLSDouble = false;

    bool CanMergeToLSMulti = true;
    // On Swift, avoid vldm/vstm that start with an odd register number, as
    // that needs more uops than single vldrs.
    if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
      CanMergeToLSMulti = false;

    // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
    // deprecated; LDM to PC is fine but cannot happen here.
    if (PReg == ARM::SP || PReg == ARM::PC)
      CanMergeToLSMulti = CanMergeToLSDouble = false;

    // Merge following instructions where possible.
    for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
      int NewOffset = MemOps[I].Offset;
      // MemOps is sorted by offset; the run must be contiguous in memory.
      if (NewOffset != Offset + (int)Size)
        break;
      const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
      unsigned Reg = MO.getReg();
      if (Reg == ARM::SP || Reg == ARM::PC)
        break;

      // See if the current load/store may be part of a multi load/store.
      unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
      bool PartOfLSMulti = CanMergeToLSMulti;
      if (PartOfLSMulti) {
        // Register numbers must be in ascending order.
        if (RegNum <= PRegNum)
          PartOfLSMulti = false;
        // For VFP / NEON load/store multiples, the registers must be
        // consecutive and within the limit on the number of registers per
        // instruction.
        else if (!isNotVFP && RegNum != PRegNum+1)
          PartOfLSMulti = false;
      }
      // See if the current load/store may be part of a double load/store.
      // LDRD/STRD transfer exactly two registers, hence Count <= 1.
      bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;

      if (!PartOfLSMulti && !PartOfLSDouble)
        break;
      CanMergeToLSMulti &= PartOfLSMulti;
      CanMergeToLSDouble &= PartOfLSDouble;
      // Track MemOp with latest and earliest position (Positions are
      // counted in reverse).
      unsigned Position = MemOps[I].Position;
      if (Position < MemOps[Latest].Position)
        Latest = I;
      else if (Position > MemOps[Earliest].Position)
        Earliest = I;
      // Prepare for next MemOp.
      Offset += Size;
      PRegNum = RegNum;
    }

    // Form a candidate from the Ops collected so far.
    MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
    for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
      Candidate->Instrs.push_back(MemOps[C].MI);
    Candidate->LatestMIIdx = Latest - SIndex;
    Candidate->EarliestMIIdx = Earliest - SIndex;
    Candidate->InsertPos = MemOps[Latest].Position;
    // A single instruction is never merged into anything.
    if (Count == 1)
      CanMergeToLSMulti = CanMergeToLSDouble = false;
    Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
    Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
    Candidates.push_back(Candidate);
    // Continue after the chain.
    SIndex += Count;
  } while (SIndex < EIndex);
}

/// Maps a load/store-multiple opcode plus addressing submode to the
/// corresponding base-writeback (_UPD) opcode.
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case
      ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}

/// Check if the given instruction increments or decrements a register and
/// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
/// generated by the instruction are possibly read as well.
static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
                                  ARMCC::CondCodes Pred, unsigned PredReg) {
  bool CheckCPSRDef;
  int Scale;
  switch (MI.getOpcode()) {
  // Thumb1 add/sub immediates are implicitly scaled by 4 here; the plain
  // ARM/Thumb2 forms carry the byte amount directly in the immediate.
  case ARM::tADDi8:  Scale = 4; CheckCPSRDef = true; break;
  case ARM::tSUBi8:  Scale = -4; CheckCPSRDef = true; break;
  case ARM::t2SUBri:
  case ARM::SUBri:   Scale = -1; CheckCPSRDef = true; break;
  case ARM::t2ADDri:
  case ARM::ADDri:   Scale = 1; CheckCPSRDef = true; break;
  case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break;
  case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
  default: return 0;
  }

  // The add/sub must be of the form Reg = Reg +/- Imm under the same
  // predicate as the memory op we want to fold it into.
  unsigned MIPredReg;
  if (MI.getOperand(0).getReg() != Reg ||
      MI.getOperand(1).getReg() != Reg ||
      getInstrPredicate(&MI, MIPredReg) != Pred ||
      MIPredReg != PredReg)
    return 0;

  // Don't fold if removing the instruction would lose a CPSR def.
  if (CheckCPSRDef && definesCPSR(&MI))
    return 0;
  return MI.getOperand(2).getImm() * Scale;
}

/// Searches for an increment or decrement of \p Reg before \p MBBI.
/// On success returns the add/sub instruction and sets \p Offset to the
/// signed byte amount; otherwise returns MBB.end() with \p Offset == 0.
static MachineBasicBlock::iterator
findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
                 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
  Offset = 0;
  MachineBasicBlock &MBB = *MBBI->getParent();
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (MBBI == BeginMBBI)
    return EndMBBI;

  // Skip debug values.
  MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
  while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI)
    --PrevMBBI;

  Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
  return Offset == 0 ? EndMBBI : PrevMBBI;
}

/// Searches for an increment or decrement of \p Reg after \p MBBI.
/// On success returns the add/sub instruction and sets \p Offset to the
/// signed byte amount; otherwise returns MBB.end() with \p Offset == 0.
static MachineBasicBlock::iterator
findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
                ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
  Offset = 0;
  MachineBasicBlock &MBB = *MBBI->getParent();
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
  // Skip debug values.
  while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
    ++NextMBBI;
  if (NextMBBI == EndMBBI)
    return EndMBBI;

  Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
  return Offset == 0 ? EndMBBI : NextMBBI;
}

/// Fold preceding/trailing inc/dec of base register into the
/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;

  const MachineOperand &BaseOP = MI->getOperand(0);
  unsigned Base = BaseOP.getReg();
  bool BaseKill = BaseOP.isKill();
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  // (Operands 0/1 are the base writeback pair; the register list starts at 2.)
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  int Bytes = getLSMultipleTransferSize(MI);
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator MBBI(MI);
  int Offset;
  // First look for a decrement before the memory op...
  MachineBasicBlock::iterator MergeInstr
    = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
  if (Mode == ARM_AM::ia && Offset == -Bytes) {
    Mode = ARM_AM::db;
  } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
    Mode = ARM_AM::da;
  } else {
    // ...otherwise look for a matching increment/decrement after it.
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
        ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes))
      return false;
  }
  // The add/sub is now folded into the writeback; remove it.
  MBB.erase(MergeInstr);

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

/// Maps a single load/store opcode to its pre-indexed (writeback-before)
/// form; VLDR/VSTR have no pre-indexed form, so a one-register VLDM/VSTM
/// with writeback is used instead.
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// Maps a single load/store opcode to its post-indexed (writeback-after)
/// form; VLDR/VSTR again fall back to one-register VLDM/VSTM with writeback.
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// Fold preceding/trailing inc/dec of base register into the
/// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
  if (isThumb1) return false;

  unsigned Base = getLoadStoreBaseOp(*MI).getReg();
  bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  // Only a zero existing immediate offset can be folded with an inc/dec.
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Bytes = getLSMultipleTransferSize(MI);
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator MBBI(MI);
  int Offset;
  // Try pre-indexed (inc/dec before the access) first, then post-indexed.
  MachineBasicBlock::iterator MergeInstr
    = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  unsigned NewOpc;
  if (!isAM5 && Offset == Bytes) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
  } else if (Offset == -Bytes) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (Offset == Bytes) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
    } else if (!isAM5 && Offset == -Bytes) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
    } else
      return false;
  }
  // The add/sub is folded into the new addressing mode; remove it.
  MBB.erase(MergeInstr);

  ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;

  bool isLd = isLoadSingle(Opcode);
  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
      }
    } else {
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
    } else {
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}

/// Fold a preceding/trailing inc/dec of the base register into a
/// t2LDRDi8/t2STRDi8, producing the pre-/post-indexed LDRD/STRD form.
bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
  unsigned Opcode = MI.getOpcode();
  assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
         "Must have t2STRDi8 or t2LDRDi8");
  // Only a zero existing immediate offset can be folded.
  if (MI.getOperand(3).getImm() != 0)
    return false;

  // Behaviour for writeback is undefined if base register is the same as one
  // of the others.
  const MachineOperand &BaseOp = MI.getOperand(2);
  unsigned Base = BaseOp.getReg();
  const MachineOperand &Reg0Op = MI.getOperand(0);
  const MachineOperand &Reg1Op = MI.getOperand(1);
  if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
    return false;

  unsigned PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);
  MachineBasicBlock::iterator MBBI(MI);
  MachineBasicBlock &MBB = *MI.getParent();
  int Offset;
  // LDRD/STRD transfer 8 bytes, so only +/-8 updates can be folded.
  MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
                                                            PredReg, Offset);
  unsigned NewOpc;
  if (Offset == 8 || Offset == -8) {
    NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (Offset == 8 || Offset == -8) {
      NewOpc = Opcode == ARM::t2LDRDi8 ?
          ARM::t2LDRD_POST : ARM::t2STRD_POST;
    } else
      return false;
  }
  // The add/sub is folded into the writeback; remove it.
  MBB.erase(MergeInstr);

  DebugLoc DL = MI.getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  // LDRD defines the two data registers first; STRD defines the writeback
  // base first. Operand order follows the respective opcode definitions.
  if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
    MIB.addOperand(Reg0Op).addOperand(Reg1Op)
       .addReg(BaseOp.getReg(), RegState::Define);
  } else {
    assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
    MIB.addReg(BaseOp.getReg(), RegState::Define)
       .addOperand(Reg0Op).addOperand(Reg1Op);
  }
  MIB.addReg(BaseOp.getReg(), RegState::Kill)
     .addImm(Offset).addImm(Pred).addReg(PredReg);
  assert(TII->get(Opcode).getNumOperands() == 6 &&
         TII->get(NewOpc).getNumOperands() == 7 &&
         "Unexpected number of operands in Opcode specification.");

  // Transfer implicit operands.
  for (const MachineOperand &MO : MI.implicit_operands())
    MIB.addOperand(MO);
  MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());

  MBB.erase(MBBI);
  return true;
}

/// Returns true if instruction is a memory operation that this pass is capable
/// of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just want
  // to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  unsigned Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    // Register-based addressing only; any other form is not handled here.
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// Emits a single LDR (isDef) or STR (!isDef) of \p Reg at \p BaseReg +
/// \p Offset in front of \p MBBI, propagating the given kill/undef flags.
/// Used when splitting an LDRD/STRD into two single memory ops.
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc DL, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

/// Rewrites LDRD/STRD instructions that are illegal as-is (non-consecutive
/// register pair, or affected by Cortex-M3 erratum 602117) into an equivalent
/// LDM/STM or a pair of single loads/stores.
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
    return false;

  const MachineOperand &BaseOp = MI->getOperand(2);
  unsigned BaseReg = BaseOp.getReg();
  unsigned EvenReg = MI->getOperand(0).getReg();
  unsigned OddReg = MI->getOperand(1).getReg();
  unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
  unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);

  // ARM errata 602117: LDRD with base in list may result in incorrect base
  // register when interrupted or faulted.
  bool Errata602117 = EvenReg == BaseReg &&
    (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
  // ARM LDRD/STRD needs consecutive registers.
  bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
    (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);

  // Nothing wrong with this LDRD/STRD; leave it alone.
  if (!Errata602117 && !NonConsecutiveRegs)
    return false;

  bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
  bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
  bool EvenDeadKill = isLd ?
    MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
  bool EvenUndef = MI->getOperand(0).isUndef();
  bool OddDeadKill = isLd ?
    MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
  bool OddUndef = MI->getOperand(1).isUndef();
  bool BaseKill = BaseOp.isKill();
  bool BaseUndef = BaseOp.isUndef();
  // Only the ARM-mode LDRD/STRD carry a separate offset operand (index 3).
  bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
  bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
  int OffImm = getMemoryOpOffset(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

  if (OddRegNum > EvenRegNum && OffImm == 0) {
    // Ascending register numbers and no offset. It's safe to change it to a
    // ldm or stm.
    unsigned NewOpc = (isLd)
      ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
      : (isT2 ? ARM::t2STMIA : ARM::STMIA);
    if (isLd) {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
        .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
      ++NumLDRD2LDM;
    } else {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg,
                getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
        .addReg(OddReg,
                getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
      ++NumSTRD2STM;
    }
  } else {
    // Split into two instructions.
    unsigned NewOpc = (isLd)
      ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
    // so adjust and use t2LDRi12 here for that.
    unsigned NewOpc2 = (isLd)
      ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    DebugLoc dl = MBBI->getDebugLoc();
    // If this is a load and base register is killed, it may have been
    // re-defed by the load, make sure the first load does not clobber it.
    if (isLd &&
        (BaseKill || OffKill) &&
        (TRI->regsOverlap(EvenReg, BaseReg))) {
      assert(!TRI->regsOverlap(OddReg, BaseReg));
      // Load the odd (high) half first so the base survives for the second
      // load, which overlaps the base register.
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, false,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, false,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    } else {
      if (OddReg == EvenReg && EvenDeadKill) {
        // If the two source operands are the same, the kill marker is
        // probably on the first one. e.g.
        // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
        EvenDeadKill = false;
        OddDeadKill = true;
      }
      // Never kill the base register in the first instruction.
      if (EvenReg == BaseReg)
        EvenDeadKill = false;
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, EvenUndef,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, OddUndef,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    }
    if (isLd)
      ++NumLDRD2LDR;
    else
      ++NumSTRD2STR;
  }

  MBBI = MBB.erase(MBBI);
  return true;
}

/// An optimization pass to turn multiple LDR / STR ops of the same base and
/// incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  unsigned CurrOpc = ~0u;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned Position = 0;
  assert(Candidates.size() == 0);
  assert(MergeBaseCandidates.size() == 0);
  LiveRegsValid = false;

  // Walk the block bottom-up, collecting chains of same-base/same-opcode
  // memory ops into MemOps.
  for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
       I = MBBI) {
    // The instruction in front of the iterator is the one we look at.
    MBBI = std::prev(I);
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;
    ++Position;

    if (isMemoryOp(MBBI)) {
      unsigned Opcode = MBBI->getOpcode();
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      if (CurrBase == 0) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrPred = Pred;
        MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
        continue;
      }
      // Note: No need to match PredReg in the next if.
      if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
        // Watch out for:
        //   r4 := ldr [r0, #8]
        //   r4 := ldr [r0, #4]
        // or
        //   r0 := ldr [r0]
        // If a load overrides the base register or a register loaded by
        // another load in our chain, we cannot take this instruction.
        bool Overlap = false;
        if (isLoadSingle(Opcode)) {
          Overlap = (Base == Reg);
          if (!Overlap) {
            for (const MemOpQueueEntry &E : MemOps) {
              if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
                Overlap = true;
                break;
              }
            }
          }
        }

        if (!Overlap) {
          // Check offset and sort memory operation into the current chain.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
            continue;
          } else {
            MemOpQueue::iterator MI, ME;
            for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
              if (Offset < MI->Offset) {
                // Found a place to insert.
                break;
              }
              if (Offset == MI->Offset) {
                // Collision, abort.
                MI = ME;
                break;
              }
            }
            if (MI != MemOps.end()) {
              MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position));
              continue;
            }
          }
        }
      }

      // Don't advance the iterator; The op will start a new chain next.
      MBBI = I;
      --Position;
      // Fallthrough to look into existing chain.
    } else if (MBBI->isDebugValue()) {
      continue;
    } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
               MBBI->getOpcode() == ARM::t2STRDi8) {
      // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions
      // remember them because we may still be able to merge add/sub into them.
      MergeBaseCandidates.push_back(MBBI);
    }


    // If we are here then the chain is broken; Extract candidates for a merge.
    if (MemOps.size() > 0) {
      FormCandidates(MemOps);
      // Reset for the next chain.
      CurrBase = 0;
      CurrOpc = ~0u;
      CurrPred = ARMCC::AL;
      MemOps.clear();
    }
  }
  // Flush the chain still open at the top of the block.
  if (MemOps.size() > 0)
    FormCandidates(MemOps);

  // Sort candidates so they get processed from end to begin of the basic
  // block later; This is necessary for liveness calculation.
  auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
    return M0->InsertPos < M1->InsertPos;
  };
  std::sort(Candidates.begin(), Candidates.end(), LessThan);

  // Go through list of candidates and merge.
  bool Changed = false;
  for (const MergeCandidate *Candidate : Candidates) {
    if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
      MachineInstr *Merged = MergeOpsUpdate(*Candidate);
      // Merge preceding/trailing base inc/dec into the merged op.
      if (Merged) {
        Changed = true;
        unsigned Opcode = Merged->getOpcode();
        if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
          MergeBaseUpdateLSDouble(*Merged);
        else
          MergeBaseUpdateLSMultiple(Merged);
      } else {
        // The multi-merge failed; try folding base updates into the
        // individual instructions instead.
        for (MachineInstr *MI : Candidate->Instrs) {
          if (MergeBaseUpdateLoadStore(MI))
            Changed = true;
        }
      }
    } else {
      assert(Candidate->Instrs.size() == 1);
      if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
        Changed = true;
    }
  }
  Candidates.clear();
  // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
  for (MachineInstr *MI : MergeBaseCandidates)
    MergeBaseUpdateLSDouble(*MI);
  MergeBaseCandidates.clear();

  return Changed;
}

/// If this is an exit BB, try merging the return ops ("bx lr" and "mov pc,
/// lr") into the preceding stack restore so it directly restores the value of
/// LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  // Thumb1 LDM doesn't allow high registers.
  if (isThumb1) return false;
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = std::prev(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      // The register list is the trailing operands; LR must be last.
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      // Retarget the LDM at PC instead of LR and drop the return instruction.
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  MF = &Fn;
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TL = STI->getTargetLowering();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();

  RegClassInfoValid = false;
  isThumb2 = AFI->isThumb2Function();
  isThumb1 = AFI->isThumbFunction() && !isThumb2;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    // LDM-to-PC returns require v5T (interworking LDM).
    if (STI->hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  Allocator.DestroyAll();
  return Modified;
}

namespace llvm {
void initializeARMPreAllocLoadStoreOptPass(PassRegistry &);
}
#define ARM_PREALLOC_LOAD_STORE_OPT_NAME \ 1850 "ARM pre- register allocation load / store optimization pass" 1851 1852 namespace { 1853 /// Pre- register allocation pass that move load / stores from consecutive 1854 /// locations close to make it more likely they will be combined later. 1855 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{ 1856 static char ID; 1857 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) { 1858 initializeARMPreAllocLoadStoreOptPass(*PassRegistry::getPassRegistry()); 1859 } 1860 1861 const DataLayout *TD; 1862 const TargetInstrInfo *TII; 1863 const TargetRegisterInfo *TRI; 1864 const ARMSubtarget *STI; 1865 MachineRegisterInfo *MRI; 1866 MachineFunction *MF; 1867 1868 bool runOnMachineFunction(MachineFunction &Fn) override; 1869 1870 const char *getPassName() const override { 1871 return ARM_PREALLOC_LOAD_STORE_OPT_NAME; 1872 } 1873 1874 private: 1875 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, 1876 unsigned &NewOpc, unsigned &EvenReg, 1877 unsigned &OddReg, unsigned &BaseReg, 1878 int &Offset, 1879 unsigned &PredReg, ARMCC::CondCodes &Pred, 1880 bool &isT2); 1881 bool RescheduleOps(MachineBasicBlock *MBB, 1882 SmallVectorImpl<MachineInstr *> &Ops, 1883 unsigned Base, bool isLd, 1884 DenseMap<MachineInstr*, unsigned> &MI2LocMap); 1885 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB); 1886 }; 1887 char ARMPreAllocLoadStoreOpt::ID = 0; 1888 } 1889 1890 INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-load-store-opt", 1891 ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false) 1892 1893 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { 1894 TD = &Fn.getDataLayout(); 1895 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget()); 1896 TII = STI->getInstrInfo(); 1897 TRI = STI->getRegisterInfo(); 1898 MRI = &Fn.getRegInfo(); 1899 MF = &Fn; 1900 1901 bool Modified = false; 1902 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; 1903 ++MFI) 1904 
Modified |= RescheduleLoadStoreInstrs(MFI); 1905 1906 return Modified; 1907 } 1908 1909 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, 1910 MachineBasicBlock::iterator I, 1911 MachineBasicBlock::iterator E, 1912 SmallPtrSetImpl<MachineInstr*> &MemOps, 1913 SmallSet<unsigned, 4> &MemRegs, 1914 const TargetRegisterInfo *TRI) { 1915 // Are there stores / loads / calls between them? 1916 // FIXME: This is overly conservative. We should make use of alias information 1917 // some day. 1918 SmallSet<unsigned, 4> AddedRegPressure; 1919 while (++I != E) { 1920 if (I->isDebugValue() || MemOps.count(&*I)) 1921 continue; 1922 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects()) 1923 return false; 1924 if (isLd && I->mayStore()) 1925 return false; 1926 if (!isLd) { 1927 if (I->mayLoad()) 1928 return false; 1929 // It's not safe to move the first 'str' down. 1930 // str r1, [r0] 1931 // strh r5, [r0] 1932 // str r4, [r0, #+4] 1933 if (I->mayStore()) 1934 return false; 1935 } 1936 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) { 1937 MachineOperand &MO = I->getOperand(j); 1938 if (!MO.isReg()) 1939 continue; 1940 unsigned Reg = MO.getReg(); 1941 if (MO.isDef() && TRI->regsOverlap(Reg, Base)) 1942 return false; 1943 if (Reg != Base && !MemRegs.count(Reg)) 1944 AddedRegPressure.insert(Reg); 1945 } 1946 } 1947 1948 // Estimate register pressure increase due to the transformation. 1949 if (MemRegs.size() <= 4) 1950 // Ok if we are moving small number of instructions. 1951 return true; 1952 return AddedRegPressure.size() <= MemRegs.size() * 2; 1953 } 1954 1955 1956 /// Copy \p Op0 and \p Op1 operands into a new array assigned to MI. 
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  // Total number of memory operands across both source instructions.
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  // Allocate one combined array on the function and copy Op0's memory
  // operands followed by Op1's into it.
  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}

/// Check whether the two loads/stores \p Op0 and \p Op1 can be combined into
/// a single LDRD/STRD.  On success, the out-parameters are filled with the
/// opcode, destination/source registers, base register, encoded offset,
/// predicate and Thumb2-ness of the instruction to build.
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl, unsigned &NewOpc,
                                          unsigned &FirstReg,
                                          unsigned &SecondReg,
                                          unsigned &BaseReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  // Map the single-register opcode to its paired equivalent.
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12) {
    NewOpc = ARM::LDRD;
  } else if (Opcode == ARM::STRi12) {
    NewOpc = ARM::STRD;
  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else {
    return false;
  }

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  // At the moment, we ignore the memoryoperand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  // Bail out on volatile accesses or when the alignment can't be inspected.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 need 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    // t2LDRDi8/t2STRDi8 take a signed, scale-aligned immediate.
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    // ARM-mode LDRD/STRD encode the sign separately (addrmode3), so fold the
    // sign into the AM3 opcode value and range-check the magnitude.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  FirstReg = Op0->getOperand(0).getReg();
  SecondReg = Op1->getOperand(0).getReg();
  // LDRD/STRD need two distinct transfer registers.
  if (FirstReg == SecondReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

/// Move the loads/stores in \p Ops (all using base register \p Base) next to
/// each other inside \p MBB, and try to combine pairs into LDRD/STRD.
/// \p MI2LocMap gives each instruction's position within the block.
/// Returns true if anything was moved or combined.
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVectorImpl<MachineInstr *> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;
  });

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    // Scan Ops from the back (lowest offset first, given the reverse sort)
    // and grow a run of same-size, gap-free, same-kind memory ops; also track
    // the earliest and latest positions of the run within the block.
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      // Stop the run when the ops would map to different LDM/STM opcodes.
      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      // Stop on a size change or an offset gap between consecutive ops.
      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      // Nothing to move together; drop the op with the lowest offset.
      Ops.pop_back();
    else {
      // Collect the run's instructions and their transfer registers.
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        // Give up on this run and remove it from the work list.
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        // Skip past any of the run's own instructions (and debug values)
        // already sitting at the insertion point.
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned FirstReg = 0, SecondReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             FirstReg, SecondReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          // Constrain both virtual registers to the class the paired
          // instruction requires.
          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(FirstReg, TRC);
          MRI->constrainRegClass(SecondReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg, RegState::Define)
              .addReg(SecondReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg)
              .addReg(SecondReg)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          // The originals are fully replaced by the paired instruction.
          MBB->erase(Op0);
          MBB->erase(Op1);

          if (!isT2) {
            // Add register allocation hints to form register pairs.
            MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
            MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
          }
        } else {
          // No pairing: just splice the run's instructions to the insertion
          // point so they become adjacent.
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

/// Scan \p MBB, grouping unpredicated single loads/stores by base register,
/// and invoke RescheduleOps on every group with more than one member.
/// Scanning restarts after calls/terminators and after a repeated
/// base+offset pair.  Returns true if any instruction was moved or combined.
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  // MI2LocMap records each instruction's position; the Base2*Map tables
  // collect loads/stores keyed by base register, with LdBases/StBases
  // remembering the insertion order of the bases.
  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      // Only unpredicated single memory ops are candidates.
      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isLoadSingle(Opc);
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          // A second load from the same base+offset ends this region.
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          // A second store to the same base+offset ends this region.
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    // More instructions remain past the barrier: reset the per-region state
    // and continue scanning.
    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// Returns an instance of the load / store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}
