//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This file contains a pass that performs load / store related peephole
/// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "ThumbRegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

#define DEBUG_TYPE "arm-ldst-opt"

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");

namespace llvm {
void initializeARMLoadStoreOptPass(PassRegistry &);
}

#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"

namespace {
  /// Post-register allocation pass that combines load / store instructions to
  /// form ldm / stm instructions.
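  ///
  /// Illustrative sketch of the basic rewrite performed here (the registers
  /// and offsets are hypothetical, not taken from this file):
  ///
  ///   ldr r1, [r0]
  ///   ldr r2, [r0, #4]      =>      ldmia r0, {r1, r2, r3}
  ///   ldr r3, [r0, #8]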
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {
      initializeARMLoadStoreOptPass(*PassRegistry::getPassRegistry());
    }

    const MachineFunction *MF;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    const TargetLowering *TL;
    ARMFunctionInfo *AFI;
    LivePhysRegs LiveRegs;
    RegisterClassInfo RegClassInfo;
    MachineBasicBlock::const_iterator LiveRegPos;
    bool LiveRegsValid;
    bool RegClassInfoValid;
    bool isThumb1, isThumb2;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return ARM_LOAD_STORE_OPT_NAME;
    }

  private:
    /// A set of load/store MachineInstrs with same base register sorted by
    /// offset.
    struct MemOpQueueEntry {
      MachineInstr *MI;
      int Offset;        ///< Load/Store offset.
      unsigned Position; ///< Position as counted from end of basic block.
      MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)
          : MI(MI), Offset(Offset), Position(Position) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;

    /// A set of MachineInstrs that fulfill (nearly all) conditions to get
    /// merged into a LDM/STM.
    struct MergeCandidate {
      /// List of instructions ordered by load/store offset.
      SmallVector<MachineInstr*, 4> Instrs;
      /// Index in Instrs of the instruction being latest in the schedule.
      unsigned LatestMIIdx;
      /// Index in Instrs of the instruction being earliest in the schedule.
      unsigned EarliestMIIdx;
      /// Index into the basic block where the merged instruction will be
      /// inserted. (See MemOpQueueEntry.Position)
      unsigned InsertPos;
      /// Whether the instructions can be merged into a ldm/stm instruction.
      bool CanMergeToLSMulti;
      /// Whether the instructions can be merged into a ldrd/strd instruction.
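      /// (Only formed for Thumb2 here: e.g. two i32 loads at offsets 0 and 4
      /// off the same base may become a single t2LDRDi8.)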
      bool CanMergeToLSDouble;
    };
    SpecificBumpPtrAllocator<MergeCandidate> Allocator;
    SmallVector<const MergeCandidate*,4> Candidates;
    SmallVector<MachineInstr*,4> MergeBaseCandidates;

    void moveLiveRegsBefore(const MachineBasicBlock &MBB,
                            MachineBasicBlock::const_iterator Before);
    unsigned findFreeReg(const TargetRegisterClass &RegClass);
    void UpdateBaseRegUses(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           DebugLoc DL, unsigned Base, unsigned WordOffset,
                           ARMCC::CondCodes Pred, unsigned PredReg);
    MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
        MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
        bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
        DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs);
    MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
        MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
        bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
        DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const;
    void FormCandidates(const MemOpQueue &MemOps);
    MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineInstr *MI);
    bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
    bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

INITIALIZE_PASS(ARMLoadStoreOpt, "arm-load-store-opt", ARM_LOAD_STORE_OPT_NAME,
                false, false)

static bool definesCPSR(const MachineInstr *MI) {
  for (const auto &MO : MI->operands()) {
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has live CPSR def, then it's not safe to fold it
      // into load / store.
      return true;
  }

  return false;
}

static int getMemoryOpOffset(const MachineInstr *MI) {
  unsigned Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  // Thumb1 immediate offsets are scaled by 4.
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
      Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
    return OffField * 4;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  ARM_AM::AddrOpc Op = isAM3 ?
      ARM_AM::getAM3Op(OffField) : ARM_AM::getAM5Op(OffField);

  if (Op == ARM_AM::sub)
    return -Offset;

  return Offset;
}

static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
  return MI.getOperand(1);
}

static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
  return MI.getOperand(0);
}

static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
  case ARM::tLDRspi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
  case ARM::tSTRspi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}

static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

static bool isT1i32Load(unsigned Opc) {
  return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc);
}

static bool isT1i32Store(unsigned Opc) {
  return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}

static bool isLoadSingle(unsigned Opc) {
  return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
}

static unsigned getImmScale(unsigned Opc) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
    return 1;
  case ARM::tLDRHi:
  case ARM::tSTRHi:
    return 2;
  case ARM::tLDRBi:
  case ARM::tSTRBi:
    return 4;
  }
}

static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

/// Update future uses of the base
/// register with the offset introduced due to writeback. This function only
/// works on Thumb1.
void
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   DebugLoc DL, unsigned Base,
                                   unsigned WordOffset,
                                   ARMCC::CondCodes Pred, unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");
  // Start updating any instructions with immediate offsets. Insert a SUB
  // before the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    bool InsertSub = false;
    unsigned Opc = MBBI->getOpcode();

    if (MBBI->readsRegister(Base)) {
      int Offset;
      bool IsLoad =
        Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
      bool IsStore =
        Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

      if (IsLoad || IsStore) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);

        // If storing the base register, it needs to be reset first.
        unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();

        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
          MO.setImm(Offset);
        else
          InsertSub = true;

      } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                 !definesCPSR(MBBI)) {
        // SUBS/ADDS using this register, with a dead def of the CPSR.
        // Merge it with the update; if the merged offset is too large,
        // insert a new sub instead.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4;
        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
          // Offset == 0.
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }

      } else {
        // Can't update the instruction.
        InsertSub = true;
      }

    } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
      // Since SUBS sets the condition flags, we can't place the base reset
      // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.
      InsertSub = true;
    }

    if (InsertSub) {
      // An instruction above couldn't be updated, so insert a sub.
      AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
        .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
      return;
    }

    if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
      // Register got killed. Stop updating.
      return;
  }

  // End of block was reached.
  if (MBB.succ_size() > 0) {
    // FIXME: Because of a bug, live registers are sometimes missing from
    // the successor blocks' live-in sets. This means we can't trust that
    // information and *always* have to reset at the end of a block.
    // See PR21029.
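    // E.g. if the merged instruction was "tLDMIA_UPD r0!, {r1, r2, r3}"
    // (WordOffset == 3), this materializes "SUBS r0, #12" so successors still
    // see the original base value. (Registers here are illustrative only.)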
    if (MBBI != MBB.end()) --MBBI;
    AddDefaultT1CC(
      BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
      .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
  }
}

/// Return the first register of class \p RegClass that is not in \p Regs.
unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
  if (!RegClassInfoValid) {
    RegClassInfo.runOnMachineFunction(*MF);
    RegClassInfoValid = true;
  }

  for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
    if (!LiveRegs.contains(Reg))
      return Reg;
  return 0;
}

/// Compute live registers just before instruction \p Before (in normal
/// schedule direction). Computes backwards so multiple queries in the same
/// block must come in reverse order.
void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
                                         MachineBasicBlock::const_iterator Before) {
  // Initialize if we never queried in this block.
  if (!LiveRegsValid) {
    LiveRegs.init(TRI);
    LiveRegs.addLiveOuts(&MBB, true);
    LiveRegPos = MBB.end();
    LiveRegsValid = true;
  }
  // Move backward just before the "Before" position.
  while (LiveRegPos != Before) {
    --LiveRegPos;
    LiveRegs.stepBackward(*LiveRegPos);
  }
}

static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
                        unsigned Reg) {
  for (const std::pair<unsigned, bool> &R : Regs)
    if (R.first == Reg)
      return true;
  return false;
}

/// Create and insert a LDM or STM with Base as base register and registers in
/// Regs as the register operands that would be loaded / stored. It returns
/// the newly created instruction, or nullptr if the transformation could not
/// be performed.
MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
    MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
    bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
    DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) {
  unsigned NumRegs = Regs.size();
  assert(NumRegs > 1);

  // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
  // Compute liveness information for that register to make the decision.
  bool SafeToClobberCPSR = !isThumb1 ||
    (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
     MachineBasicBlock::LQR_Dead);

  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.

  // Exception: If the base register is in the input reglist, Thumb1 LDM is
  // non-writeback.
  // It's also not possible to merge an STR of the base register in Thumb1.
  // (Note: the original condition also required isi32Load(Opcode), which made
  // the tSTRi branch unreachable; the guard below matches the stated intent.)
  if (isThumb1 && ContainsReg(Regs, Base)) {
    assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
    if (Opcode == ARM::tLDRi) {
      Writeback = false;
    } else if (Opcode == ARM::tSTRi) {
      return nullptr;
    }
  }

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;

  if (Offset == 4 && haveIBAndDA) {
    Mode = ARM_AM::ib;
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
    Mode = ARM_AM::da;
  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
    // Check if this is a supported opcode before inserting instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return nullptr;

    // On Thumb1, it's not worth materializing a new base register without
    // clobbering the CPSR (i.e. not using ADDS/SUBS).
    if (!SafeToClobberCPSR)
      return nullptr;

    unsigned NewBase;
    if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination registers
      // as the new base. Will no longer be writeback in Thumb1.
      NewBase = Regs[NumRegs-1].first;
      Writeback = false;
    } else {
      // Find a free register that we can use as scratch register.
      moveLiveRegsBefore(MBB, InsertBefore);
      // The merged instruction does not exist yet but will use several Regs if
      // it is a Store.
      if (!isLoadSingle(Opcode))
        for (const std::pair<unsigned, bool> &R : Regs)
          LiveRegs.addReg(R.first);

      NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
      if (NewBase == 0)
        return nullptr;
    }

    int BaseOpc =
      isThumb2 ? ARM::t2ADDri :
      (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
      (isThumb1 && Offset < 8) ? ARM::tADDi3 :
      isThumb1 ? ARM::tADDi8 : ARM::ADDri;

    if (Offset < 0) {
      Offset = -Offset;
      BaseOpc =
        isThumb2 ? ARM::t2SUBri :
        (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
        isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
    }

    if (!TL->isLegalAddImmediate(Offset))
      // FIXME: Try add with register operand?
      return nullptr; // Probably not worth it then.

    // We can only append a kill flag to the add/sub input if the value is not
    // used in the register list of the stm as well.
    bool KillOldBase = BaseKill &&
      (!isi32Store(Opcode) || !ContainsReg(Regs, Base));

    if (isThumb1) {
      // Thumb1: depending on immediate size, use either
      //   ADDS NewBase, Base, #imm3
      // or
      //   MOV  NewBase, Base
      //   ADDS NewBase, #imm8.
      if (Base != NewBase &&
          (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
        // Need to insert a MOV to the new base first.
        if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
            !STI->hasV6Ops()) {
          // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
          if (Pred != ARMCC::AL)
            return nullptr;
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase));
        } else
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase))
            .addImm(Pred).addReg(PredReg);

        // The following ADDS/SUBS becomes an update.
        Base = NewBase;
        KillOldBase = true;
      }
      if (BaseOpc == ARM::tADDrSPi) {
        assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
        BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
          .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4)
          .addImm(Pred).addReg(PredReg);
      } else
        AddDefaultT1CC(
          BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true)
          .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
          .addImm(Pred).addReg(PredReg);
    } else {
      BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
        .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
        .addImm(Pred).addReg(PredReg).addReg(0);
    }
    Base = NewBase;
    BaseKill = true; // New base is always killed straight away.
  }

  bool isDef = isLoadSingle(Opcode);

  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  // base register writeback.
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode)
    return nullptr;

  // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
  // - There is no writeback (LDM of base register),
  // - the base register is killed by the merged instruction,
  // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
  //   to reset the base register.
  // Otherwise, don't merge.
  // It's safe to return here since the code to materialize a new base register
  // above is also conditional on SafeToClobberCPSR.
  if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
    return nullptr;

  MachineInstrBuilder MIB;

  if (Writeback) {
    assert(isThumb1 && "expected Writeback only in Thumb1");
    if (Opcode == ARM::tLDMIA) {
      assert(!ContainsReg(Regs, Base) &&
             "Thumb1 can't LDM! with Base in Regs");
      // Update tLDMIA with writeback if necessary.
      Opcode = ARM::tLDMIA_UPD;
    }

    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));

    // Thumb1: we might need to set base writeback when building the MI.
    MIB.addReg(Base, getDefRegState(true))
       .addReg(Base, getKillRegState(BaseKill));

    // The base isn't dead after a merged instruction with writeback.
    // Insert a sub instruction after the newly formed instruction to reset.
    if (!BaseKill)
      UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);

  } else {
    // No writeback, simply build the MachineInstr.
    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
    MIB.addReg(Base, getKillRegState(BaseKill));
  }

  MIB.addImm(Pred).addReg(PredReg);

  for (const std::pair<unsigned, bool> &R : Regs)
    MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));

  return MIB.getInstr();
}

MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB,
    MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
    bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
    DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const {
  bool IsLoad = isi32Load(Opcode);
  assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
  unsigned LoadStoreOpcode = IsLoad ?
    ARM::t2LDRDi8 : ARM::t2STRDi8;

  assert(Regs.size() == 2);
  MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
                                    TII->get(LoadStoreOpcode));
  if (IsLoad) {
    MIB.addReg(Regs[0].first, RegState::Define)
       .addReg(Regs[1].first, RegState::Define);
  } else {
    MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
       .addReg(Regs[1].first, getKillRegState(Regs[1].second));
  }
  MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  return MIB.getInstr();
}

/// Try to merge the instructions in \p Cand into a single LDM/STM or
/// LDRD/STRD instruction, updating register lists and flags accordingly on
/// success.
MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
  const MachineInstr *First = Cand.Instrs.front();
  unsigned Opcode = First->getOpcode();
  bool IsLoad = isLoadSingle(Opcode);
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 4> ImpDefs;
  DenseSet<unsigned> KilledRegs;
  DenseSet<unsigned> UsedRegs;
  // Determine list of registers and list of implicit super-register defs.
  for (const MachineInstr *MI : Cand.Instrs) {
    const MachineOperand &MO = getLoadStoreRegOp(*MI);
    unsigned Reg = MO.getReg();
    bool IsKill = MO.isKill();
    if (IsKill)
      KilledRegs.insert(Reg);
    Regs.push_back(std::make_pair(Reg, IsKill));
    UsedRegs.insert(Reg);

    if (IsLoad) {
      // Collect any implicit defs of super-registers. After merging we can't
      // be sure anymore that we properly preserved these live ranges and must
      // remove these implicit operands.
      for (const MachineOperand &MO : MI->implicit_operands()) {
        if (!MO.isReg() || !MO.isDef() || MO.isDead())
          continue;
        assert(MO.isImplicit());
        unsigned DefReg = MO.getReg();

        if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end())
          continue;
        // We can ignore cases where the super-reg is read and written.
        if (MI->readsRegister(DefReg))
          continue;
        ImpDefs.push_back(DefReg);
      }
    }
  }

  // Attempt the merge.
  typedef MachineBasicBlock::iterator iterator;
  MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
  iterator InsertBefore = std::next(iterator(LatestMI));
  MachineBasicBlock &MBB = *LatestMI->getParent();
  unsigned Offset = getMemoryOpOffset(First);
  unsigned Base = getLoadStoreBaseOp(*First).getReg();
  bool BaseKill = LatestMI->killsRegister(Base);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg);
  DebugLoc DL = First->getDebugLoc();
  MachineInstr *Merged = nullptr;
  if (Cand.CanMergeToLSDouble)
    Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
                                   Opcode, Pred, PredReg, DL, Regs);
  if (!Merged && Cand.CanMergeToLSMulti)
    Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
                                  Opcode, Pred, PredReg, DL, Regs);
  if (!Merged)
    return nullptr;

  // Determine the earliest instruction that will get removed. We then keep an
  // iterator just above it so the following erases don't invalidate it.
  iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
  bool EarliestAtBegin = false;
  if (EarliestI == MBB.begin()) {
    EarliestAtBegin = true;
  } else {
    EarliestI = std::prev(EarliestI);
  }

  // Remove instructions which have been merged.
  for (MachineInstr *MI : Cand.Instrs)
    MBB.erase(MI);

  // Determine range between the earliest removed instruction and the new one.
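  // Everything in this range sits between the removed loads/stores and the
  // merged instruction, so register flags (kill/undef) may need fixing below.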
  if (EarliestAtBegin)
    EarliestI = MBB.begin();
  else
    EarliestI = std::next(EarliestI);
  auto FixupRange = make_range(EarliestI, iterator(Merged));

  if (isLoadSingle(Opcode)) {
    // If the previous loads defined a super-reg, then we have to mark earlier
    // operands undef; replicate the super-reg def on the merged instruction.
    for (MachineInstr &MI : FixupRange) {
      for (unsigned &ImpDefReg : ImpDefs) {
        for (MachineOperand &MO : MI.implicit_operands()) {
          if (!MO.isReg() || MO.getReg() != ImpDefReg)
            continue;
          if (MO.readsReg())
            MO.setIsUndef();
          else if (MO.isDef())
            ImpDefReg = 0;
        }
      }
    }

    MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
    for (unsigned ImpDef : ImpDefs)
      MIB.addReg(ImpDef, RegState::ImplicitDefine);
  } else {
    // Remove kill flags: We are possibly storing the values later now.
    assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
    for (MachineInstr &MI : FixupRange) {
      for (MachineOperand &MO : MI.uses()) {
        if (!MO.isReg() || !MO.isKill())
          continue;
        if (UsedRegs.count(MO.getReg()))
          MO.setIsKill(false);
      }
    }
    assert(ImpDefs.empty());
  }

  return Merged;
}

static bool isValidLSDoubleOffset(int Offset) {
  unsigned Value = abs(Offset);
  // t2LDRDi8/t2STRDi8 support an 8-bit immediate which is internally
  // multiplied by 4.
  return (Value % 4) == 0 && Value < 1024;
}

/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
  const MachineInstr *FirstMI = MemOps[0].MI;
  unsigned Opcode = FirstMI->getOpcode();
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  unsigned Size = getLSMultipleTransferSize(FirstMI);

  unsigned SIndex = 0;
  unsigned EIndex = MemOps.size();
  do {
    // Look at the first instruction.
    const MachineInstr *MI = MemOps[SIndex].MI;
    int Offset = MemOps[SIndex].Offset;
    const MachineOperand &PMO = getLoadStoreRegOp(*MI);
    unsigned PReg = PMO.getReg();
    unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
    unsigned Latest = SIndex;
    unsigned Earliest = SIndex;
    unsigned Count = 1;
    bool CanMergeToLSDouble =
      STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    if (STI->isCortexM3() && isi32Load(Opcode) &&
        PReg == getLoadStoreBaseOp(*MI).getReg())
      CanMergeToLSDouble = false;

    bool CanMergeToLSMulti = true;
    // On Swift, avoid vldm/vstm that start with an odd register number, as
    // they need more uops than single vldrs.
    if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
      CanMergeToLSMulti = false;

    // LDRD/STRD do not allow SP/PC. LDM/STM either do not support them or
    // their use is deprecated; LDM to PC is fine but cannot happen here.
    if (PReg == ARM::SP || PReg == ARM::PC)
      CanMergeToLSMulti = CanMergeToLSDouble = false;

    // Merge following instructions where possible.
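    // Illustrative examples: "ldr r1, [r0]; ldr r2, [r0, #4]" can extend the
    // candidate, "ldr r1, [r0]; ldr r1, [r0, #8]" breaks the chain at the
    // offset gap, and a descending register order (r2 then r1) rules out the
    // ldm form because the encoding requires ascending register numbers.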
    for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
      int NewOffset = MemOps[I].Offset;
      if (NewOffset != Offset + (int)Size)
        break;
      const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
      unsigned Reg = MO.getReg();
      if (Reg == ARM::SP || Reg == ARM::PC)
        break;

      // See if the current load/store may be part of a multi load/store.
      unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
      bool PartOfLSMulti = CanMergeToLSMulti;
      if (PartOfLSMulti) {
        // Register numbers must be in ascending order.
        if (RegNum <= PRegNum)
          PartOfLSMulti = false;
        // For VFP / NEON load/store multiples, the registers must be
        // consecutive and within the limit on the number of registers per
        // instruction.
        else if (!isNotVFP && RegNum != PRegNum+1)
          PartOfLSMulti = false;
      }
      // See if the current load/store may be part of a double load/store.
      bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;

      if (!PartOfLSMulti && !PartOfLSDouble)
        break;
      CanMergeToLSMulti &= PartOfLSMulti;
      CanMergeToLSDouble &= PartOfLSDouble;
      // Track MemOp with latest and earliest position (Positions are
      // counted in reverse).
      unsigned Position = MemOps[I].Position;
      if (Position < MemOps[Latest].Position)
        Latest = I;
      else if (Position > MemOps[Earliest].Position)
        Earliest = I;
      // Prepare for next MemOp.
      Offset += Size;
      PRegNum = RegNum;
    }

    // Form a candidate from the Ops collected so far.
    MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
    for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
      Candidate->Instrs.push_back(MemOps[C].MI);
    Candidate->LatestMIIdx = Latest - SIndex;
    Candidate->EarliestMIIdx = Earliest - SIndex;
    Candidate->InsertPos = MemOps[Latest].Position;
    if (Count == 1)
      CanMergeToLSMulti = CanMergeToLSDouble = false;
    Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
    Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
    Candidates.push_back(Candidate);
    // Continue after the chain.
    SIndex += Count;
  } while (SIndex < EIndex);
}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}

/// Check if the given instruction increments or decrements a register and
/// return the amount it is incremented/decremented. Returns 0 if the
/// instruction is not an increment/decrement of \p Reg or if the CPSR flags
/// it generates are possibly read as well.
static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
                                  ARMCC::CondCodes Pred, unsigned PredReg) {
  bool CheckCPSRDef;
  int Scale;
  switch (MI.getOpcode()) {
  case ARM::tADDi8:  Scale =  4; CheckCPSRDef = true; break;
  case ARM::tSUBi8:  Scale = -4; CheckCPSRDef = true; break;
  case ARM::t2SUBri:
  case ARM::SUBri:   Scale = -1; CheckCPSRDef = true; break;
  case ARM::t2ADDri:
  case ARM::ADDri:   Scale =  1; CheckCPSRDef = true; break;
  case ARM::tADDspi: Scale =  4; CheckCPSRDef = false; break;
  case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
  default: return 0;
  }

  unsigned MIPredReg;
  if (MI.getOperand(0).getReg() != Reg ||
      MI.getOperand(1).getReg() != Reg ||
      getInstrPredicate(&MI, MIPredReg) != Pred ||
      MIPredReg != PredReg)
    return 0;

  if (CheckCPSRDef && definesCPSR(&MI))
    return 0;
  return MI.getOperand(2).getImm() * Scale;
}

/// Searches for an increment or decrement of \p Reg before \p MBBI.
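/// Returns MBB.end() (and leaves \p Offset at 0) when no usable
/// increment/decrement is found directly before \p MBBI.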
static MachineBasicBlock::iterator
findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
                 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
  Offset = 0;
  MachineBasicBlock &MBB = *MBBI->getParent();
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (MBBI == BeginMBBI)
    return EndMBBI;

  // Skip debug values.
  MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
  while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI)
    --PrevMBBI;

  Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
  return Offset == 0 ? EndMBBI : PrevMBBI;
}

/// Searches for an increment or decrement of \p Reg after \p MBBI.
static MachineBasicBlock::iterator
findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
                ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
  Offset = 0;
  MachineBasicBlock &MBB = *MBBI->getParent();
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
  // Skip debug values.
  while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
    ++NextMBBI;
  if (NextMBBI == EndMBBI)
    return EndMBBI;

  Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
  return Offset == 0 ? EndMBBI : NextMBBI;
}

/// Fold preceding/trailing inc/dec of base register into the
/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
///   stmia rn, <ra, rb, rc>
///   rn := rn + 4 * 3;
///   =>
///   stmia rn!, <ra, rb, rc>
///
///   rn := rn - 4 * 3;
///   ldmia rn, <ra, rb, rc>
///   =>
///   ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;

  const MachineOperand &BaseOP = MI->getOperand(0);
  unsigned Base = BaseOP.getReg();
  bool BaseKill = BaseOP.isKill();
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  int Bytes = getLSMultipleTransferSize(MI);
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator MBBI(MI);
  int Offset;
  MachineBasicBlock::iterator MergeInstr
    = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
  if (Mode == ARM_AM::ia && Offset == -Bytes) {
    Mode = ARM_AM::db;
  } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
    Mode = ARM_AM::da;
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
        ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes))
      return false;
  }
  MBB.erase(MergeInstr);

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// Fold preceding/trailing inc/dec of base register into the
/// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
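  // Sketch of the intended rewrites (registers are illustrative only):
  //   add r0, r0, #4; ldr r1, [r0]  =>  ldr r1, [r0, #4]!  (pre-indexed)
  //   ldr r1, [r0]; add r0, r0, #4  =>  ldr r1, [r0], #4   (post-indexed)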
  if (isThumb1) return false;

  unsigned Base = getLoadStoreBaseOp(*MI).getReg();
  bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Bytes = getLSMultipleTransferSize(MI);
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator MBBI(MI);
  int Offset;
  MachineBasicBlock::iterator MergeInstr
    = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  unsigned NewOpc;
  if (!isAM5 && Offset == Bytes) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
  } else if (Offset == -Bytes) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (Offset == Bytes) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
    } else if (!isAM5 && Offset == -Bytes) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
    } else
      return false;
  }
  MBB.erase(MergeInstr);

  ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;

  bool isLd = isLoadSingle(Opcode);
  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                                   getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
      }
    } else {
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
    } else {
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}

bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
  unsigned Opcode = MI.getOpcode();
  assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
         "Must have t2STRDi8 or t2LDRDi8");
  if (MI.getOperand(3).getImm() != 0)
    return false;

  // Behaviour for writeback is undefined if base register is the same as one
  // of the others.
  const MachineOperand &BaseOp = MI.getOperand(2);
  unsigned Base = BaseOp.getReg();
  const MachineOperand &Reg0Op = MI.getOperand(0);
  const MachineOperand &Reg1Op = MI.getOperand(1);
  if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
    return false;

  unsigned PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);
  MachineBasicBlock::iterator MBBI(MI);
  MachineBasicBlock &MBB = *MI.getParent();
  int Offset;
  MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
                                                            PredReg, Offset);
  unsigned NewOpc;
  if (Offset == 8 || Offset == -8) {
    NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (Offset == 8 || Offset == -8) {
      NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
    } else
      return false;
  }
  MBB.erase(MergeInstr);

  DebugLoc DL = MI.getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
    MIB.addOperand(Reg0Op).addOperand(Reg1Op)
       .addReg(BaseOp.getReg(), RegState::Define);
  } else {
    assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
    MIB.addReg(BaseOp.getReg(), RegState::Define)
       .addOperand(Reg0Op).addOperand(Reg1Op);
  }
  MIB.addReg(BaseOp.getReg(), RegState::Kill)
     .addImm(Offset).addImm(Pred).addReg(PredReg);
  assert(TII->get(Opcode).getNumOperands() == 6 &&
         TII->get(NewOpc).getNumOperands() == 7 &&
         "Unexpected number of operands in Opcode specification.");

  // Transfer implicit operands.
  for (const MachineOperand &MO : MI.implicit_operands())
    MIB.addOperand(MO);
  MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());

  MBB.erase(MBBI);
  return true;
}

/// Returns true if instruction is a memory operation that this pass is capable
/// of operating on.
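/// Only the simple immediate-offset loads/stores listed in the switch below
/// qualify; volatile or under-aligned accesses are rejected further down.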
static bool isMemoryOp(const MachineInstr &MI) {
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  case ARM::VLDRS:
  case ARM::VSTRS:
  case ARM::VLDRD:
  case ARM::VSTRD:
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    break;
  default:
    return false;
  }
  if (!MI.getOperand(1).isReg())
    return false;

  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand &MMO = **MI.memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO.isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO.getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI.getOperand(1).isUndef())
    return false;

  return true;
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc DL, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
    return false;

  const MachineOperand &BaseOp = MI->getOperand(2);
  unsigned BaseReg = BaseOp.getReg();
  unsigned EvenReg = MI->getOperand(0).getReg();
  unsigned OddReg = MI->getOperand(1).getReg();
  unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
  unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);

  // ARM errata 602117: LDRD with base in list may result in incorrect base
  // register when interrupted or faulted.
  bool Errata602117 = EvenReg == BaseReg &&
    (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
  // ARM LDRD/STRD needs consecutive registers.
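  // (In ARM mode the first register must be even-numbered and the second must
  // be the next register, e.g. r0/r1 is valid but r1/r2 is not; the check
  // below mirrors that encoding rule.)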
  bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
    (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);

  if (!Errata602117 && !NonConsecutiveRegs)
    return false;

  bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
  bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
  bool EvenDeadKill = isLd ?
    MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
  bool EvenUndef = MI->getOperand(0).isUndef();
  bool OddDeadKill = isLd ?
    MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
  bool OddUndef = MI->getOperand(1).isUndef();
  bool BaseKill = BaseOp.isKill();
  bool BaseUndef = BaseOp.isUndef();
  bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
  bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
  int OffImm = getMemoryOpOffset(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

  if (OddRegNum > EvenRegNum && OffImm == 0) {
    // Ascending register numbers and no offset. It's safe to change it to a
    // ldm or stm.
    unsigned NewOpc = (isLd)
      ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
      : (isT2 ? ARM::t2STMIA : ARM::STMIA);
    if (isLd) {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
        .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
      ++NumLDRD2LDM;
    } else {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg,
                getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
        .addReg(OddReg,
                getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
      ++NumSTRD2STM;
    }
  } else {
    // Split into two instructions.
    unsigned NewOpc = (isLd)
      ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
    // so adjust and use t2LDRi12 here for that.
    unsigned NewOpc2 = (isLd)
      ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    DebugLoc dl = MBBI->getDebugLoc();
    // If this is a load and base register is killed, it may have been
    // re-defed by the load, make sure the first load does not clobber it.
    if (isLd &&
        (BaseKill || OffKill) &&
        (TRI->regsOverlap(EvenReg, BaseReg))) {
      assert(!TRI->regsOverlap(OddReg, BaseReg));
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, false,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, false,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    } else {
      if (OddReg == EvenReg && EvenDeadKill) {
        // If the two source operands are the same, the kill marker is
        // probably on the first one. e.g.
        //   t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
        EvenDeadKill = false;
        OddDeadKill = true;
      }
      // Never kill the base register in the first instruction.
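      // (When the pair is split, the second instruction still needs the base
      // to form its address, so an early kill flag would be incorrect.)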
      if (EvenReg == BaseReg)
        EvenDeadKill = false;
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, EvenUndef,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, OddUndef,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    }
    if (isLd)
      ++NumLDRD2LDR;
    else
      ++NumSTRD2STR;
  }

  MBBI = MBB.erase(MBBI);
  return true;
}

/// An optimization that turns multiple LDR / STR ops with the same base and
/// incrementing offsets into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  unsigned CurrOpc = ~0u;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned Position = 0;
  assert(Candidates.size() == 0);
  assert(MergeBaseCandidates.size() == 0);
  LiveRegsValid = false;

  for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
       I = MBBI) {
    // The instruction in front of the iterator is the one we look at.
    MBBI = std::prev(I);
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;
    ++Position;

    if (isMemoryOp(*MBBI)) {
      unsigned Opcode = MBBI->getOpcode();
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      if (CurrBase == 0) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrPred = Pred;
        MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
        continue;
      }
      // Note: No need to match PredReg in the next if.
      if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
        // Watch out for:
        //   r4 := ldr [r0, #8]
        //   r4 := ldr [r0, #4]
        // or
        //   r0 := ldr [r0]
        // If a load overrides the base register or a register loaded by
        // another load in our chain, we cannot take this instruction.
        bool Overlap = false;
        if (isLoadSingle(Opcode)) {
          Overlap = (Base == Reg);
          if (!Overlap) {
            for (const MemOpQueueEntry &E : MemOps) {
              if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
                Overlap = true;
                break;
              }
            }
          }
        }

        if (!Overlap) {
          // Check offset and sort memory operation into the current chain.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
            continue;
          } else {
            MemOpQueue::iterator MI, ME;
            for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
              if (Offset < MI->Offset) {
                // Found a place to insert.
                break;
              }
              if (Offset == MI->Offset) {
                // Collision, abort.
                MI = ME;
                break;
              }
            }
            if (MI != MemOps.end()) {
              MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position));
              continue;
            }
          }
        }
      }

      // Don't advance the iterator; the op will start a new chain next.
      MBBI = I;
      --Position;
      // Fallthrough to look into existing chain.
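      // For example, with r0 as the current base:
      //   ldr r1, [r0]       ; starts the chain
      //   ldr r2, [r0, #4]   ; sorted into the chain
      //   ldr r3, [r5]       ; different base: the current chain is flushed
      //                      ; below and a new one starts here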
    } else if (MBBI->isDebugValue()) {
      continue;
    } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
               MBBI->getOpcode() == ARM::t2STRDi8) {
      // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD
      // instructions; remember them because we may still be able to merge
      // an add/sub into them.
      MergeBaseCandidates.push_back(MBBI);
    }

    // If we are here then the chain is broken; extract candidates for a merge.
    if (MemOps.size() > 0) {
      FormCandidates(MemOps);
      // Reset for the next chain.
      CurrBase = 0;
      CurrOpc = ~0u;
      CurrPred = ARMCC::AL;
      MemOps.clear();
    }
  }
  if (MemOps.size() > 0)
    FormCandidates(MemOps);

  // Sort candidates so they get processed from the end to the beginning of
  // the basic block later; this is necessary for the liveness calculation.
  auto LessThan = [](const MergeCandidate *M0, const MergeCandidate *M1) {
    return M0->InsertPos < M1->InsertPos;
  };
  std::sort(Candidates.begin(), Candidates.end(), LessThan);

  // Go through the list of candidates and merge.
  bool Changed = false;
  for (const MergeCandidate *Candidate : Candidates) {
    if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
      MachineInstr *Merged = MergeOpsUpdate(*Candidate);
      // Merge preceding/trailing base inc/dec into the merged op.
      if (Merged) {
        Changed = true;
        unsigned Opcode = Merged->getOpcode();
        if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
          MergeBaseUpdateLSDouble(*Merged);
        else
          MergeBaseUpdateLSMultiple(Merged);
      } else {
        for (MachineInstr *MI : Candidate->Instrs) {
          if (MergeBaseUpdateLoadStore(MI))
            Changed = true;
        }
      }
    } else {
      assert(Candidate->Instrs.size() == 1);
      if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
        Changed = true;
    }
  }
  Candidates.clear();
  // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
  for (MachineInstr *MI : MergeBaseCandidates)
    MergeBaseUpdateLSDouble(*MI);
  MergeBaseCandidates.clear();

  return Changed;
}

/// If this is an exit BB, try merging the return ops ("bx lr" and
/// "mov pc, lr") into the preceding stack restore so it directly restores
/// the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  // Thumb1 LDM doesn't allow high registers.
  if (isThumb1) return false;
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = std::prev(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
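      // LDMIA_RET / t2LDMIA_RET are the pop-and-return forms; the rewrite
      // below turns, e.g.,
      //   ldmia sp!, {r4, lr}
      //   bx lr
      // into
      //   ldmia sp!, {r4, pc}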
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  MF = &Fn;
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TL = STI->getTargetLowering();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();

  RegClassInfoValid = false;
  isThumb2 = AFI->isThumb2Function();
  isThumb1 = AFI->isThumbFunction() && !isThumb2;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (STI->hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  Allocator.DestroyAll();
  return Modified;
}

namespace llvm {
void initializeARMPreAllocLoadStoreOptPass(PassRegistry &);
}

#define ARM_PREALLOC_LOAD_STORE_OPT_NAME                                       \
  "ARM pre- register allocation load / store optimization pass"

namespace {
  /// Pre- register allocation pass that moves loads / stores from consecutive
  /// locations closer together to make it more likely they will be combined
  /// later.
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {
      initializeARMPreAllocLoadStoreOptPass(*PassRegistry::getPassRegistry());
    }

    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return ARM_PREALLOC_LOAD_STORE_OPT_NAME;
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &FirstReg,
                          unsigned &SecondReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVectorImpl<MachineInstr *> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-load-store-opt",
                ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = &Fn.getDataLayout();
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineBasicBlock &MFI : Fn)
    Modified |= RescheduleLoadStoreInstrs(&MFI);

  return Modified;
}
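/// Check whether it is safe and profitable to move the loads / stores in
/// \p MemOps (all using base register \p Base) next to each other. Walking
/// the instructions between \p I and \p E (ignoring the ops being moved
/// themselves), this rejects the move if it would cross a call, a
/// terminator, an instruction with unmodeled side effects, a conflicting
/// load or store, or a redefinition of the base register, and otherwise
/// applies a rough register-pressure heuristic.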
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSetImpl<MachineInstr*> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias
  // information some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      //   str r1, [r0]
      //   strh r5, [r0]
      //   str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // OK if we are moving a small number of memory accesses.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}

/// Copy \p Op0 and \p Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}

bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl, unsigned &NewOpc,
                                          unsigned &FirstReg,
                                          unsigned &SecondReg,
                                          unsigned &BaseReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12) {
    NewOpc = ARM::LDRD;
  } else if (Opcode == ARM::STRi12) {
    NewOpc = ARM::STRD;
  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else {
    return false;
  }

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement. At the moment, we ignore the MemOperand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
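  // For reference: t2LDRDi8 / t2STRDi8 take an 8-bit offset field scaled by
  // 4, i.e. byte offsets in [-1020, 1020] that are multiples of 4, while the
  // ARM-mode LDRD / STRD (addrmode3) take an 8-bit unsigned offset plus an
  // add/sub bit, i.e. [-255, 255].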
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  FirstReg = Op0->getOperand(0).getReg();
  SecondReg = Op1->getOperand(0).getReg();
  if (FirstReg == SecondReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVectorImpl<MachineInstr *> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
              int LOffset = getMemoryOpOffset(LHS);
              int ROffset = getMemoryOpOffset(RHS);
              assert(LHS == RHS || LOffset != ROffset);
              return LOffset > ROffset;
            });

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative: if the instructions are too far apart, don't move
      // them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
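        // Loads are gathered up at the earliest of the candidate loads;
        // stores are sunk down to the latest of the candidate stores.
        // IsSafeAndProfitableToMove has already verified that no conflicting
        // access or redefinition of the base sits in between.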
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register
        // pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned FirstReg = 0, SecondReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             FirstReg, SecondReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(FirstReg, TRC);
          MRI->constrainRegClass(SecondReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg, RegState::Define)
              .addReg(SecondReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg)
              .addReg(SecondReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          if (!isT2) {
            // Add register allocation hints to form register pairs.
            MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
            MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
          }
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
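        // A call or terminator may clobber memory and registers in ways this
        // pass does not track, so rescheduling never crosses one; the bases
        // collected so far are processed and the maps are reset below.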
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(*MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isLoadSingle(Opc);
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}

/// Returns an instance of the load / store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}