//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Debug.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");

/// ARMAllocLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.

namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    /// MemOpQueueEntry - One queued single load/store that is a candidate for
    /// merging into a load/store-multiple instruction.
    struct MemOpQueueEntry {
      int Offset;        // Immediate offset from the common base register.
      unsigned Reg;      // Register loaded or stored by this op.
      bool isKill;       // True if this op kills Reg.
      unsigned Position; // Ordinal position of the op within the block.
      MachineBasicBlock::iterator MBBI;
      bool Merged;       // True once the op has been folded into a multiple.
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool
                        BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

/// getLoadStoreMultipleOpcode - Map a single load/store opcode and an
/// addressing submode to the corresponding load/store-multiple opcode.
/// Returns 0 when no non-writeback form exists for that submode (the VFP
/// DB variants only exist as _UPD instructions). Also bumps the relevant
/// statistic counter as a side effect.
static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
    break;
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
    break;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
    break;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
    break;
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
    break;
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
    break;
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
    break;
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
    break;
  }

  return 0;
}

namespace llvm {
  namespace ARM_AM {

/// getLoadStoreMultipleSubMode - Return the addressing submode (ia/ib/da/db)
/// encoded in a load/store-multiple opcode.
AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }

  return ARM_AM::bad_am_submode;
}

  } // end namespace ARM_AM
} // end namespace llvm

// Opcode classification helpers for the integer (non-VFP) loads/stores this
// pass handles.
static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT2i32Store(Opc);
}

/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  // Pick the addressing submode from the starting offset so that the first
  // transferred address matches the first memop's address.
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // Check if this is a supported opcode before we insert instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination register to
      // use as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register to use as a new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
    if (Offset < 0) {
      BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
      Offset = - Offset;
    }
    // The offset must be encodable as a shifter/modified immediate.
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}

// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  // Scan the memops NOT being merged; any that sits before the insertion
  // point and kills its register must hand that kill flag over to the
  // merged instruction.
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      // Skip over the [memOpsBegin, memOpsEnd) range being merged.
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                          unsigned Base, int Opcode, unsigned Size,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          unsigned Scratch, MemOpQueue &MemOps,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : getARMRegisterNumbering(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;

  // vldm / vstm limit are 32 for S variants, 16 for D variants.

  switch (Opcode) {
  default: break;
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VSTRD:
    Limit = 16;
    break;
  case ARM::VLDRD:
    Limit = 16;
    break;
  case ARM::VLDRS:
    Limit = 32;
    break;
  }

  // Greedily grow the run of mergeable memops starting at SIndex.
  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : getARMRegisterNumbering(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      // Recurse to merge the remainder of the queue starting at i.
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}

/// isMatchingDecrement - Return true if MI is a SUB of Bytes from Base into
/// Base with a matching predicate, i.e. foldable into a base-updating
/// load/store. A non-zero Limit bounds the acceptable byte count.
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  // tSUBspi encodes its immediate in words, not bytes.
  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

/// isMatchingIncrement - Counterpart of isMatchingDecrement for ADDs of
/// Bytes to Base.
static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  if (Bytes == 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  // tADDspi encodes its immediate in words, not bytes.
  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

/// getLSMultipleTransferSize - Number of bytes transferred by a load/store
/// (or load/store-multiple) instruction; 0 for unhandled opcodes.
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    // Variadic register list: count the non-fixed operands.
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  // Map a non-writeback load/store-multiple opcode plus submode to its
  // base-updating (_UPD) counterpart.
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
    break;
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
    break;
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
    break;
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
    break;
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
    break;
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
    break;
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
    break;
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
    break;
  }

  return 0;
}

/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    // A preceding decrement of Bytes turns an after-submode into the
    // corresponding before-submode with writeback.
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      // If the caller's iterator points at the instruction we erase,
      // advance it and tell the caller via Advance.
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

// Map a single load/store opcode to its pre-indexed (writeback-before)
// counterpart. VLDR/VSTR have no pre-indexed form, so the single-register
// updating multiple is used instead.
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

// Map a single load/store opcode to its post-indexed (writeback-after)
// counterpart; same VLDR/VSTR caveat as above.
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ?
                               ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

/// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  // Only zero-offset forms can absorb a separate base update.
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      // Keep the caller's iterator valid if it pointed at the erased MI.
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD}_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigal zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
957 static bool isMemoryOp(const MachineInstr *MI) { 958 // When no memory operands are present, conservatively assume unaligned, 959 // volatile, unfoldable. 960 if (!MI->hasOneMemOperand()) 961 return false; 962 963 const MachineMemOperand *MMO = *MI->memoperands_begin(); 964 965 // Don't touch volatile memory accesses - we may be changing their order. 966 if (MMO->isVolatile()) 967 return false; 968 969 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is 970 // not. 971 if (MMO->getAlignment() < 4) 972 return false; 973 974 // str <undef> could probably be eliminated entirely, but for now we just want 975 // to avoid making a mess of it. 976 // FIXME: Use str <undef> as a wildcard to enable better stm folding. 977 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() && 978 MI->getOperand(0).isUndef()) 979 return false; 980 981 // Likewise don't mess with references to undefined addresses. 982 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() && 983 MI->getOperand(1).isUndef()) 984 return false; 985 986 int Opcode = MI->getOpcode(); 987 switch (Opcode) { 988 default: break; 989 case ARM::VLDRS: 990 case ARM::VSTRS: 991 return MI->getOperand(1).isReg(); 992 case ARM::VLDRD: 993 case ARM::VSTRD: 994 return MI->getOperand(1).isReg(); 995 case ARM::LDRi12: 996 case ARM::STRi12: 997 case ARM::t2LDRi8: 998 case ARM::t2LDRi12: 999 case ARM::t2STRi8: 1000 case ARM::t2STRi12: 1001 return MI->getOperand(1).isReg(); 1002 } 1003 return false; 1004 } 1005 1006 /// AdvanceRS - Advance register scavenger to just before the earliest memory 1007 /// op that is being merged. 
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  // Find the queued memory op with the smallest Position, i.e. the one that
  // appears earliest in the block.
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  // Move the scavenger to the instruction just before that op so its
  // liveness model is accurate at the merge point. No-op when the op is the
  // first instruction of the block.
  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}

/// getMemoryOpOffset - Return the signed byte offset of a load/store
/// candidate. The offset immediate is always the third-from-last operand.
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  // These forms carry the offset directly as an immediate.
  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  // Remaining forms encode the magnitude plus a separate add/sub bit:
  // addrmode3 for LDRD/STRD, otherwise addrmode5 (magnitude scaled by 4).
  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
    : ARM_AM::getAM5Offset(OffField) * 4;
  if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

/// InsertLDR_STR - Emit one plain load (isDef) or store with the given
/// operand flags. Used by FixInvalidRegPairOp when splitting an LDRD/STRD
/// into two single-word accesses.
/// NOTE(review): the 'dl' parameter is unused (the builder uses
/// MBBI->getDebugLoc()), and OffKill/OffUndef are accepted but never read —
/// confirm whether that is intentional.
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    // Load: destination register is a def (possibly dead).
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    // Store: source register is a use (possibly killed/undef).
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

/// FixInvalidRegPairOp - LDRD/STRD need their data registers to form an
/// even/odd pair (even dwarf number, odd == even+1). If the pair is invalid,
/// or hits the Cortex-M3 LDRD erratum, rewrite the instruction either as an
/// LDM/STM (ascending regs, zero offset) or as two single loads/stores.
/// On rewrite, erases MI, points MBBI at the first replacement, and returns
/// true.
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    // Well-formed even/odd pair and no erratum: nothing to fix.
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    // For loads the data regs are defs (track dead); for stores they are
    // uses (track kill).
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    // Only the non-T2 forms have a separate offset-register operand.
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        // Load the odd (higher-address) half first so the even load, which
        // overwrites the base, comes last.
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  // Curr* describe the chain being accumulated: same base register, same
  // opcode, same predicate. CurrBase == 0 means no open chain.
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue; keep it sorted by offset.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      // Skip debug values without counting a Position.
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      // Reset chain state for the next run of candidates.
      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

/// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restore the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  // NOTE(review): if the block contains only debug values,
  // getLastNonDebugInstr() returns end(), which the MBBI != MBB.begin()
  // test does not rule out before dereferencing — confirm this cannot occur
  // here.
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      // NOTE(review): only the IA update forms are rewritten, yet the check
      // above also accepts DA/DB/IB forms; with asserts disabled those would
      // be mislabeled as LDMIA_RET — confirm they cannot reach this point.
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      // Carry the return's implicit operands over, then drop the return.
      PrevMI->copyImplicitOps(&*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  // Cache target hooks and per-function state for the helpers above.
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  STI = &TM.getSubtarget<ARMSubtarget>();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    // LDM-into-pc returns require v5T ("bx lr" era) support.
    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
/// loads / stores from consecutive locations closer together to make it
/// more likely they will be combined later.

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    // Cached per-function state, set up in runOnMachineFunction.
    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    // Check whether Op0/Op1 can be combined into one LDRD/STRD; on success
    // fills in all the out-parameters describing the new instruction.
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    // Move the queued ops for one base register next to each other.
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

/// IsSafeAndProfitableToMove - Return true if the instructions strictly
/// between I and E (the ops being moved themselves, MemOps, are skipped)
/// can be crossed safely when grouping the memory ops, and the estimated
/// register-pressure increase is acceptable. MemRegs holds the data
/// registers of the ops being moved.
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias
  // information some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    const MCInstrDesc &MCID = I->getDesc();
    if (MCID.isCall() || MCID.isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    // Loads must not cross a store; stores must not cross loads or stores.
    if (isLd && MCID.mayStore())
      return false;
    if (!isLd) {
      if (MCID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (MCID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      // Any redefinition of the base register invalidates the move.
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving a small number of registers.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}


/// Copy Op0 and Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  // MI must not own memory references yet; we allocate a fresh array that
  // holds Op0's refs followed by Op1's.
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}

bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STRi12)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  // Reject unknown-value and volatile accesses up front.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 need 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    // ARM encoding: magnitude plus add/sub bit (addrmode3).
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg = Op1->getOperand(0).getReg();
  // LDRD/STRD need two distinct data registers.
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

namespace {
  // Orders memory ops by decreasing offset; RescheduleOps then walks the
  // sorted vector back-to-front to see increasing offsets.
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}

/// RescheduleOps - Move the queued loads / stores of one base register next
/// to each other (loads up to the position of the first one, stores down to
/// the last one). A pair of adjacent ops may be converted directly into an
/// LDRD/STRD via CanFormLdStDWord. Returns true if anything was moved.
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    // Walk back-to-front (i.e. increasing offsets), growing the run of
    // same-opcode, gap-free, same-size ops that can move together.
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        // Drop the whole run and retry with the remaining ops.
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always by reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always by reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
        } else {
          // Otherwise just splice the run of ops to the insertion point.
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

/// RescheduleLoadStoreInstrs - Collect the loads and stores of each base
/// register within call/terminator-free regions of MBB, then try to
/// reschedule each group with RescheduleOps.
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    // Collect candidates until a barrier or a duplicate base+offset.
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const MCInstrDesc &MCID = MI->getDesc();
      if (MCID.isCall() || MCID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      // Only unpredicated ops are rescheduled.
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    // Start a fresh region after a barrier.
    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}