//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");

/// ARMLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    class UnitRegsMap {
    public:
      UnitRegsMap(const TargetRegisterInfo* _TRI) : TRI(_TRI) {}
      const SmallVector<unsigned, 4>& operator[](unsigned Reg) {
        DenseMap<unsigned, SmallVector<unsigned, 4> >::iterator found =
          Cache.find(Reg);
        if (found != Cache.end())
          return found->second;
        else
          return Cache.insert(std::make_pair(Reg, this->getUnitRegs(Reg)))
                   .first->second;
      }
    private:
      SmallVector<unsigned, 4> getUnitRegs(unsigned Reg) {
        SmallVector<unsigned, 4> Res;

        const TargetRegisterClass* TRC = TRI->getMinimalPhysRegClass(Reg);
        if (TRC == &ARM::QPRRegClass) {
          if (Reg > ARM::Q7) {
            Res.push_back(TRI->getSubReg(Reg, ARM::dsub_0));
            Res.push_back(TRI->getSubReg(Reg, ARM::dsub_1));
            return Res;
          }

          Res.push_back(TRI->getSubReg(Reg, ARM::ssub_0));
          Res.push_back(TRI->getSubReg(Reg, ARM::ssub_1));
          Res.push_back(TRI->getSubReg(Reg, ARM::ssub_2));
          Res.push_back(TRI->getSubReg(Reg, ARM::ssub_3));

          return Res;
        }

        if (TRC == &ARM::DPRRegClass && Reg < ARM::D15) {
          Res.push_back(TRI->getSubReg(Reg, ARM::ssub_0));
          Res.push_back(TRI->getSubReg(Reg, ARM::ssub_1));

          return Res;
        }

        Res.push_back(Reg);

        return Res;
      }
      const TargetRegisterInfo* TRI;
      DenseMap<unsigned, SmallVector<unsigned, 4> > Cache;
    };
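    // Illustrative examples (based on the subregister layout handled above,
    // not from the original source): UnitRegsMap would map s0 to {s0},
    // d0 to {s0, s1}, q0 to {s0, s1, s2, s3}, and q8 (which has no
    // S subregisters) to {d16, d17}.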
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl,
                  ArrayRef<std::pair<unsigned, bool> > Regs,
                  ArrayRef<unsigned> ImpDefs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    unsigned AddMemOp(MemOpQueue& MemOps,
                      const MemOpQueueEntry newEntry,
                      UnitRegsMap& UnitRegsInfo,
                      SmallSet<unsigned, 4>& UsedUnitRegs,
                      unsigned At = -1U);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}
namespace llvm {
namespace ARM_AM {

AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

} // end namespace ARM_AM
} // end namespace llvm

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT2i32Store(Opc);
}

/// MergeOps - Create and insert an LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          ArrayRef<std::pair<unsigned, bool> > Regs,
                          ArrayRef<unsigned> ImpDefs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // Check if this is a supported opcode before we insert instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Otherwise use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
    if (Offset < 0) {
      BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
      Offset = - Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // The new base is always killed right after its use.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  // Add implicit defs for super-registers.
  for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
    MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);

  return true;
}
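// Illustrative example (not from the original source): for LDRi12 loads with
// Base = r0, Offset = 4, and Regs = {r1, r2, r3} on a non-Thumb2 target,
// haveIBAndDA is true and the offset of 4 selects the IB submode, so the
// merged instruction would be:
//   ldmib r0, {r1, r2, r3}
// With Offset = 0 the default IA submode is used instead:
//   ldmia r0, {r1, r2, r3}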
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 8> ImpDefs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));

    // Collect any implicit defs of super-registers. They must be preserved.
    for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
      if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
        continue;
      unsigned DefReg = MO->getReg();
      if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
        ImpDefs.push_back(DefReg);
    }
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs, ImpDefs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                          unsigned Base, int Opcode, unsigned Size,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          unsigned Scratch, MemOpQueue &MemOps,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;

  // vldm / vstm limits are 32 for S variants and 16 for D variants.
  switch (Opcode) {
  default: break;
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VSTRD:
    Limit = 16;
    break;
  case ARM::VLDRD:
    Limit = 16;
    break;
  case ARM::VLDRS:
    Limit = 32;
    break;
  }

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try to merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}
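// Illustrative example (not from the original source): for the sequence
//   ldr r4, [r0]
//   ldr r5, [r0, #4]
//   ldr r3, [r0, #8]   <- r3's encoding is not above r5's; breaks the run
// the first two loads satisfy the ascending-register check and are merged
// into "ldmia r0, {r4, r5}", after which MergeLDR_STR recurses to handle
// the remaining memops.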
static bool definesCPSR(MachineInstr *MI) {
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has a live CPSR def, then it's not safe to fold it
      // into a load / store.
      return true;
  }

  return false;
}

static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::t2SUBri:
  case ARM::SUBri:
    CheckCPSRDef = true;
    // fallthrough
  case ARM::tSUBspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm()*Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::t2ADDri:
  case ARM::ADDri:
    CheckCPSRDef = true;
    // fallthrough
  case ARM::tADDspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm()*Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}
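// Illustrative example (not from the original source): with Base = r1,
// Bytes = 8, and an AL predicate, "add r1, r1, #8" is a matching increment,
// while "adds r1, r1, #8" is rejected because its live CPSR def cannot be
// folded away.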
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}
/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
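///
/// Illustrative example (not from the original source):
///   ldr r3, [r0]
///   r0 := r0 + 4
/// =>
///   ldr r3, [r0], #4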
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}

static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}
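// Illustrative example (not from the original source): for a VLDRD of the
// form "vldr d0, [r1, #8]" the addrmode5 offset field encodes 2 words, so
// getMemoryOpOffset returns 2 * 4 = 8; for the i12/i8-style opcodes listed
// above the immediate is returned as-is.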
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg  = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
      // so adjust and use t2LDRi12 here for that.
      unsigned NewOpc2 = (isLd)
        ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defed by the load; make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        // Never kill the base register in the first instruction.
        if (EvenReg == BaseReg)
          EvenDeadKill = false;
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}
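// Illustrative examples (not from the original source): "ldrd r2, r3, [r0]"
// is already a legal even/odd pair and is left alone, while
// "ldrd r1, r2, [r0]" starts at an odd register; with a zero offset and
// ascending registers it becomes "ldmia r0, {r1, r2}", otherwise it is
// split into two ldr's.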
/// AddMemOp - helper for ARMLoadStoreOpt::LoadStoreMultipleOpti.
/// It adds store mem ops with a simple push_back/insert method,
/// without any additional logic.
/// For load operations it does the following:
/// 1. Adds the new load operation into the MemOps collection at position "At".
/// 2. Removes from MemOps any "load" operations that change the contents of
///    register "Reg" prior to "At".
/// UnitRegsInfo - Map of type Map< Register, UnitRegisters-vector >
/// UsedUnitRegs - set of unit-registers currently in use.
/// At - position at which the new entry will be added, and prior to which the
/// clean-up should be made (for load operations).
/// FIXME: The clean-up also should be made for store operations,
/// but the memory address should be analyzed instead of unit registers.
unsigned ARMLoadStoreOpt::AddMemOp(MemOpQueue& MemOps,
                                   const MemOpQueueEntry NewEntry,
                                   UnitRegsMap& UnitRegsInfo,
                                   SmallSet<unsigned, 4>& UsedUnitRegs,
                                   unsigned At) {
  unsigned Cleaned = 0;

  if (At == -1U) {
    At = MemOps.size();
    MemOps.push_back(NewEntry);
  } else
    MemOps.insert(&MemOps[At], NewEntry);

  // FIXME: If the operation is not a load, leave it as-is for now, so no
  // overridden ops are cleaned in this case.
  if (!NewEntry.MBBI->mayLoad())
    return 0;

  const SmallVector<unsigned, 4>& NewEntryUnitRegs = UnitRegsInfo[NewEntry.Reg];

  bool FoundOverriddenLoads = false;

  for (unsigned i = 0, e = NewEntryUnitRegs.size(); i != e; ++i)
    if (UsedUnitRegs.count(NewEntryUnitRegs[i])) {
      FoundOverriddenLoads = true;
      break;
    }

  // If we detect that this register is used by load operations that are
  // predecessors of the new one, remove them from MemOps.
  if (FoundOverriddenLoads) {
    MemOpQueue UpdatedMemOps;

    // Scan through MemOps entries.
    for (unsigned i = 0; i != At; ++i) {
      MemOpQueueEntry& MemOpEntry = MemOps[i];

      // FIXME: Skip non-load operations for now.
      if (!MemOpEntry.MBBI->mayLoad())
        continue;

      const SmallVector<unsigned, 4>& MemOpUnitRegs =
          UnitRegsInfo[MemOpEntry.Reg];

      // Look up entries that load contents into a register used by the new
      // entry.
      bool ReleaseThisEntry = false;
      for (unsigned m = 0, em = MemOpUnitRegs.size(); m != em; ++m) {
        if (std::find(NewEntryUnitRegs.begin(), NewEntryUnitRegs.end(),
                      MemOpUnitRegs[m]) != NewEntryUnitRegs.end()) {
          ReleaseThisEntry = true;
          ++Cleaned;
          break;
        }
      }

      if (ReleaseThisEntry) {
        const SmallVector<unsigned, 4>& ReleasedRegs =
            UnitRegsInfo[MemOpEntry.Reg];
        for (unsigned r = 0, er = ReleasedRegs.size(); r != er; ++r)
          UsedUnitRegs.erase(ReleasedRegs[r]);
      } else
        UpdatedMemOps.push_back(MemOpEntry);
    }

    // Keep anything without changes after the At position.
    for (unsigned i = At, e = MemOps.size(); i != e; ++i)
      UpdatedMemOps.push_back(MemOps[i]);

    MemOps.swap(UpdatedMemOps);
  }

  UsedUnitRegs.insert(NewEntryUnitRegs.begin(), NewEntryUnitRegs.end());

  return Cleaned;
}
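// Illustrative example (not from the original source): if MemOps already
// contains a load of d0 (unit registers {s0, s1}) and a load of s0 is then
// added, the earlier d0 load overlaps the new entry's unit registers, so it
// is removed from the queue and its unit registers are released.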
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  UnitRegsMap UnitRegsInfo(TRI);
  SmallSet<unsigned, 4> UsedRegUnits;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;

        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        const SmallVector<unsigned, 4>& EntryUnitRegs = UnitRegsInfo[Reg];
        UsedRegUnits.insert(EntryUnitRegs.begin(), EntryUnitRegs.end());
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            unsigned OverridesCleaned =
                AddMemOp(MemOps,
                         MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI),
                         UnitRegsInfo, UsedRegUnits) != 0;
            NumMemOps += 1 - OverridesCleaned;
            Advance = true;
          } else {
            for (unsigned I = 0; I != NumMemOps; ++I) {
              if (Offset < MemOps[I].Offset) {
                MemOpQueueEntry entry(Offset, Reg, isKill, Position, MBBI);
                unsigned OverridesCleaned =
                    AddMemOp(MemOps, entry, UnitRegsInfo,
                             UsedRegUnits, I) != 0;
                NumMemOps += 1 - OverridesCleaned;

                Advance = true;
                break;
              } else if (Offset == MemOps[I].Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's
        // needed. First advance to the instruction just before the start of
        // the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // ops that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        UsedRegUnits.clear();
        NumMemOps = 0;
      }

      // If the iterator hasn't been advanced and this is not a memory op,
      // skip it. It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  STI = &TM.getSubtarget<ARMSubtarget>();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
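///
/// Illustrative example (not from the original source):
///   ldr r1, [r0]
///   mul r3, r3, r4
///   ldr r2, [r0, #4]
/// is rescheduled so the two loads become adjacent, letting the post-RA pass
/// above (or the LDRD formation below) combine them.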
namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getDataLayout();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}


/// Copy Op0 and Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative: if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
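        // Loads are hoisted up to the position of the run's first load, and
        // stores are sunk down to the run's last store; these are exactly the
        // motions IsSafeAndProfitableToMove validated above.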
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
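        // Reschedule whatever has been collected so far, then let the outer
        // loop resume scanning on the instruction after the barrier.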
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that was seen
        // earlier). Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - returns an instance of the load /
/// store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}
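
// A minimal sketch of how the two variants are typically wired into the ARM
// codegen pipeline (illustrative only; see ARMPassConfig in
// ARMTargetMachine.cpp for the authoritative hook names and ordering):
//
//   // Before register allocation: bring LDRD/STRD candidates together and
//   // add (even, odd) register-pair allocation hints.
//   addPass(createARMLoadStoreOptimizationPass(/*PreAlloc=*/true));
//
//   // After register allocation: merge loads / stores into LDM/STM and
//   // fold base-register updates.
//   addPass(createARMLoadStoreOptimizationPass());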