//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");

/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
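///
/// An illustrative example of the transformation (register names chosen
/// for illustration only):
///   ldr r0, [r1]
///   ldr r2, [r1, #4]
/// =>
///   ldmia r1, {r0, r2}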

namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}

namespace llvm {
namespace ARM_AM {

AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

} // end namespace ARM_AM
} // end namespace llvm

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT2i32Store(Opc);
}

/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // Check if this is a supported opcode before we insert instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If the starting offset isn't zero, insert an MI to materialize a new
    // base. But only do so if it is cost effective, i.e. merging more than
    // two loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
    if (Offset < 0) {
      BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
      Offset = - Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true; // New base is always killed right after its use.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}

// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : getARMRegisterNumbering(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;

  // vldm / vstm limits are 32 for S variants, 16 for D variants.

  switch (Opcode) {
  default: break;
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VSTRD:
    Limit = 16;
    break;
  case ARM::VLDRD:
    Limit = 16;
    break;
  case ARM::VLDRS:
    Limit = 32;
    break;
  }

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : getARMRegisterNumbering(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try merging the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}

static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}

/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                                bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}

static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg  = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defed by the load; make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
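///
/// An illustrative example (register names chosen for illustration only):
///   ldr r4, [r5]
///   ldr r6, [r5, #4]
///   ldr r7, [r5, #8]
/// =>
///   ldmia r5, {r4, r6, r7}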
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's
        // needed. First advance to the instruction just before the start of
        // the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // ops that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If the iterator hasn't been advanced and this is not a memory op,
      // skip it. It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(&*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  STI = &TM.getSubtarget<ARMSubtarget>();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias
  // information some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}


/// Copy Op0 and Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}

bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STRi12)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that was seen
        // earlier). Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}