//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
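// As a quick illustration of the transformations below (a sketch, not taken
// from a specific test case), the post-RA pass rewrites sequences such as:
//
//   ldr r4, [r0]
//   ldr r5, [r0, #4]
//   ldr r6, [r0, #8]
// =>
//   ldmia r0, {r4, r5, r6}
//
// and, where an adjacent base update exists, folds it into the multiple op
// (e.g. ldmia r0!, {r4, r5, r6}).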
/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.

namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}
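// The function below is a straight opcode table. For example (values taken
// from the switch itself):
//   getLoadStoreMultipleOpcode(ARM::LDRi12, ARM_AM::ia) == ARM::LDMIA
//   getLoadStoreMultipleOpcode(ARM::VLDRS,  ARM_AM::db) == 0, since only the
//   base-updating VLDMSDB_UPD form exists.
// A zero return therefore means "no such multiple op"; callers must check it.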
static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
    break;
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
    break;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
    break;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
    break;
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
    break;
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
    break;
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
    break;
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
    break;
  }

  return 0;
}

namespace llvm {
  namespace ARM_AM {

AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }

  return ARM_AM::bad_am_submode;
}

  } // end namespace ARM_AM
} // end namespace llvm

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT2i32Store(Opc);
}

/// MergeOps - Create and insert an LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
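/// Addressing submode selection below, as a worked sketch: with NumRegs == 3
/// word-sized ops, an Offset of 4 selects "ib" (increment before),
/// -4 * 3 + 4 == -8 selects "da" (decrement after), and -4 * 3 == -12
/// selects "db" (decrement before); any other non-zero offset forces a new
/// base register to be materialized first.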
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // Check if this is a supported opcode before we insert instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, just use one of the destination registers as the
      // new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Otherwise, use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
    if (Offset < 0) {
      BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
      Offset = - Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}

// MergeOpsUpdate - Call MergeOps and update MemOps and Merges accordingly on
// success.
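// The kill-flag bookkeeping here is subtle; a sketch of the case it handles:
// if a memop outside the merged range (e.g. "str r2, [r3, #8]") kills r2
// before the insertion point, and the merged STM also stores r2, the earlier
// instruction must lose its kill flag and the merged instruction's r2
// operand carries it instead.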
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
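/// A sketch of the scan below: entries are already sorted by offset, so a run
/// such as
///   ldr r1, [r0]          (offset 0)
///   ldr r2, [r0, #4]      (offset 4)
///   ldr r5, [r0, #8]      (offset 8)
/// merges into "ldmia r0, {r1, r2, r5}" because the offsets are contiguous
/// and the register numbers ascend; a following "ldr r3, [r0, #12]" could not
/// be added since r3 < r5 breaks the ascending-order requirement.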
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : getARMRegisterNumbering(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;

  // vldm / vstm limits are 32 for S variants, 16 for D variants.

  switch (Opcode) {
  default: break;
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VSTRD:
    Limit = 16;
    break;
  case ARM::VLDRD:
    Limit = 16;
    break;
  case ARM::VLDRS:
    Limit = 32;
    break;
  }

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : getARMRegisterNumbering(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try merging the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}

static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
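// Both matchers test the same shape with opposite signs; e.g. with Base == r0,
// Bytes == 16 and a matching predicate, isMatchingDecrement above accepts
// "sub r0, r0, #16" and isMatchingIncrement below accepts "add r0, r0, #16"
// (tSUBspi/tADDspi immediates are scaled by 4).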
static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  if (Bytes == 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
    break;
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
    break;
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
    break;
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
    break;
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
    break;
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
    break;
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
    break;
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
    break;
  }

  return 0;
}
/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}
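// The next two tables pick pre/post-indexed forms for single loads / stores.
// In assembly terms (an illustration, not tied to a specific test case):
//   pre-indexed:  ldr r1, [r0, #4]!   base updated before the access
//   post-indexed: ldr r1, [r0], #4    base updated after the access
// No indexed forms of VLDR/VSTR exist, so the base-updating
// load/store-multiple opcodes are returned for those instead.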
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
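///
/// Illustrative sketches:
///   ldr rn, [rm]  followed by  add rm, rm, #4  =>  ldr rn, [rm], #4
///   sub rm, rm, #4  followed by  ldr rn, [rm]  =>  ldr rn, [rm, #-4]!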
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned Offset = 0;
  // FIXME: Loads still use a combined reg/imm offset operand. When
  // AM2 refactoring is complete, this can go away and just always use
  // the raw Offset value.
  if (isAM2 && isLd)
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  else if (!isAM5)
    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2)
      // LDR_PRE, LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  }
  MBB.erase(MBBI);

  return true;
}
/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}
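// getMemoryOpOffset decodes the immediate back into a byte offset. A couple
// of illustrative cases: LDRi12/t2LDRi8 already carry a plain byte immediate;
// AM3 (LDRD/STRD) encodes a magnitude plus an add/sub flag; AM5 (VLDR/VSTR)
// encodes a word count, so an encoded value of 2 with the sub flag yields -8.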
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
    return OffField;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
    : ARM_AM::getAM5Offset(OffField) * 4;
  if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
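// FixInvalidRegPairOp handles ldrd/strd whose register pair the ISA cannot
// encode (ARM ldrd needs an even/odd pair such as r2/r3). As a sketch, an
// "ldrd r1, r2, [r0]" is rewritten as "ldmia r0, {r1, r2}" when the numbers
// ascend and the offset is zero, and is otherwise split into two plain loads.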
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to an
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defed by the load; make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
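/// The scan below works roughly as follows (a summary of the code, not an
/// additional contract): walk the block accumulating loads (or stores) that
/// share a base register, opcode and predicate into MemOps, kept sorted by
/// offset; when the chain is broken (different base, offset collision, base
/// clobbered by a load, or end of block), hand the queue to MergeLDR_STR and
/// then try to fold neighboring base increments/decrements into the results.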
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // ops that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(&*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it
/// more likely they will be combined later.

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre-register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}
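// IsSafeAndProfitableToMove scans the instructions strictly between two
// memops that are candidates for clustering. A sketch of what disqualifies
// the move: a call, terminator, or side-effecting instruction in between; a
// store when moving loads (or any load/store when moving stores); or a
// redefinition of the base register. Otherwise it estimates the register
// pressure the reordering would add and only approves small increases.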
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    const MCInstrDesc &MCID = I->getDesc();
    if (MCID.isCall() || MCID.isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && MCID.mayStore())
      return false;
    if (!isLd) {
      if (MCID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (MCID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
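// CanFormLdStDWord checks whether two adjacent word ops can become one
// doubleword op. An illustrative pair that qualifies (assuming suitable
// alignment and a v5TE+ subtarget):
//   ldr r0, [r2]          =>   ldrd r0, r1, [r2]
//   ldr r1, [r2, #4]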
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STRi12)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}
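// RescheduleOps works on all memops of one base register, sorted by
// decreasing offset so the smallest offset sits at the back of Ops. A sketch
// of one round: peel off the longest run of contiguous, same-opcode memops
// (capped at 8), check it is safe and profitable to cluster them, then
// either emit an ldrd/strd for a qualifying pair or splice the instructions
// next to each other at a common insertion point.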
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
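// RescheduleLoadStoreInstrs drives the rescheduling: it numbers the
// instructions of each call/terminator-free region, buckets unpredicated
// memops by base register (loads and stores separately, ending a region
// early if the same base+offset shows up twice), and then runs RescheduleOps
// over every bucket with more than one entry.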
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const MCInstrDesc &MCID = MI->getDesc();
      if (MCID.isCall() || MCID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}