//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");

/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
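///
/// For illustration only (register names chosen arbitrarily), the pass aims
/// to rewrite sequences such as:
///   ldr r0, [r4]
///   ldr r1, [r4, #4]
///   ldr r2, [r4, #8]
/// into a single load-multiple:
///   ldmia r4, {r0, r1, r2}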

namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
    break;
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
    break;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
    break;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
    break;
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
    break;
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
    break;
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
    break;
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
    break;
  }

  return 0;
}

namespace llvm {
namespace ARM_AM {

AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }

  return ARM_AM::bad_am_submode;
}

} // end namespace ARM_AM
} // end namespace llvm

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT2i32Store(Opc);
}

/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
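  // The starting offset picks the addressing submode. As a rough summary
  // (assuming 4-byte transfers over NumRegs registers):
  //   Offset ==  4               -> IB (increment before)
  //   Offset == -4 * NumRegs + 4 -> DA (decrement after)
  //   Offset == -4 * NumRegs     -> DB (decrement before)
  //   Offset ==  0               -> IA (increment after)
  // Any other non-zero offset requires materializing a new base register
  // below.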
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // Check if this is a supported opcode before we insert instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If the starting offset isn't zero, insert a MI to materialize a new
    // base. But only do so if it is cost effective, i.e. merging more than
    // two loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2
      ? ARM::ADDri
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
    if (Offset < 0) {
      BaseOpc = !isThumb2
        ? ARM::SUBri
        : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
      Offset = - Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}

// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
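  // (That is: if a register is killed by an op that stays before the
  // insertion point, the kill flag has to move onto the merged instruction
  // and be cleared on the original killer; the bookkeeping below records
  // those killers so the update loop can fix them up.)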
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : getARMRegisterNumbering(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;

  // vldm / vstm limits are 32 for the S variants, 16 for the D variants.
  switch (Opcode) {
  default: break;
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VSTRD:
    Limit = 16;
    break;
  case ARM::VLDRD:
    Limit = 16;
    break;
  case ARM::VLDRS:
    Limit = 32;
    break;
  }

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : getARMRegisterNumbering(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
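    // (For example, a VLDM can cover {s4, s5, s6}, but not {s4, s6} or a
    // sequence whose register numbers decrease.)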
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try merging the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}

static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::t2SUBrSPi &&
      MI->getOpcode() != ARM::t2SUBrSPi12 &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::t2ADDrSPi &&
      MI->getOpcode() != ARM::t2ADDrSPi12 &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
    break;
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
    break;
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
    break;
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
    break;
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
    break;
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
    break;
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
    break;
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
    break;
  }

  return 0;
}

/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE;
  case ARM::STRi12:
    return ARM::STR_PRE;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST;
  case ARM::STRi12:
    return ARM::STR_POST;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
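  // (A matching increment or decrement that follows the memory op folds into
  // a post-indexed form; one that precedes it, handled above, folds into a
  // pre-indexed form.)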
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned Offset = 0;
  if (isAM2)
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  else if (!isAM5)
    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2)
      // LDR_PRE, LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM2)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operation (that this
/// pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise, don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}

static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
    : ARM_AM::getAM5Offset(OffField) * 4;
  if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg  = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defed by the load; make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's
        // needed. First advance to the instruction just before the start of
        // the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // ops that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If the iterator hasn't been advanced and this is not a memory op,
      // skip it. It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into the pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(&*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias
  // information some day.
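  // Heuristic: registers referenced by the intervening instructions (other
  // than the base and the memop registers themselves) approximate how much
  // register pressure the move would add.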
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    const TargetInstrDesc &TID = I->getDesc();
    if (TID.isCall() || TID.isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && TID.mayStore())
      return false;
    if (!isLd) {
      if (TID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (TID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // OK if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}

bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STRi12)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8; // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
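  // (Both encodings take an 8-bit offset field: addrmode3 carries a separate
  // add/sub bit, while t2LDRDi8 / t2STRDi8 use a signed offset scaled by 4,
  // hence the Scale factor used in the checks below.)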
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative: if the instructions are too far apart, don't move
      // them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form a register
        // pair.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.isCall() || TID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that was seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}