//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");

/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
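///
/// For example (illustrative), consecutive word loads off one base register,
///   ldr r4, [r0]
///   ldr r5, [r0, #4]
///   ldr r6, [r0, #8]
/// are combined into a single load multiple:
///   ldmia r0, {r4, r5, r6}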

namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
    break;
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
    break;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
    break;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
submode!"); 170 case ARM_AM::ia: return ARM::t2STMIA; 171 case ARM_AM::db: return ARM::t2STMDB; 172 } 173 break; 174 case ARM::VLDRS: 175 ++NumVLDMGened; 176 switch (Mode) { 177 default: llvm_unreachable("Unhandled submode!"); 178 case ARM_AM::ia: return ARM::VLDMSIA; 179 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists. 180 } 181 break; 182 case ARM::VSTRS: 183 ++NumVSTMGened; 184 switch (Mode) { 185 default: llvm_unreachable("Unhandled submode!"); 186 case ARM_AM::ia: return ARM::VSTMSIA; 187 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists. 188 } 189 break; 190 case ARM::VLDRD: 191 ++NumVLDMGened; 192 switch (Mode) { 193 default: llvm_unreachable("Unhandled submode!"); 194 case ARM_AM::ia: return ARM::VLDMDIA; 195 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists. 196 } 197 break; 198 case ARM::VSTRD: 199 ++NumVSTMGened; 200 switch (Mode) { 201 default: llvm_unreachable("Unhandled submode!"); 202 case ARM_AM::ia: return ARM::VSTMDIA; 203 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists. 204 } 205 break; 206 } 207 208 return 0; 209 } 210 211 namespace llvm { 212 namespace ARM_AM { 213 214 AMSubMode getLoadStoreMultipleSubMode(int Opcode) { 215 switch (Opcode) { 216 default: llvm_unreachable("Unhandled opcode!"); 217 case ARM::LDMIA_RET: 218 case ARM::LDMIA: 219 case ARM::LDMIA_UPD: 220 case ARM::STMIA: 221 case ARM::STMIA_UPD: 222 case ARM::t2LDMIA_RET: 223 case ARM::t2LDMIA: 224 case ARM::t2LDMIA_UPD: 225 case ARM::t2STMIA: 226 case ARM::t2STMIA_UPD: 227 case ARM::VLDMSIA: 228 case ARM::VLDMSIA_UPD: 229 case ARM::VSTMSIA: 230 case ARM::VSTMSIA_UPD: 231 case ARM::VLDMDIA: 232 case ARM::VLDMDIA_UPD: 233 case ARM::VSTMDIA: 234 case ARM::VSTMDIA_UPD: 235 return ARM_AM::ia; 236 237 case ARM::LDMDA: 238 case ARM::LDMDA_UPD: 239 case ARM::STMDA: 240 case ARM::STMDA_UPD: 241 return ARM_AM::da; 242 243 case ARM::LDMDB: 244 case ARM::LDMDB_UPD: 245 case ARM::STMDB: 246 case ARM::STMDB_UPD: 247 case ARM::t2LDMDB: 248 case ARM::t2LDMDB_UPD: 249 case ARM::t2STMDB: 250 case ARM::t2STMDB_UPD: 251 case ARM::VLDMSDB_UPD: 252 case ARM::VSTMSDB_UPD: 253 case ARM::VLDMDDB_UPD: 254 case ARM::VSTMDDB_UPD: 255 return ARM_AM::db; 256 257 case ARM::LDMIB: 258 case ARM::LDMIB_UPD: 259 case ARM::STMIB: 260 case ARM::STMIB_UPD: 261 return ARM_AM::ib; 262 } 263 264 return ARM_AM::bad_am_submode; 265 } 266 267 } // end namespace ARM_AM 268 } // end namespace llvm 269 270 static bool isT2i32Load(unsigned Opc) { 271 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8; 272 } 273 274 static bool isi32Load(unsigned Opc) { 275 return Opc == ARM::LDRi12 || isT2i32Load(Opc); 276 } 277 278 static bool isT2i32Store(unsigned Opc) { 279 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8; 280 } 281 282 static bool isi32Store(unsigned Opc) { 283 return Opc == ARM::STRi12 || isT2i32Store(Opc); 284 } 285 286 /// MergeOps - Create and insert a LDM or STM with Base as base register and 287 /// registers in Regs as the register operands that would be loaded / stored. 288 /// It returns true if the transformation is done. 289 bool 290 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, 291 MachineBasicBlock::iterator MBBI, 292 int Offset, unsigned Base, bool BaseKill, 293 int Opcode, ARMCC::CondCodes Pred, 294 unsigned PredReg, unsigned Scratch, DebugLoc dl, 295 SmallVector<std::pair<unsigned, bool>, 8> &Regs) { 296 // Only a single register to load / store. Don't bother. 
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // Check if this is a supported opcode before we insert instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If the starting offset isn't zero, insert a MI to materialize a new
    // base. But only do so if it is cost effective, i.e. merging more than
    // two loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
    if (Offset < 0) {
      BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
      Offset = - Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}

// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                          unsigned Base, int Opcode, unsigned Size,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          unsigned Scratch, MemOpQueue &MemOps,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : getARMRegisterNumbering(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;

  // The vldm / vstm limits are 32 for the S variants, 16 for the D variants.
  switch (Opcode) {
  default: break;
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VSTRD:
    Limit = 16;
    break;
  case ARM::VLDRD:
    Limit = 16;
    break;
  case ARM::VLDRS:
    Limit = 32;
    break;
  }

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : getARMRegisterNumbering(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try to merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}

static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
    break;
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
    break;
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
    break;
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
    break;
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
    break;
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
    break;
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
    break;
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
    break;
  }

  return 0;
}

/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of the base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
///   stmia rn, <ra, rb, rc>
///   rn := rn + 4 * 3;
/// =>
///   stmia rn!, <ra, rb, rc>
///
///   rn := rn - 4 * 3;
///   ldmia rn, <ra, rb, rc>
/// =>
///   ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of the operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of the base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
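///
/// For example (illustrative):
///
///   add rn, rn, #4
///   ldr rm, [rn]
/// =>
///   ldr rm, [rn, #4]!     ; pre-indexed
///
///   ldr rm, [rn]
///   add rn, rn, #4
/// =>
///   ldr rm, [rn], #4      ; post-indexed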
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}

static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
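
/// FixInvalidRegPairOp - LDRD / STRD require an even / odd register pair (and
/// an LDRD with the base register in the pair trips ARM errata 602117 on
/// Cortex-M3). When the pair is invalid, rewrite the instruction as an
/// LDM / STM or as two plain loads / stores, e.g. (illustrative):
///   ldrd r1, r0, [r2]  =>  ldr r1, [r2]
///                          ldr r0, [r2, #4]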
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg  = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to an
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defed by the load; make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's
        // needed. First advance to the instruction just before the start of
        // the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // ops that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If the iterator hasn't been advanced and this is not a memory op,
      // skip it. It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(&*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  STI = &TM.getSubtarget<ARMSubtarget>();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
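///
/// For example (illustrative), with an unrelated instruction between two
/// loads off the same base:
///   ldr r1, [r0]              ldr r1, [r0]
///   add r3, r3, r4       =>   ldr r2, [r0, #4]
///   ldr r2, [r0, #4]          add r3, r3, r4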

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias
  // information some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    const MCInstrDesc &MCID = I->getDesc();
    if (MCID.isCall() || MCID.isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && MCID.mayStore())
      return false;
    if (!isLd) {
      if (MCID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (MCID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate the register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}

bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STRi12)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative: if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
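
/// RescheduleLoadStoreInstrs - Scan the basic block between barriers (calls
/// and terminators), group the loads and stores by base register, and invoke
/// RescheduleOps on each group that has more than one member.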
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const MCInstrDesc &MCID = MI->getDesc();
      if (MCID.isCall() || MCID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}
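
// Illustrative usage (an assumption about the surrounding target setup, not
// part of this file): the target machine typically schedules both flavors of
// the pass, e.g.
//   PM.add(createARMLoadStoreOptimizationPass(/*PreAlloc=*/true)); // pre-RA
//   PM.add(createARMLoadStoreOptimizationPass());                  // post-RA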