//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");

/// ARMAllocLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
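///
/// Illustrative transformation (a sketch, not taken from a specific test):
///   ldr r1, [r0]            =>  ldmia r0, {r1, r2, r3}
///   ldr r2, [r0, #4]
///   ldr r3, [r0, #8]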

namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl,
                  ArrayRef<std::pair<unsigned, bool> > Regs,
                  ArrayRef<unsigned> ImpDefs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVectorImpl<MachineBasicBlock::iterator> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}

namespace llvm {
namespace ARM_AM {

AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

} // end namespace ARM_AM
} // end namespace llvm

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT2i32Store(Opc);
}

/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          ArrayRef<std::pair<unsigned, bool> > Regs,
                          ArrayRef<unsigned> ImpDefs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // Check if this is a supported opcode before we insert instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If the starting offset isn't zero, insert an MI to materialize a new
    // base. But only do so if it is cost effective, i.e. merging more than
    // two loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
    if (Offset < 0) {
      BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
      Offset = - Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true; // New base is always killed right after its use.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  // Add implicit defs for super-registers.
  for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
    MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);

  return true;
}

// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                        SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 8> ImpDefs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));

    // Collect any implicit defs of super-registers. They must be preserved.
    for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
      if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
        continue;
      unsigned DefReg = MO->getReg();
      if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
        ImpDefs.push_back(DefReg);
    }
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs, ImpDefs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                        unsigned Base, int Opcode, unsigned Size,
                        ARMCC::CondCodes Pred, unsigned PredReg,
                        unsigned Scratch, MemOpQueue &MemOps,
                        SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;

  // vldm / vstm limits are 32 for S variants, 16 for D variants.
  switch (Opcode) {
  default: break;
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VSTRD:
    Limit = 16;
    break;
  case ARM::VLDRD:
    Limit = 16;
    break;
  case ARM::VLDRS:
    Limit = 32;
    break;
  }

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1)) &&
        // On Swift we don't want vldm/vstm to start with an odd register num
        // because Q register unaligned vldm/vstm need more uops.
        (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try merging the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}

static bool definesCPSR(MachineInstr *MI) {
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has a live CPSR def, then it's not safe to fold it
      // into load / store.
      return true;
  }

  return false;
}

static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::t2SUBri:
  case ARM::SUBri:
    CheckCPSRDef = true;
    // fallthrough
  case ARM::tSUBspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm()*Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::t2ADDri:
  case ARM::ADDri:
    CheckCPSRDef = true;
    // fallthrough
  case ARM::tADDspi:
    break;
  }

  if (Bytes == 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm()*Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}

/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of the operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible.
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

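  // Illustrative foldings (a sketch of the two directions handled below):
  //   sub r0, r0, #4 ; ldr r1, [r0]    =>  ldr r1, [r0, #-4]!  (pre-indexed)
  //   ldr r1, [r0]   ; add r0, r0, #4  =>  ldr r1, [r0], #4    (post-indexed)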
  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}

static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
    return OffField;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
      // so adjust and use t2LDRi12 here for that.
      unsigned NewOpc2 = (isLd)
        ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defined by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        // Never kill the base register in the first instruction.
        if (EvenReg == BaseReg)
          EvenDeadKill = false;
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
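///
/// For example (a sketch of the intended rewrite):
///   ldr r4, [r0]            =>  ldmia r0, {r4, r5, r6}
///   ldr r5, [r0, #4]
///   ldr r6, [r0, #8]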
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());

      // Watch out for:
      // r4 := ldr [r0, #8]
      // r4 := ldr [r0, #4]
      //
      // The optimization may reorder the second ldr in front of the first
      // ldr, which violates the write-after-write (WAW) dependence. The same
      // applies to str. Try to merge the inst(s) already in MemOps.
      bool Overlap = false;
      for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
        if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
          Overlap = true;
          break;
        }
      }

      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else if (!Overlap) {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's
        // needed. First advance to the instruction just before the start of
        // the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // ops that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If the iterator hasn't been advanced and this is not a memory op,
      // skip it. It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
/// ldmfd sp!, {..., lr}
/// bx lr
/// or
/// ldmfd sp!, {..., lr}
/// mov pc, lr
/// =>
/// ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  STI = &TM.getSubtarget<ARMSubtarget>();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVectorImpl<MachineInstr *> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getDataLayout();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias
  // information some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate the register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // OK if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}

/// Copy Op0 and Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}

bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STRi12)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement. At the moment, we ignore the memoryoperand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
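  // (Illustrative bounds derived from the checks below: t2LDRDi8/t2STRDi8
  // take offsets in (-1024, 1024) that are multiples of 4, while ARM
  // LDRD/STRD via addrmode3 take offset magnitudes below 256.)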
1623 int OffImm = getMemoryOpOffset(Op0); 1624 if (isT2) { 1625 int Limit = (1 << 8) * Scale; 1626 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1))) 1627 return false; 1628 Offset = OffImm; 1629 } else { 1630 ARM_AM::AddrOpc AddSub = ARM_AM::add; 1631 if (OffImm < 0) { 1632 AddSub = ARM_AM::sub; 1633 OffImm = - OffImm; 1634 } 1635 int Limit = (1 << 8) * Scale; 1636 if (OffImm >= Limit || (OffImm & (Scale-1))) 1637 return false; 1638 Offset = ARM_AM::getAM3Opc(AddSub, OffImm); 1639 } 1640 EvenReg = Op0->getOperand(0).getReg(); 1641 OddReg = Op1->getOperand(0).getReg(); 1642 if (EvenReg == OddReg) 1643 return false; 1644 BaseReg = Op0->getOperand(1).getReg(); 1645 Pred = getInstrPredicate(Op0, PredReg); 1646 dl = Op0->getDebugLoc(); 1647 return true; 1648 } 1649 1650 namespace { 1651 struct OffsetCompare { 1652 bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const { 1653 int LOffset = getMemoryOpOffset(LHS); 1654 int ROffset = getMemoryOpOffset(RHS); 1655 assert(LHS == RHS || LOffset != ROffset); 1656 return LOffset > ROffset; 1657 } 1658 }; 1659 } 1660 1661 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, 1662 SmallVectorImpl<MachineInstr *> &Ops, 1663 unsigned Base, bool isLd, 1664 DenseMap<MachineInstr*, unsigned> &MI2LocMap) { 1665 bool RetVal = false; 1666 1667 // Sort by offset (in reverse order). 1668 std::sort(Ops.begin(), Ops.end(), OffsetCompare()); 1669 1670 // The loads / stores of the same base are in order. Scan them from first to 1671 // last and check for the following: 1672 // 1. Any def of base. 1673 // 2. Any gaps. 1674 while (Ops.size() > 1) { 1675 unsigned FirstLoc = ~0U; 1676 unsigned LastLoc = 0; 1677 MachineInstr *FirstOp = 0; 1678 MachineInstr *LastOp = 0; 1679 int LastOffset = 0; 1680 unsigned LastOpcode = 0; 1681 unsigned LastBytes = 0; 1682 unsigned NumMove = 0; 1683 for (int i = Ops.size() - 1; i >= 0; --i) { 1684 MachineInstr *Op = Ops[i]; 1685 unsigned Loc = MI2LocMap[Op]; 1686 if (Loc <= FirstLoc) { 1687 FirstLoc = Loc; 1688 FirstOp = Op; 1689 } 1690 if (Loc >= LastLoc) { 1691 LastLoc = Loc; 1692 LastOp = Op; 1693 } 1694 1695 unsigned LSMOpcode 1696 = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia); 1697 if (LastOpcode && LSMOpcode != LastOpcode) 1698 break; 1699 1700 int Offset = getMemoryOpOffset(Op); 1701 unsigned Bytes = getLSMultipleTransferSize(Op); 1702 if (LastBytes) { 1703 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes)) 1704 break; 1705 } 1706 LastOffset = Offset; 1707 LastBytes = Bytes; 1708 LastOpcode = LSMOpcode; 1709 if (++NumMove == 8) // FIXME: Tune this limit. 1710 break; 1711 } 1712 1713 if (NumMove <= 1) 1714 Ops.pop_back(); 1715 else { 1716 SmallPtrSet<MachineInstr*, 4> MemOps; 1717 SmallSet<unsigned, 4> MemRegs; 1718 for (int i = NumMove-1; i >= 0; --i) { 1719 MemOps.insert(Ops[i]); 1720 MemRegs.insert(Ops[i]->getOperand(0).getReg()); 1721 } 1722 1723 // Be conservative, if the instructions are too far apart, don't 1724 // move them. We want to limit the increase of register pressure. 1725 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this. 1726 if (DoMove) 1727 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp, 1728 MemOps, MemRegs, TRI); 1729 if (!DoMove) { 1730 for (unsigned i = 0; i != NumMove; ++i) 1731 Ops.pop_back(); 1732 } else { 1733 // This is the new location for the loads / stores. 1734 MachineBasicBlock::iterator InsertPos = isLd ? 
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate an even / odd pair of registers that can be
        // used by a register-pair instruction.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
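
/// RescheduleLoadStoreInstrs - Scan MBB in regions bounded by calls and
/// terminators, bucketing the loads and stores by base register, and try to
/// reschedule each bucket so that memory ops off the same base end up next
/// to each other, creating opportunities for ldrd / strd formation here and
/// for ldm / stm merging in the post-allocation pass.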
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}

/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}
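
// Illustrative usage (a hedged sketch, not part of this file): the ARM
// target's pass configuration creates both variants, roughly as below;
// the exact TargetPassConfig hook names vary across LLVM versions.
//
//   addPass(createARMLoadStoreOptimizationPass(/*PreAlloc=*/true)); // pre-RA
//   ... register allocation runs here ...
//   addPass(createARMLoadStoreOptimizationPass()); // post-RA ldm/stm merging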