//=- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

#define DEBUG_TYPE "aarch64-ldst-opt"

/// AArch64LoadStoreOpt - Post-register allocation pass to combine
/// load / store instructions to form ldp / stp instructions.

STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
STATISTIC(NumPostFolded, "Number of post-index updates folded");
STATISTIC(NumPreFolded, "Number of pre-index updates folded");
STATISTIC(NumUnscaledPairCreated,
          "Number of load/store pairs generated from unscaled ops");

static cl::opt<unsigned> ScanLimit("aarch64-load-store-scan-limit",
                                   cl::init(20), cl::Hidden);

// Placeholder while testing unscaled load/store combining.
static cl::opt<bool> EnableAArch64UnscaledMemOp(
    "aarch64-unscaled-mem-op", cl::Hidden,
    cl::desc("Allow AArch64 unscaled load/store combining"), cl::init(true));

namespace llvm {
void initializeAArch64LoadStoreOptPass(PassRegistry &);
}

#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"

namespace {

typedef struct LdStPairFlags {
  // If a matching instruction is found, MergeForward is set to true if the
  // merge is to remove the first instruction and replace the second with
  // a pair-wise insn, and false if the reverse is true.
  bool MergeForward;

  // SExtIdx gives the index of the result of the load pair that must be
  // extended. The value of SExtIdx assumes that the paired load produces the
  // values in this order: (I, returned iterator), i.e., -1 means no value has
  // to be extended, 0 means I, and 1 means the returned iterator.
  int SExtIdx;

  LdStPairFlags() : MergeForward(false), SExtIdx(-1) {}

  void setMergeForward(bool V = true) { MergeForward = V; }
  bool getMergeForward() const { return MergeForward; }

  void setSExtIdx(int V) { SExtIdx = V; }
  int getSExtIdx() const { return SExtIdx; }

} LdStPairFlags;

struct AArch64LoadStoreOpt : public MachineFunctionPass {
  static char ID;
  AArch64LoadStoreOpt() : MachineFunctionPass(ID) {
    initializeAArch64LoadStoreOptPass(*PassRegistry::getPassRegistry());
  }

  const AArch64InstrInfo *TII;
  const TargetRegisterInfo *TRI;

  // Scan the instructions looking for a load/store that can be combined
  // with the current instruction into a load/store pair.
  // Return the matching instruction if one is found, else MBB->end().
  MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
                                               LdStPairFlags &Flags,
                                               unsigned Limit);

  // Merge the two instructions indicated into a single pair-wise instruction.
  // If MergeForward is true, erase the first instruction and fold its
  // operation into the second. If false, the reverse. Return the instruction
  // following the first instruction (which may change during processing).
  MachineBasicBlock::iterator
  mergePairedInsns(MachineBasicBlock::iterator I,
                   MachineBasicBlock::iterator Paired,
                   const LdStPairFlags &Flags);

  // Scan the instruction list to find a base register update that can
  // be combined with the current instruction (a load or store) using
  // pre or post indexed addressing with writeback. Scan forwards.
  MachineBasicBlock::iterator
  findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, unsigned Limit,
                                int Value);

  // Scan the instruction list to find a base register update that can
  // be combined with the current instruction (a load or store) using
  // pre or post indexed addressing with writeback. Scan backwards.
  MachineBasicBlock::iterator
  findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit);

  // Find an instruction that updates the base register of the ld/st
  // instruction.
  bool isMatchingUpdateInsn(MachineInstr *MemMI, MachineInstr *MI,
                            unsigned BaseReg, int Offset);

  // Merge a pre- or post-index base register update into a ld/st instruction.
  MachineBasicBlock::iterator
  mergeUpdateInsn(MachineBasicBlock::iterator I,
                  MachineBasicBlock::iterator Update, bool IsPreIdx);

  bool optimizeBlock(MachineBasicBlock &MBB);

  bool runOnMachineFunction(MachineFunction &Fn) override;

  const char *getPassName() const override {
    return AARCH64_LOAD_STORE_OPT_NAME;
  }
};
char AArch64LoadStoreOpt::ID = 0;
} // namespace

INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt",
                AARCH64_LOAD_STORE_OPT_NAME, false, false)

static bool isUnscaledLdSt(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURWi:
  case AArch64::STURXi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  case AArch64::LDURSWi:
    return true;
  }
}

static bool isUnscaledLdSt(MachineInstr *MI) {
  return isUnscaledLdSt(MI->getOpcode());
}
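// Note on scaled vs. unscaled forms (illustrative; register names arbitrary):
// the scaled LDRXui encodes its immediate in units of the access size, so
//   ldr  x0, [x1, #16]   ; LDRXui with imm = 2 (2 * 8 bytes)
// while the unscaled LDURXi encodes a raw byte offset in [-256, 255], so
//   ldur x0, [x1, #16]   ; LDURXi with imm = 16
// Pair formation below has to account for this difference when comparing and
// rescaling offsets.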
// Scaling factor for unscaled load or store.
static int getMemScale(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default:
    llvm_unreachable("Opcode has unknown scale!");
  case AArch64::LDRBBui:
  case AArch64::STRBBui:
    return 1;
  case AArch64::LDRHHui:
  case AArch64::STRHHui:
    return 2;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
  case AArch64::LDRWui:
  case AArch64::LDURWi:
  case AArch64::STRSui:
  case AArch64::STURSi:
  case AArch64::STRWui:
  case AArch64::STURWi:
  case AArch64::LDPSi:
  case AArch64::LDPSWi:
  case AArch64::LDPWi:
  case AArch64::STPSi:
  case AArch64::STPWi:
    return 4;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
  case AArch64::LDRXui:
  case AArch64::LDURXi:
  case AArch64::STRDui:
  case AArch64::STURDi:
  case AArch64::STRXui:
  case AArch64::STURXi:
  case AArch64::LDPDi:
  case AArch64::LDPXi:
  case AArch64::STPDi:
  case AArch64::STPXi:
    return 8;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::LDPQi:
  case AArch64::STPQi:
    return 16;
  }
}

static unsigned getMatchingNonSExtOpcode(unsigned Opc,
                                         bool *IsValidLdStrOpc = nullptr) {
  if (IsValidLdStrOpc)
    *IsValidLdStrOpc = true;
  switch (Opc) {
  default:
    if (IsValidLdStrOpc)
      *IsValidLdStrOpc = false;
    return UINT_MAX;
  case AArch64::STRDui:
  case AArch64::STURDi:
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::STRWui:
  case AArch64::STURWi:
  case AArch64::STRXui:
  case AArch64::STURXi:
  case AArch64::LDRDui:
  case AArch64::LDURDi:
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::LDRWui:
  case AArch64::LDURWi:
  case AArch64::LDRXui:
  case AArch64::LDURXi:
  case AArch64::STRSui:
  case AArch64::STURSi:
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return Opc;
  case AArch64::LDRSWui:
    return AArch64::LDRWui;
  case AArch64::LDURSWi:
    return AArch64::LDURWi;
  }
}

static unsigned getMatchingPairOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pairwise equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STPSi;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STPDi;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STPQi;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STPWi;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STPXi;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDPSi;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDPDi;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDPQi;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDPWi;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDPXi;
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
    return AArch64::LDPSWi;
  }
}
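// A sign-extending load can be paired with a plain 32-bit load by first
// mapping it through getMatchingNonSExtOpcode and remembering (via SExtIdx)
// which result still needs extending. For example (register choice arbitrary):
//   ldrsw x0, [x2]        ; LDRSWui -> treated as LDRWui for pairing
//   ldr   w1, [x2, #4]
// can become
//   ldp   w0, w1, [x2]    ; LDPWi
// followed by an explicit sign-extension of w0 into x0 (see mergePairedInsns
// below).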
static unsigned getPreIndexedOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pre-indexed equivalent!");
  case AArch64::STRSui:
    return AArch64::STRSpre;
  case AArch64::STRDui:
    return AArch64::STRDpre;
  case AArch64::STRQui:
    return AArch64::STRQpre;
  case AArch64::STRBBui:
    return AArch64::STRBBpre;
  case AArch64::STRHHui:
    return AArch64::STRHHpre;
  case AArch64::STRWui:
    return AArch64::STRWpre;
  case AArch64::STRXui:
    return AArch64::STRXpre;
  case AArch64::LDRSui:
    return AArch64::LDRSpre;
  case AArch64::LDRDui:
    return AArch64::LDRDpre;
  case AArch64::LDRQui:
    return AArch64::LDRQpre;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpre;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpre;
  case AArch64::LDRWui:
    return AArch64::LDRWpre;
  case AArch64::LDRXui:
    return AArch64::LDRXpre;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpre;
  case AArch64::LDPSi:
    return AArch64::LDPSpre;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpre;
  case AArch64::LDPDi:
    return AArch64::LDPDpre;
  case AArch64::LDPQi:
    return AArch64::LDPQpre;
  case AArch64::LDPWi:
    return AArch64::LDPWpre;
  case AArch64::LDPXi:
    return AArch64::LDPXpre;
  case AArch64::STPSi:
    return AArch64::STPSpre;
  case AArch64::STPDi:
    return AArch64::STPDpre;
  case AArch64::STPQi:
    return AArch64::STPQpre;
  case AArch64::STPWi:
    return AArch64::STPWpre;
  case AArch64::STPXi:
    return AArch64::STPXpre;
  }
}

static unsigned getPostIndexedOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no post-indexed equivalent!");
  case AArch64::STRSui:
    return AArch64::STRSpost;
  case AArch64::STRDui:
    return AArch64::STRDpost;
  case AArch64::STRQui:
    return AArch64::STRQpost;
  case AArch64::STRBBui:
    return AArch64::STRBBpost;
  case AArch64::STRHHui:
    return AArch64::STRHHpost;
  case AArch64::STRWui:
    return AArch64::STRWpost;
  case AArch64::STRXui:
    return AArch64::STRXpost;
  case AArch64::LDRSui:
    return AArch64::LDRSpost;
  case AArch64::LDRDui:
    return AArch64::LDRDpost;
  case AArch64::LDRQui:
    return AArch64::LDRQpost;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpost;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpost;
  case AArch64::LDRWui:
    return AArch64::LDRWpost;
  case AArch64::LDRXui:
    return AArch64::LDRXpost;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpost;
  case AArch64::LDPSi:
    return AArch64::LDPSpost;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpost;
  case AArch64::LDPDi:
    return AArch64::LDPDpost;
  case AArch64::LDPQi:
    return AArch64::LDPQpost;
  case AArch64::LDPWi:
    return AArch64::LDPWpost;
  case AArch64::LDPXi:
    return AArch64::LDPXpost;
  case AArch64::STPSi:
    return AArch64::STPSpost;
  case AArch64::STPDi:
    return AArch64::STPDpost;
  case AArch64::STPQi:
    return AArch64::STPQpost;
  case AArch64::STPWi:
    return AArch64::STPWpost;
  case AArch64::STPXi:
    return AArch64::STPXpost;
  }
}

static bool isPairedLdSt(const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default:
    return false;
  case AArch64::LDPSi:
  case AArch64::LDPSWi:
  case AArch64::LDPDi:
  case AArch64::LDPQi:
  case AArch64::LDPWi:
  case AArch64::LDPXi:
  case AArch64::STPSi:
  case AArch64::STPDi:
  case AArch64::STPQi:
  case AArch64::STPWi:
  case AArch64::STPXi:
    return true;
  }
}

static const MachineOperand &getLdStRegOp(const MachineInstr *MI,
                                          unsigned PairedRegOp = 0) {
  assert(PairedRegOp < 2 && "Unexpected register operand idx.");
  unsigned Idx = isPairedLdSt(MI) ? PairedRegOp : 0;
  return MI->getOperand(Idx);
}

static const MachineOperand &getLdStBaseOp(const MachineInstr *MI) {
  unsigned Idx = isPairedLdSt(MI) ? 2 : 1;
  return MI->getOperand(Idx);
}

static const MachineOperand &getLdStOffsetOp(const MachineInstr *MI) {
  unsigned Idx = isPairedLdSt(MI) ? 3 : 2;
  return MI->getOperand(Idx);
}
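// The accessors above rely on the standard operand layout of these memory
// instructions. For illustration:
//   LDRXui %x0, %x1, 2        ; operands: Rt, base, imm       -> 0, 1, 2
//   LDPXi  %x0, %x1, %x2, 2   ; operands: Rt, Rt2, base, imm  -> 0, 1, 2, 3
// which is why the paired forms use operand indices shifted by one.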
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator Paired,
                                      const LdStPairFlags &Flags) {
  MachineBasicBlock::iterator NextI = I;
  ++NextI;
  // If NextI is the second of the two instructions to be merged, we need
  // to skip one further. Either way the merge will invalidate the iterator,
  // and we don't need to scan the new instruction, as it's a pairwise
  // instruction, which we're not considering for further action anyway.
  if (NextI == Paired)
    ++NextI;

  int SExtIdx = Flags.getSExtIdx();
  unsigned Opc =
      SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
  bool IsUnscaled = isUnscaledLdSt(Opc);
  int OffsetStride =
      IsUnscaled && EnableAArch64UnscaledMemOp ? getMemScale(I) : 1;

  bool MergeForward = Flags.getMergeForward();
  unsigned NewOpc = getMatchingPairOpcode(Opc);
  // Insert our new paired instruction after whichever of the paired
  // instructions MergeForward indicates.
  MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
  // MergeForward also determines which instruction we take the base register
  // operand from, so that the resulting flags stay compatible with the input
  // code.
  const MachineOperand &BaseRegOp =
      MergeForward ? getLdStBaseOp(Paired) : getLdStBaseOp(I);

  // Which register is Rt and which is Rt2 depends on the offset order.
  MachineInstr *RtMI, *Rt2MI;
  if (getLdStOffsetOp(I).getImm() ==
      getLdStOffsetOp(Paired).getImm() + OffsetStride) {
    RtMI = Paired;
    Rt2MI = I;
    // Here we swapped the assumption made for SExtIdx.
    // I.e., we turn ldp I, Paired into ldp Paired, I.
    // Update the index accordingly.
    if (SExtIdx != -1)
      SExtIdx = (SExtIdx + 1) % 2;
  } else {
    RtMI = I;
    Rt2MI = Paired;
  }
  // Handle unscaled instructions: convert their byte offset into the element
  // offset expected by the paired form.
  int OffsetImm = getLdStOffsetOp(RtMI).getImm();
  if (IsUnscaled && EnableAArch64UnscaledMemOp)
    OffsetImm /= OffsetStride;

  // Construct the new instruction.
  MachineInstrBuilder MIB = BuildMI(*I->getParent(), InsertionPoint,
                                    I->getDebugLoc(), TII->get(NewOpc))
                                .addOperand(getLdStRegOp(RtMI))
                                .addOperand(getLdStRegOp(Rt2MI))
                                .addOperand(BaseRegOp)
                                .addImm(OffsetImm);
  (void)MIB;

  // FIXME: Do we need/want to copy the mem operands from the source
  // instructions? Probably. What uses them after this?

  DEBUG(dbgs() << "Creating pair load/store. Replacing instructions:\n    ");
  DEBUG(I->print(dbgs()));
  DEBUG(dbgs() << "    ");
  DEBUG(Paired->print(dbgs()));
  DEBUG(dbgs() << "  with instruction:\n    ");

  if (SExtIdx != -1) {
    // Generate the sign extension for the proper result of the ldp.
    // I.e., with X1, that would be:
    // %W1<def> = KILL %W1, %X1<imp-def>
    // %X1<def> = SBFMXri %X1<kill>, 0, 31
    MachineOperand &DstMO = MIB->getOperand(SExtIdx);
    // Right now, DstMO has the extended register, since it comes from an
    // extended opcode.
    unsigned DstRegX = DstMO.getReg();
    // Get the W variant of that register.
    unsigned DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
    // Update the result of LDP to use the W instead of the X variant.
    DstMO.setReg(DstRegW);
    DEBUG(((MachineInstr *)MIB)->print(dbgs()));
    DEBUG(dbgs() << "\n");
    // Make the machine verifier happy by providing a definition for
    // the X register.
    // Insert this definition right after the generated LDP, i.e., before
    // InsertionPoint.
    MachineInstrBuilder MIBKill =
        BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
                TII->get(TargetOpcode::KILL), DstRegW)
            .addReg(DstRegW)
            .addReg(DstRegX, RegState::Define);
    MIBKill->getOperand(2).setImplicit();
    // Create the sign extension.
    MachineInstrBuilder MIBSXTW =
        BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
                TII->get(AArch64::SBFMXri), DstRegX)
            .addReg(DstRegX)
            .addImm(0)
            .addImm(31);
    (void)MIBSXTW;
    DEBUG(dbgs() << "  Extend operand:\n    ");
    DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs()));
    DEBUG(dbgs() << "\n");
  } else {
    DEBUG(((MachineInstr *)MIB)->print(dbgs()));
    DEBUG(dbgs() << "\n");
  }

  // Erase the old instructions.
  I->eraseFromParent();
  Paired->eraseFromParent();

  return NextI;
}
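// For illustration (register numbers arbitrary), a sign-extending merge
// produced by the code above turns
//   ldrsw x0, [x2]
//   ldr   w1, [x2, #4]
// into
//   ldp   w0, w1, [x2]
//   %w0<def> = KILL %w0, %x0<imp-def>
//   %x0<def> = SBFMXri %x0, 0, 31        ; i.e. sxtw x0, w0
// so only the result that was originally sign-extended pays for the extra
// extend instruction.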
/// trackRegDefsUses - Remember what registers the specified instruction uses
/// and modifies.
static void trackRegDefsUses(const MachineInstr *MI, BitVector &ModifiedRegs,
                             BitVector &UsedRegs,
                             const TargetRegisterInfo *TRI) {
  for (const MachineOperand &MO : MI->operands()) {
    if (MO.isRegMask())
      ModifiedRegs.setBitsNotInMask(MO.getRegMask());

    if (!MO.isReg())
      continue;
    unsigned Reg = MO.getReg();
    if (MO.isDef()) {
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
        ModifiedRegs.set(*AI);
    } else {
      assert(MO.isUse() && "Reg operand not a def and not a use?!?");
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
        UsedRegs.set(*AI);
    }
  }
}

static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
  // Convert the byte-offset used by unscaled into an "element" offset used
  // by the scaled pair load/store instructions.
  if (IsUnscaled)
    Offset /= OffsetStride;

  return Offset <= 63 && Offset >= -64;
}

// Do alignment, specialized to power of 2 and for signed ints,
// avoiding having to do a C-style cast from uint64_t to int when
// using RoundUpToAlignment from include/llvm/Support/MathExtras.h.
// FIXME: Move this function to include/MathExtras.h?
static int alignTo(int Num, int PowOf2) {
  return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
}
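// Worked example (assumed values): for an unscaled LDURXi pair candidate the
// stride is 8 bytes, so a byte offset of 24 maps to element offset 3, which
// inBoundsForPair accepts since the paired form has a 7-bit signed element
// offset (-64..63). A byte offset of 20, however, fails the alignment check
// used below because alignTo(20, 8) == 24 != 20, i.e. the scaled pair
// instruction cannot encode it.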
static bool mayAlias(MachineInstr *MIa, MachineInstr *MIb,
                     const AArch64InstrInfo *TII) {
  // One of the instructions must modify memory.
  if (!MIa->mayStore() && !MIb->mayStore())
    return false;

  // Both instructions must be memory operations.
  if (!MIa->mayLoadOrStore() && !MIb->mayLoadOrStore())
    return false;

  return !TII->areMemAccessesTriviallyDisjoint(MIa, MIb);
}

static bool mayAlias(MachineInstr *MIa,
                     SmallVectorImpl<MachineInstr *> &MemInsns,
                     const AArch64InstrInfo *TII) {
  for (auto &MIb : MemInsns)
    if (mayAlias(MIa, MIb, TII))
      return true;

  return false;
}

/// findMatchingInsn - Scan the instructions looking for a load/store that can
/// be combined with the current instruction into a load/store pair.
MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
                                      LdStPairFlags &Flags,
                                      unsigned Limit) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator MBBI = I;
  MachineInstr *FirstMI = I;
  ++MBBI;

  unsigned Opc = FirstMI->getOpcode();
  bool MayLoad = FirstMI->mayLoad();
  bool IsUnscaled = isUnscaledLdSt(FirstMI);
  unsigned Reg = getLdStRegOp(FirstMI).getReg();
  unsigned BaseReg = getLdStBaseOp(FirstMI).getReg();
  int Offset = getLdStOffsetOp(FirstMI).getImm();

  // Early exit if the first instruction modifies the base register.
  // e.g., ldr x0, [x0]
  if (FirstMI->modifiesRegister(BaseReg, TRI))
    return E;

  // Early exit if the offset is not possible to match. (6 bits of positive
  // range, plus allow an extra one in case we find a later insn that matches
  // with Offset-1)
  int OffsetStride =
      IsUnscaled && EnableAArch64UnscaledMemOp ? getMemScale(FirstMI) : 1;
  if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
    return E;

  // Track which registers have been modified and used between the first insn
  // (inclusive) and the second insn.
  BitVector ModifiedRegs, UsedRegs;
  ModifiedRegs.resize(TRI->getNumRegs());
  UsedRegs.resize(TRI->getNumRegs());

  // Remember any instructions that read/write memory between FirstMI and MI.
  SmallVector<MachineInstr *, 4> MemInsns;

  for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
    MachineInstr *MI = MBBI;
    // Skip DBG_VALUE instructions. Otherwise debug info can affect the
    // optimization by changing how far we scan.
    if (MI->isDebugValue())
      continue;

    // Now that we know this is a real instruction, count it.
    ++Count;

    bool CanMergeOpc = Opc == MI->getOpcode();
    Flags.setSExtIdx(-1);
    if (!CanMergeOpc) {
      bool IsValidLdStrOpc;
      unsigned NonSExtOpc = getMatchingNonSExtOpcode(Opc, &IsValidLdStrOpc);
      assert(IsValidLdStrOpc &&
             "Given Opc should be a Load or Store with an immediate");
      // Opc will be the first instruction in the pair.
      Flags.setSExtIdx(NonSExtOpc == (unsigned)Opc ? 1 : 0);
      CanMergeOpc = NonSExtOpc == getMatchingNonSExtOpcode(MI->getOpcode());
    }

    if (CanMergeOpc && getLdStOffsetOp(MI).isImm()) {
      assert(MI->mayLoadOrStore() && "Expected memory operation.");
      // If we've found another instruction with the same opcode, check to see
      // if the base and offset are compatible with our starting instruction.
      // These instructions all have scaled immediate operands, so we just
      // check for +1/-1. Make sure to check the new instruction offset is
      // actually an immediate and not a symbolic reference destined for
      // a relocation.
      //
      // Pairwise instructions have a 7-bit signed offset field. Single insns
      // have a 12-bit unsigned offset field. To be a valid combine, the
      // final offset must be in range.
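      //
      // For example (offsets shown as encoded, i.e. already scaled):
      //   FirstMI: ldr x0, [x19, #16]   ; Offset   = 2
      //   MI:      ldr x1, [x19, #24]   ; MIOffset = 3
      // differ by exactly one element, so they are candidates for
      //   ldp x0, x1, [x19, #16]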
      unsigned MIBaseReg = getLdStBaseOp(MI).getReg();
      int MIOffset = getLdStOffsetOp(MI).getImm();
      if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) ||
                                   (Offset + OffsetStride == MIOffset))) {
        int MinOffset = Offset < MIOffset ? Offset : MIOffset;
        // If this is a volatile load/store that otherwise matched, stop
        // looking, as something is going on that we don't have enough
        // information to safely transform. Similarly, stop if we see a hint
        // to avoid pairs.
        if (MI->hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
          return E;
        // If the resultant immediate offset of merging these instructions
        // is out of range for a pairwise instruction, bail and keep looking.
        bool MIIsUnscaled = isUnscaledLdSt(MI);
        if (!inBoundsForPair(MIIsUnscaled, MinOffset, OffsetStride)) {
          trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
          MemInsns.push_back(MI);
          continue;
        }
        // If the alignment requirements of the paired (scaled) instruction
        // can't express the offset of the unscaled input, bail and keep
        // looking.
        if (IsUnscaled && EnableAArch64UnscaledMemOp &&
            (alignTo(MinOffset, OffsetStride) != MinOffset)) {
          trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
          MemInsns.push_back(MI);
          continue;
        }
        // If the destination register of the loads is the same register, bail
        // and keep looking. A load-pair instruction with both destination
        // registers the same is UNPREDICTABLE and will result in an exception.
        if (MayLoad && Reg == getLdStRegOp(MI).getReg()) {
          trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
          MemInsns.push_back(MI);
          continue;
        }

        // If the Rt of the second instruction was not modified or used between
        // the two instructions and none of the instructions between the second
        // and first alias with the second, we can combine the second into the
        // first.
        if (!ModifiedRegs[getLdStRegOp(MI).getReg()] &&
            !(MI->mayLoad() && UsedRegs[getLdStRegOp(MI).getReg()]) &&
            !mayAlias(MI, MemInsns, TII)) {
          Flags.setMergeForward(false);
          return MBBI;
        }

        // Likewise, if the Rt of the first instruction is not modified or used
        // between the two instructions and none of the instructions between
        // the first and the second alias with the first, we can combine the
        // first into the second.
        if (!ModifiedRegs[getLdStRegOp(FirstMI).getReg()] &&
            !(MayLoad && UsedRegs[getLdStRegOp(FirstMI).getReg()]) &&
            !mayAlias(FirstMI, MemInsns, TII)) {
          Flags.setMergeForward(true);
          return MBBI;
        }
        // Unable to combine these instructions due to interference in between.
        // Keep looking.
      }
    }

    // The instruction wasn't a matching load or store. Stop searching if we
    // encounter a call instruction that might modify memory.
    if (MI->isCall())
      return E;

    // Update modified / uses register lists.
    trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);

    // Otherwise, if the base register is modified, we have no match, so
    // return early.
    if (ModifiedRegs[BaseReg])
      return E;

    // Update list of instructions that read/write memory.
    if (MI->mayLoadOrStore())
      MemInsns.push_back(MI);
  }
  return E;
}
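// Example of the MergeForward distinction above (registers arbitrary):
//   ldr x0, [x3]
//   str x1, [x4]        ; x1 is read here
//   ldr x1, [x3, #8]
// Hoisting the second load up to the first would move the definition of x1
// above its use, so MergeForward=false is rejected; sinking the first load
// down instead (MergeForward=true) is fine provided x0 is not used or
// modified in between and the loads do not alias the intervening store:
//   str x1, [x4]
//   ldp x0, x1, [x3]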
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
                                     MachineBasicBlock::iterator Update,
                                     bool IsPreIdx) {
  assert((Update->getOpcode() == AArch64::ADDXri ||
          Update->getOpcode() == AArch64::SUBXri) &&
         "Unexpected base register update instruction to merge!");
  MachineBasicBlock::iterator NextI = I;
  // Return the instruction following the merged instruction, which is
  // the instruction following our unmerged load. Unless that's the add/sub
  // instruction we're merging, in which case it's the one after that.
  if (++NextI == Update)
    ++NextI;

  int Value = Update->getOperand(2).getImm();
  assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
         "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
  if (Update->getOpcode() == AArch64::SUBXri)
    Value = -Value;

  unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
                             : getPostIndexedOpcode(I->getOpcode());
  MachineInstrBuilder MIB;
  if (!isPairedLdSt(I)) {
    // Non-paired instruction.
    MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
              .addOperand(getLdStRegOp(Update))
              .addOperand(getLdStRegOp(I))
              .addOperand(getLdStBaseOp(I))
              .addImm(Value);
  } else {
    // Paired instruction.
    int Scale = getMemScale(I);
    MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
              .addOperand(getLdStRegOp(Update))
              .addOperand(getLdStRegOp(I, 0))
              .addOperand(getLdStRegOp(I, 1))
              .addOperand(getLdStBaseOp(I))
              .addImm(Value / Scale);
  }
  (void)MIB;

  if (IsPreIdx)
    DEBUG(dbgs() << "Creating pre-indexed load/store.");
  else
    DEBUG(dbgs() << "Creating post-indexed load/store.");
  DEBUG(dbgs() << " Replacing instructions:\n    ");
  DEBUG(I->print(dbgs()));
  DEBUG(dbgs() << "    ");
  DEBUG(Update->print(dbgs()));
  DEBUG(dbgs() << "  with instruction:\n    ");
  DEBUG(((MachineInstr *)MIB)->print(dbgs()));
  DEBUG(dbgs() << "\n");

  // Erase the old instructions for the block.
  I->eraseFromParent();
  Update->eraseFromParent();

  return NextI;
}

bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr *MemMI,
                                               MachineInstr *MI,
                                               unsigned BaseReg, int Offset) {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::SUBXri:
    // Negate the offset for a SUB instruction.
    Offset *= -1;
    // FALLTHROUGH
  case AArch64::ADDXri:
    // Make sure it's a vanilla immediate operand, not a relocation or
    // anything else we can't handle.
    if (!MI->getOperand(2).isImm())
      break;
    // Watch out for 1 << 12 shifted value.
    if (AArch64_AM::getShiftValue(MI->getOperand(3).getImm()))
      break;

    // The update instruction source and destination register must be the
    // same as the load/store base register.
    if (MI->getOperand(0).getReg() != BaseReg ||
        MI->getOperand(1).getReg() != BaseReg)
      break;

    bool IsPairedInsn = isPairedLdSt(MemMI);
    int UpdateOffset = MI->getOperand(2).getImm();
    // For non-paired load/store instructions, the immediate must fit in a
    // signed 9-bit integer.
    if (!IsPairedInsn && (UpdateOffset > 255 || UpdateOffset < -256))
      break;

    // For paired load/store instructions, the immediate must be a multiple of
    // the scaling factor. The scaled offset must also fit into a signed 7-bit
    // integer.
    if (IsPairedInsn) {
      int Scale = getMemScale(MemMI);
      if (UpdateOffset % Scale != 0)
        break;

      int ScaledOffset = UpdateOffset / Scale;
      if (ScaledOffset > 64 || ScaledOffset < -64)
        break;
    }

    // If we have a non-zero Offset, check that it matches the amount we're
    // adding to the register; a zero Offset matches any update amount.
    if (!Offset || Offset == MI->getOperand(2).getImm())
      return true;
    break;
  }
  return false;
}
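// For example (illustrative), with MemMI = "ldp x0, x1, [x2]" (scale 8):
//   add x2, x2, #16          ; matches: 16 is a multiple of 8 and 16/8 fits
//                            ; in the 7-bit scaled writeback range
//   add x2, x2, #4           ; rejected: not a multiple of the access size
//   add x2, x2, #8, lsl #12  ; rejected: shifted immediate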
MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
    MachineBasicBlock::iterator I, unsigned Limit, int Value) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineInstr *MemMI = I;
  MachineBasicBlock::iterator MBBI = I;

  unsigned BaseReg = getLdStBaseOp(MemMI).getReg();
  int Offset = getLdStOffsetOp(MemMI).getImm() * getMemScale(MemMI);

  // If the base register overlaps a destination register, we can't
  // merge the update.
  bool IsPairedInsn = isPairedLdSt(MemMI);
  for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
    unsigned DestReg = getLdStRegOp(MemMI, i).getReg();
    if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
      return E;
  }

  // Scan forward looking for post-index opportunities.
  // Updating instructions can't be formed if the memory insn already
  // has an offset other than the value we're looking for.
  if (Offset != Value)
    return E;

  // Track which registers have been modified and used between the first insn
  // (inclusive) and the second insn.
  BitVector ModifiedRegs, UsedRegs;
  ModifiedRegs.resize(TRI->getNumRegs());
  UsedRegs.resize(TRI->getNumRegs());
  ++MBBI;
  for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
    MachineInstr *MI = MBBI;
    // Skip DBG_VALUE instructions. Otherwise debug info can affect the
    // optimization by changing how far we scan.
    if (MI->isDebugValue())
      continue;

    // Now that we know this is a real instruction, count it.
    ++Count;

    // If we found a match, return it.
    if (isMatchingUpdateInsn(I, MI, BaseReg, Value))
      return MBBI;

    // Update the status of what the instruction clobbered and used.
    trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);

    // Otherwise, if the base register is used or modified, we have no match,
    // so return early.
    if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg])
      return E;
  }
  return E;
}
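// Note that the overlap check above rules out cases such as
//   ldr x0, [x0]
//   add x0, x0, #8
// where a post-index form would have the destination equal to the
// written-back base register, which the architecture treats as unpredictable.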
MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
    MachineBasicBlock::iterator I, unsigned Limit) {
  MachineBasicBlock::iterator B = I->getParent()->begin();
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineInstr *MemMI = I;
  MachineBasicBlock::iterator MBBI = I;

  unsigned BaseReg = getLdStBaseOp(MemMI).getReg();
  int Offset = getLdStOffsetOp(MemMI).getImm();

  // If the load/store is the first instruction in the block, there's obviously
  // not any matching update. Ditto if the memory offset isn't zero.
  if (MBBI == B || Offset != 0)
    return E;
  // If the base register overlaps a destination register, we can't
  // merge the update.
  bool IsPairedInsn = isPairedLdSt(MemMI);
  for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
    unsigned DestReg = getLdStRegOp(MemMI, i).getReg();
    if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
      return E;
  }

  // Track which registers have been modified and used between the first insn
  // (inclusive) and the second insn.
  BitVector ModifiedRegs, UsedRegs;
  ModifiedRegs.resize(TRI->getNumRegs());
  UsedRegs.resize(TRI->getNumRegs());
  --MBBI;
  for (unsigned Count = 0; MBBI != B && Count < Limit; --MBBI) {
    MachineInstr *MI = MBBI;
    // Skip DBG_VALUE instructions. Otherwise debug info can affect the
    // optimization by changing how far we scan.
    if (MI->isDebugValue())
      continue;

    // Now that we know this is a real instruction, count it.
    ++Count;

    // If we found a match, return it.
    if (isMatchingUpdateInsn(I, MI, BaseReg, Offset))
      return MBBI;

    // Update the status of what the instruction clobbered and used.
    trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);

    // Otherwise, if the base register is used or modified, we have no match,
    // so return early.
    if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg])
      return E;
  }
  return E;
}

bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
  bool Modified = false;
  // Two transformations to do here:
  // 1) Find loads and stores that can be merged into a single load or store
  //    pair instruction.
  //      e.g.,
  //        ldr x0, [x2]
  //        ldr x1, [x2, #8]
  //        ; becomes
  //        ldp x0, x1, [x2]
  // 2) Find base register updates that can be merged into the load or store
  //    as a base-reg writeback.
  //      e.g.,
  //        ldr x0, [x2]
  //        add x2, x2, #4
  //        ; becomes
  //        ldr x0, [x2], #4

  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    MachineInstr *MI = MBBI;
    switch (MI->getOpcode()) {
    default:
      // Just move on to the next instruction.
      ++MBBI;
      break;
    // Scaled instructions.
    case AArch64::STRSui:
    case AArch64::STRDui:
    case AArch64::STRQui:
    case AArch64::STRXui:
    case AArch64::STRWui:
    case AArch64::LDRSui:
    case AArch64::LDRDui:
    case AArch64::LDRQui:
    case AArch64::LDRXui:
    case AArch64::LDRWui:
    case AArch64::LDRSWui:
    // Unscaled instructions.
    case AArch64::STURSi:
    case AArch64::STURDi:
    case AArch64::STURQi:
    case AArch64::STURWi:
    case AArch64::STURXi:
    case AArch64::LDURSi:
    case AArch64::LDURDi:
    case AArch64::LDURQi:
    case AArch64::LDURWi:
    case AArch64::LDURXi:
    case AArch64::LDURSWi: {
      // If this is a volatile load/store, don't mess with it.
      if (MI->hasOrderedMemoryRef()) {
        ++MBBI;
        break;
      }
      // Make sure this is a reg+imm (as opposed to an address reloc).
      if (!getLdStOffsetOp(MI).isImm()) {
        ++MBBI;
        break;
      }
      // Check if this load/store has a hint to avoid pair formation.
      // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
      if (TII->isLdStPairSuppressed(MI)) {
        ++MBBI;
        break;
      }
      // Look ahead up to ScanLimit instructions for a pairable instruction.
      LdStPairFlags Flags;
      MachineBasicBlock::iterator Paired =
          findMatchingInsn(MBBI, Flags, ScanLimit);
      if (Paired != E) {
        ++NumPairCreated;
        if (isUnscaledLdSt(MI))
          ++NumUnscaledPairCreated;

        // Merge the loads into a pair. Keeping the iterator straight is a
        // pain, so we let the merge routine tell us what the next instruction
        // is after it's done mucking about.
        MBBI = mergePairedInsns(MBBI, Paired, Flags);
        Modified = true;
        break;
      }
      ++MBBI;
      break;
    }
      // FIXME: Do the other instructions.
    }
  }

  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    MachineInstr *MI = MBBI;
    // Do update merging. It's simpler to keep this separate from the above
    // switch, though not strictly necessary.
    unsigned Opc = MI->getOpcode();
    switch (Opc) {
    default:
      // Just move on to the next instruction.
      ++MBBI;
      break;
    // Scaled instructions.
    case AArch64::STRSui:
    case AArch64::STRDui:
    case AArch64::STRQui:
    case AArch64::STRXui:
    case AArch64::STRWui:
    case AArch64::STRHHui:
    case AArch64::STRBBui:
    case AArch64::LDRSui:
    case AArch64::LDRDui:
    case AArch64::LDRQui:
    case AArch64::LDRXui:
    case AArch64::LDRWui:
    case AArch64::LDRHHui:
    case AArch64::LDRBBui:
    // Unscaled instructions.
    case AArch64::STURSi:
    case AArch64::STURDi:
    case AArch64::STURQi:
    case AArch64::STURWi:
    case AArch64::STURXi:
    case AArch64::LDURSi:
    case AArch64::LDURDi:
    case AArch64::LDURQi:
    case AArch64::LDURWi:
    case AArch64::LDURXi:
    // Paired instructions.
    case AArch64::LDPSi:
    case AArch64::LDPSWi:
    case AArch64::LDPDi:
    case AArch64::LDPQi:
    case AArch64::LDPWi:
    case AArch64::LDPXi:
    case AArch64::STPSi:
    case AArch64::STPDi:
    case AArch64::STPQi:
    case AArch64::STPWi:
    case AArch64::STPXi: {
      // Make sure this is a reg+imm (as opposed to an address reloc).
      if (!getLdStOffsetOp(MI).isImm()) {
        ++MBBI;
        break;
      }
      // Look forward to try to form a post-index instruction. For example,
      //   ldr x0, [x20]
      //   add x20, x20, #32
      // merged into:
      //   ldr x0, [x20], #32
      MachineBasicBlock::iterator Update =
          findMatchingUpdateInsnForward(MBBI, ScanLimit, 0);
      if (Update != E) {
        // Merge the update into the ld/st.
        MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
        Modified = true;
        ++NumPostFolded;
        break;
      }
      // Don't know how to handle pre/post-index versions of the unscaled
      // instructions, so move to the next instruction.
      if (isUnscaledLdSt(Opc)) {
        ++MBBI;
        break;
      }

      // Look back to try to find a pre-index instruction. For example,
      //   add x0, x0, #8
      //   ldr x1, [x0]
      // merged into:
      //   ldr x1, [x0, #8]!
      Update = findMatchingUpdateInsnBackward(MBBI, ScanLimit);
      if (Update != E) {
        // Merge the update into the ld/st.
        MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
        Modified = true;
        ++NumPreFolded;
        break;
      }
      // The immediate in the load/store is scaled by the size of the register
      // being loaded. The immediate in the add we're looking for,
      // however, is not, so adjust here.
      int Value = getLdStOffsetOp(MI).getImm() * getMemScale(MI);

      // Look forward to try to find an update that can be merged as a
      // pre-index instruction. For example,
      //   ldr x1, [x0, #64]
      //   add x0, x0, #64
      // merged into:
      //   ldr x1, [x0, #64]!
      Update = findMatchingUpdateInsnForward(MBBI, ScanLimit, Value);
      if (Update != E) {
        // Merge the update into the ld/st.
        MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
        Modified = true;
        ++NumPreFolded;
        break;
      }

      // Nothing found. Just move to the next instruction.
      ++MBBI;
      break;
    }
      // FIXME: Do the other instructions.
    }
  }

  return Modified;
}

bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TII = static_cast<const AArch64InstrInfo *>(Fn.getSubtarget().getInstrInfo());
  TRI = Fn.getSubtarget().getRegisterInfo();

  bool Modified = false;
  for (auto &MBB : Fn)
    Modified |= optimizeBlock(MBB);

  return Modified;
}

// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep
// loads and stores near one another?

/// createAArch64LoadStoreOptimizationPass - returns an instance of the
/// load / store optimization pass.
FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() {
  return new AArch64LoadStoreOpt();
}